Skip to content

Commit 44a4b56

Browse files
Create StackOverFlow.py
Stack Overflow API scraper.
1 parent 014a8a3 commit 44a4b56

File tree

1 file changed

+152
-0
lines changed

1 file changed

+152
-0
lines changed

StackOverFlow.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
2+
3+
import ast
import calendar
import json
import time
from datetime import datetime, timedelta
from pprint import pprint
from sets import Set

import requests
from pytz import timezone


# Read one line of the form "<tag> <count>" from standard input:
#   subject         -- the Stack Overflow tag to search under
#   numberofscholar -- how many matching users to report (converted to int)
# Malformed input ends the script with a usage message rather than an
# unpacking/conversion traceback.
_input_fields = raw_input().split()
if len(_input_fields) != 2:
    raise SystemExit('expected input: <tag> <number of scholars>')
subject, numberofscholar = _input_fields
try:
    numberofscholar = int(numberofscholar)
except ValueError:
    raise SystemExit('number of scholars must be an integer, got %r'
                     % (numberofscholar,))

# Stack Exchange API endpoint fragments.  Each request URL is assembled as
# <PREFIX> + <tag or user id> + <SUFFIX>.
_API_ROOT = 'https://api.stackexchange.com/2.2/'

# Top answerers for a given tag: tags/<tag>/top-answerers/all_time
URL_TOP_ANSWER1 = _API_ROOT + 'tags/'
URL_TOP_ANSWER2 = '/top-answerers/all_time'
# Original spelling (lower-case "l"); kept so existing references still resolve.
URl_TOP_ANSWER2 = URL_TOP_ANSWER2

# A user's top answers under a tag: users/<id>/tags/<tag>/top-answers
# (the '/tags/<tag>' part is inserted by the caller).
URL_TAG_USER_ANSWERS1 = _API_ROOT + 'users/'
URL_TAG_USER_ANSWERS2 = '/top-answers'

# A user's badges, inspected for a badge named 'Teacher': users/<id>/badges
URL_USER_BADGE1 = _API_ROOT + 'users/'
URL_USER_BADGE2 = '/badges'

# A user's reputation changes over the last month: users/<id>/reputation
URL_USER_REPUTATION1 = _API_ROOT + 'users/'
URL_USER_REPUTATION2 = '/reputation'

# A user's profile details: users/<id>
URL_USER_DETAIL1 = _API_ROOT + 'users/'

# Reputation window: the last 30 days, as Unix epoch seconds rendered as
# strings.  calendar.timegm() interprets its argument as UTC, so the time
# tuple must come from utcnow(); feeding it local now() shifts the window by
# the local UTC offset.  The clock is read once so both bounds share the same
# instant and are exactly 30 days apart.
_window_end = datetime.utcnow()
today = str(calendar.timegm(_window_end.timetuple()))
oneMonthBefore = str(
    calendar.timegm((_window_end - timedelta(days=30)).timetuple()))

# Query-string parameters for the individual REST calls.

# User profile lookup.
url_user_param = {
    'site': 'stackoverflow',
    'order': 'desc',
    'sort': 'reputation',
}

# Reputation changes, restricted to the window computed above.
url_replutation_param = {
    'site': 'stackoverflow',
    'fromdate': oneMonthBefore,
    'todate': today,
}

# Badges of a user.
url_user_badge = {
    'site': 'stackoverflow',
    'order': 'desc',
    'sort': 'rank',
}

# Top answerers of a tag.
url_params_topAnswer = {
    'site': 'stackoverflow',
}

# A user's top answers within a tag.
url_params_tagUsers_answers = {
    'site': 'stackoverflow',
    'order': 'desc',
    'sort': 'activity',
}

# Not referenced by the visible requests; retained for compatibility.
url_params_tags_answers = {
    'site': 'stackoverflow',
    'order': 'desc',
    'sort': 'popular',
}

# Main search: walk the tag's top answerers page by page and print every user
# who meets all three criteria, until `numberofscholar` users have been
# reported or the API has no more pages.
#
# NOTE(review): the extracted source had lost its indentation; the nesting
# below is a reconstruction -- confirm against the original file.

# Seconds to wait for any single HTTP response before giving up.
_REQUEST_TIMEOUT_SECONDS = 30


def _api_get(url, params):
    """Fetch one API response and return its decoded JSON body as a dict.

    Ends the script with a readable message when the body carries an
    ``error_id`` (for example an exhausted request quota) instead of
    failing later on a missing ``'items'`` key.  When the body carries a
    ``backoff`` value, sleeps that many seconds before returning so the
    next request is not sent too early.
    """
    payload = requests.get(
        url, params=params, timeout=_REQUEST_TIMEOUT_SECONDS).json()
    if 'error_id' in payload:
        raise SystemExit('Stack Exchange API error %s (%s): %s' % (
            payload.get('error_id'),
            payload.get('error_name'),
            payload.get('error_message')))
    if payload.get('backoff'):
        time.sleep(payload['backoff'])
    return payload


def _qualifies(user_id, tag_path):
    """Return True when the user passes every selection criterion.

    The checks run cheapest-exit-first and stop at the first failure, so
    users who fail early do not cost further API requests:
      1. more than two top answers under the tag,
      2. a badge named 'Teacher',
      3. at least one reputation change of 200 or more in the window
         described by ``url_replutation_param``.
    """
    answers = _api_get(
        URL_TAG_USER_ANSWERS1 + str(user_id) + '/tags/' + tag_path
        + URL_TAG_USER_ANSWERS2,
        url_params_tagUsers_answers).get('items', [])
    if len(answers) <= 2:
        return False

    # NOTE(review): only the first page of badges is inspected -- confirm
    # that 'Teacher' cannot fall on a later page for heavily-badged users.
    badges = _api_get(
        URL_USER_BADGE1 + str(user_id) + URL_USER_BADGE2,
        url_user_badge).get('items', [])
    if not any(badge.get('name') == 'Teacher' for badge in badges):
        return False

    changes = _api_get(
        URL_USER_REPUTATION1 + str(user_id) + URL_USER_REPUTATION2,
        url_replutation_param).get('items', [])
    return any(int(change['reputation_change']) >= 200
               for change in changes if 'reputation_change' in change)


def _lookup_profile(user_id):
    """Return ``(location, display_name)``; a missing field yields ''."""
    profiles = _api_get(
        URL_USER_DETAIL1 + str(user_id), url_user_param).get('items', [])
    location = ''
    display_name = ''
    for profile in profiles:
        location = profile.get('location', location)
        display_name = profile.get('display_name', display_name)
    return location, display_name


page = 1
not_done = True
user_list = []

# Percent-encode the tag so characters such as '#' (as in "c#") stay part of
# the URL path instead of being read as a fragment delimiter.
_tag_path = requests.utils.quote(subject, safe='')

# Replies are paginated, so loop until none are left or enough users are found.
while not_done and numberofscholar > 0:
    # Ask for the current page explicitly; without it every iteration would
    # receive the first page again.
    json_data = _api_get(
        URL_TOP_ANSWER1 + _tag_path + URl_TOP_ANSWER2,
        dict(url_params_topAnswer, page=page))

    # Record user id -> profile URL for the users delivered by this page.
    # Entries without a user id cannot be queried further and are skipped.
    page_users = []
    for item_elem in json_data.get('items', []):
        user = item_elem.get('user', {})
        if user.get('user_id') is not None:
            page_users.append({user['user_id']: user.get('link', '')})
    user_list.extend(page_users)

    # Examine only this page's users: rescanning the accumulated list would
    # repeat requests (and output) for users already handled.
    for item_elem in page_users:
        if numberofscholar <= 0:
            break  # requested number of users already reported
        userkey = list(item_elem.keys())[0]
        userUrl = item_elem[userkey]

        if not _qualifies(userkey, _tag_path):
            continue

        location, nameUser = _lookup_profile(userkey)
        # Single pre-formatted argument: prints the same line as the former
        # comma-separated print statement.
        print(u"userkey: %s ....name: %s ....top_answer....location: %s ....url: %s"
              % (userkey, nameUser, location, userUrl))

        # One fewer user left to find.
        numberofscholar = numberofscholar - 1

    # Prepare for the next page, if there is one.
    page += 1
    not_done = json_data.get('has_more', False)


0 commit comments

Comments
 (0)