Skip to content

Commit 08f8e01

Browse files
committed
add function to get answers' voter
1 parent 8764fc5 commit 08f8e01

File tree

2 files changed

+40
-19
lines changed

2 files changed

+40
-19
lines changed

test.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def question_test(url):
1919
# 获取该问题所属话题
2020
topics = question.get_topics()
2121
# 获取该问题被浏览次数
22-
visit_times=question.get_visit_times()
22+
visit_times = question.get_visit_times()
2323
# 获取排名第一的回答
2424
top_answer = question.get_top_answer()
2525
# 获取排名前十的十个回答
@@ -41,7 +41,7 @@ def question_test(url):
4141
print followers_num # 输出:26910
4242
for topic in topics:
4343
print topic, # 输出:情感克制 现实 社会 个人经历
44-
print visit_times # 输出: 该问题当前被浏览的次数
44+
print visit_times # 输出: 该问题当前被浏览的次数
4545
print top_answer # 输出:<zhihu.Answer instance at 0x7f8b6582d0e0>(Answer类对象)
4646
print top_answers # 输出:<generator object get_top_i_answers at 0x7fed676eb320>(代表前十的Answer的生成器)
4747
print answers # 输出:<generator object get_all_answer at 0x7f8b66ba30a0>(代表所有Answer的生成器)
@@ -57,6 +57,8 @@ def answer_test(answer_url):
5757
upvote = answer.get_upvote()
5858
# 获取该答案所属问题被浏览次数
5959
visit_times = answer.get_visit_times()
60+
# 获取所有给该答案点赞的用户信息
61+
voters = answer.get_voters()
6062
# 把答案输出为txt文件
6163
answer.to_txt()
6264
# 把答案输出为markdown文件
@@ -69,9 +71,12 @@ def answer_test(answer_url):
6971
print author
7072
# <zhihu.User instance at 0x7f0b25425b90>
7173
# 一个User对象
74+
for voter in voters:
75+
print voter
76+
# 一个 User 对象
7277
print author.get_user_id() # 输出:田浩
7378
print upvote # 输出:9320
74-
print visit_times # 输出: 改答案所属问题被浏览次数
79+
print visit_times # 输出: 改答案所属问题被浏览次数
7580

7681

7782
def user_test(user_url):
@@ -207,4 +212,5 @@ def main():
207212

208213

209214
if __name__ == '__main__':
210-
main()
215+
main()
216+

zhihu.py

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,11 @@ def create_session():
2323
global cookies
2424
cf = ConfigParser.ConfigParser()
2525
cf.read("config.ini")
26-
2726
cookies = cf._sections['cookies']
2827

2928
email = cf.get("info", "email")
3029
password = cf.get("info", "password")
31-
3230
cookies = dict(cookies)
33-
# print cookies
3431

3532
s = requests.session()
3633
login_data = {"email": email, "password": password}
@@ -824,9 +821,9 @@ def to_txt(self):
824821
print file_name
825822
# if platform.system() == 'Windows':
826823
# file_name = file_name.decode('utf-8').encode('gbk')
827-
# print file_name
824+
# print file_name
828825
# else:
829-
# print file_name
826+
# print file_name
830827
if os.path.exists(os.path.join(os.path.join(os.getcwd(), "text"), file_name)):
831828
f = open(os.path.join(os.path.join(os.getcwd(), "text"), file_name), "a")
832829
f.write("\n\n")
@@ -844,9 +841,9 @@ def to_txt(self):
844841
print file_name
845842
# if platform.system() == 'Windows':
846843
# file_name = file_name.decode('utf-8').encode('gbk')
847-
# print file_name
844+
# print file_name
848845
# else:
849-
# print file_name
846+
# print file_name
850847
f = open(os.path.join(os.path.join(os.getcwd(), "text"), file_name), "wt")
851848
f.write(self.get_question().get_title() + "\n\n")
852849
if platform.system() == 'Windows':
@@ -863,11 +860,11 @@ def to_txt(self):
863860

864861
# def to_html(self):
865862
# content = self.get_content()
866-
# if self.get_author().get_user_id() == "匿名用户":
867-
# file_name = self.get_question().get_title() + "--" + self.get_author().get_user_id() + "的回答.html"
868-
# f = open(file_name, "wt")
869-
# print file_name
870-
# else:
863+
# if self.get_author().get_user_id() == "匿名用户":
864+
# file_name = self.get_question().get_title() + "--" + self.get_author().get_user_id() + "的回答.html"
865+
# f = open(file_name, "wt")
866+
# print file_name
867+
# else:
871868
# file_name = self.get_question().get_title() + "--" + self.get_author().get_user_id() + "的回答.html"
872869
# f = open(file_name, "wt")
873870
# print file_name
@@ -955,6 +952,24 @@ def get_visit_times(self):
955952
if "所属问题被浏览" in tag_p.contents[0].encode('utf-8'):
956953
return int(tag_p.contents[1].contents[0])
957954

955+
def get_voters(self):
    """Yield a ``User`` object for each voter listed for this answer.

    The answer page is parsed on demand (via ``self.parser()``) to read the
    answer's ``data-aid`` attribute, which is then sent to the
    ``AnswerFullVoteInfoV2`` endpoint through the shared module-level
    ``session``. Each ``<span>`` of the response, except the first and the
    last one, is treated as one voter.

    This is a generator: no request is made until iteration starts.

    Yields:
        ``User(voter_url, voter_id)`` for a voter whose span carries a
        profile link, or ``User(None)`` for a span without one.

    Raises:
        TypeError: if the answer ``<div>`` cannot be found in the page
            (``find`` returns ``None``, which is not subscriptable).
    """
    # Identity test: "== None" would go through the object's own __eq__.
    if self.soup is None:
        self.parser()
    answer_div = self.soup.find("div", class_="zm-item-answer ")
    data_aid = answer_div["data-aid"]

    # The query string is the percent-encoded JSON {"answer_id":"<data_aid>"}.
    request_url = (
        'http://www.zhihu.com/node/AnswerFullVoteInfoV2?params=%7B%22answer_id%22%3A%22'
        + str(data_aid) + '%22%7D'
    )

    # Create the shared session lazily if nobody has done so yet.
    if session is None:
        create_session()
    response = session.get(request_url)
    vote_soup = BeautifulSoup(response.content)

    # NOTE(review): the first and last <span> are skipped; presumably they are
    # not voter entries -- confirm against the endpoint's markup.
    for voter_info in vote_soup.find_all("span")[1:-1]:
        link = voter_info.a
        if link is None:
            # A span without a profile link (presumably an anonymous voter).
            # Other code in this file builds User(None) for anonymous
            # authors, so do the same here rather than failing on
            # None["href"] in the middle of iteration.
            yield User(None)
            continue
        voter_url = "http://www.zhihu.com" + str(link["href"])
        voter_id = link["title"].encode("utf-8")
        yield User(voter_url, voter_id)
972+
958973

959974
class Collection:
960975
url = None
@@ -1000,7 +1015,7 @@ def parser(self):
10001015
# print 'has_cookies', has_cookies
10011016
if has_cookies == False:
10021017
r = s.get(self.url)
1003-
#print 'r', r.content
1018+
# print 'r', r.content
10041019
soup = BeautifulSoup(r.content)
10051020
self.soup = soup
10061021

@@ -1057,8 +1072,8 @@ def get_all_answers(self):
10571072
question = Question(question_url, question_title)
10581073
answer_url = "http://www.zhihu.com" + answer.find("span", class_="answer-date-link-wrap").a["href"]
10591074
author = None
1060-
#print 'answer url',answer_url
1061-
#print 'answerfind',answer.find("h3", class_="zm-item-answer-author-wrap")
1075+
# print 'answer url',answer_url
1076+
# print 'answerfind',answer.find("h3", class_="zm-item-answer-author-wrap")
10621077
if answer.find("h3", class_="zm-item-answer-author-wrap").string == u"匿名用户":
10631078
author_url = None
10641079
author = User(author_url)

0 commit comments

Comments
 (0)