@@ -23,14 +23,11 @@ def create_session():
2323 global cookies
2424 cf = ConfigParser .ConfigParser ()
2525 cf .read ("config.ini" )
26-
2726 cookies = cf ._sections ['cookies' ]
2827
2928 email = cf .get ("info" , "email" )
3029 password = cf .get ("info" , "password" )
31-
3230 cookies = dict (cookies )
33- # print cookies
3431
3532 s = requests .session ()
3633 login_data = {"email" : email , "password" : password }
@@ -824,9 +821,9 @@ def to_txt(self):
824821 print file_name
825822 # if platform.system() == 'Windows':
826823 # file_name = file_name.decode('utf-8').encode('gbk')
827- # print file_name
824+ # print file_name
828825 # else:
829- # print file_name
826+ # print file_name
830827 if os .path .exists (os .path .join (os .path .join (os .getcwd (), "text" ), file_name )):
831828 f = open (os .path .join (os .path .join (os .getcwd (), "text" ), file_name ), "a" )
832829 f .write ("\n \n " )
@@ -844,9 +841,9 @@ def to_txt(self):
844841 print file_name
845842 # if platform.system() == 'Windows':
846843 # file_name = file_name.decode('utf-8').encode('gbk')
847- # print file_name
844+ # print file_name
848845 # else:
849- # print file_name
846+ # print file_name
850847 f = open (os .path .join (os .path .join (os .getcwd (), "text" ), file_name ), "wt" )
851848 f .write (self .get_question ().get_title () + "\n \n " )
852849 if platform .system () == 'Windows' :
@@ -863,11 +860,11 @@ def to_txt(self):
863860
864861 # def to_html(self):
865862 # content = self.get_content()
866- # if self.get_author().get_user_id() == "匿名用户":
867- # file_name = self.get_question().get_title() + "--" + self.get_author().get_user_id() + "的回答.html"
868- # f = open(file_name, "wt")
869- # print file_name
870- # else:
863+ # if self.get_author().get_user_id() == "匿名用户":
864+ # file_name = self.get_question().get_title() + "--" + self.get_author().get_user_id() + "的回答.html"
865+ # f = open(file_name, "wt")
866+ # print file_name
867+ # else:
871868 # file_name = self.get_question().get_title() + "--" + self.get_author().get_user_id() + "的回答.html"
872869 # f = open(file_name, "wt")
873870 # print file_name
@@ -955,6 +952,24 @@ def get_visit_times(self):
955952 if "所属问题被浏览" in tag_p .contents [0 ].encode ('utf-8' ):
956953 return int (tag_p .contents [1 ].contents [0 ])
957954
def get_voters(self):
    """Yield a User for every voter listed for this answer.

    Reads the answer's ``data-aid`` attribute from the parsed answer page,
    requests the ``AnswerFullVoteInfoV2`` node for that id through the
    module-level ``session``, and walks the ``<span>`` elements of the
    response.

    Yields:
        User: ``User(voter_url, voter_id)`` for a span that contains a
        link, where ``voter_id`` is the link title encoded as UTF-8;
        ``User(None)`` for a span without a link, which is the form this
        file already uses for anonymous authors.
    """
    if self.soup is None:
        self.parser()
    answer_div = self.soup.find("div", class_="zm-item-answer ")
    data_aid = answer_div["data-aid"]
    # The query string is the URL-encoded JSON {"answer_id":"<data_aid>"}.
    request_url = ('http://www.zhihu.com/node/AnswerFullVoteInfoV2?params=%7B%22answer_id%22%3A%22'
                   + str(data_aid) + '%22%7D')
    # NOTE(review): create_session() is expected to populate the
    # module-level ``session`` -- confirm against its definition.
    if session is None:
        create_session()
    response = session.get(request_url)
    vote_soup = BeautifulSoup(response.content)
    # The first and last <span> are skipped; presumably they are not voter
    # entries -- verify against the response markup.
    for voter_info in vote_soup.find_all("span")[1:-1]:
        link = voter_info.a
        if link is None:
            # No profile link in this span (e.g. an anonymous voter):
            # previously this raised TypeError when indexing None.
            yield User(None)
            continue
        voter_url = "http://www.zhihu.com" + str(link["href"])
        voter_id = link["title"].encode("utf-8")
        yield User(voter_url, voter_id)
972+
958973
959974class Collection :
960975 url = None
@@ -1000,7 +1015,7 @@ def parser(self):
10001015 # print 'has_cookies', has_cookies
10011016 if has_cookies == False :
10021017 r = s .get (self .url )
1003- #print 'r', r.content
1018+ # print 'r', r.content
10041019 soup = BeautifulSoup (r .content )
10051020 self .soup = soup
10061021
@@ -1057,8 +1072,8 @@ def get_all_answers(self):
10571072 question = Question (question_url , question_title )
10581073 answer_url = "http://www.zhihu.com" + answer .find ("span" , class_ = "answer-date-link-wrap" ).a ["href" ]
10591074 author = None
1060- #print 'answer url',answer_url
1061- #print 'answerfind',answer.find("h3", class_="zm-item-answer-author-wrap")
1075+ # print 'answer url',answer_url
1076+ # print 'answerfind',answer.find("h3", class_="zm-item-answer-author-wrap")
10621077 if answer .find ("h3" , class_ = "zm-item-answer-author-wrap" ).string == u"匿名用户" :
10631078 author_url = None
10641079 author = User (author_url )
0 commit comments