Skip to content

Commit 6ea4dfa

Browse files
committed
Small tweaks for the pagerank code.
1 parent 9e44215 commit 6ea4dfa

File tree

1 file changed

+6
-7
lines changed

1 file changed

+6
-7
lines changed

code/pagerank/spider.py

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -72,11 +72,11 @@
72 72
# If we are retrieving this page, there should be no links from it
73 73
cur.execute('DELETE from Links WHERE from_id=?', (fromid, ) )
74 74
try:
75-
# Deal with SSL certificate anomalies Python > 2.7
76-
# scontext = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
75+
# Deal with SSL certificate anomalies Python > 2.7
76+
# scontext = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
77 77
# document = urllib.urlopen(url, context=scontext)
78-
79-
# Normal Unless you encounter certificate problems
78+
79+
# Normal Unless you encounter certificate problems
80 80
document = urllib.urlopen(url)
81 81

82 82
html = document.read()
@@ -86,8 +86,7 @@
86 86

87 87
if 'text/html' != document.info().gettype() :
88 88
print "Ignore non text/html page"
89-
cur.execute('DELETE FROM Pages WHERE url=?', ( url, ) )
90-
cur.execute('UPDATE Pages SET error=0 WHERE url=?', (url, ) )
89+
cur.execute('UPDATE Pages SET error=-1 WHERE url=?', (url, ) )
91 90
conn.commit()
92 91
continue
93 92

@@ -125,7 +124,7 @@
125 124
# print href
126 125
if ( len(href) < 1 ) : continue
127 126

128-
# Check if the URL is in any of the webs
127+
# Check if the URL is in any of the webs
129 128
found = False
130 129
for web in webs:
131 130
if ( href.startswith(web) ) :

0 commit comments

Comments
 (0)