@@ -1328,28 +1328,11 @@ from urllib.parse import quote, quote_plus, unquote, unquote_plus
 Scraping
 --------
 ```python
-# $ pip3 install beautifulsoup4
-from http.cookiejar import CookieJar
-from urllib.error import HTTPError, URLError
-from urllib.request import build_opener, HTTPCookieProcessor
-from bs4 import BeautifulSoup
-
-def scrape(url):
-    """Returns tree of HTML elements located at URL."""
-    jar = CookieJar()
-    opener = build_opener(HTTPCookieProcessor(jar))
-    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
-    try:
-        html = opener.open(url)
-    except ValueError as error:
-        return print(f'Malformed URL: {url}.\n{error}')
-    except (HTTPError, URLError) as error:
-        return print(f"Can't find URL: {url}.\n{error}")
-    return BeautifulSoup(html, 'html.parser')
-```
-
-```python
->>> document = scrape('https://en.wikipedia.org/wiki/Python_(programming_language)')
+# $ pip3 install requests beautifulsoup4
+>>> import requests
+>>> from bs4 import BeautifulSoup
+>>> page = requests.get('https://en.wikipedia.org/wiki/Python_(programming_language)')
+>>> document = BeautifulSoup(page.text, 'html.parser')
 >>> table = document.find('table', class_='infobox vevent')
 >>> rows = table.find_all('tr')
 >>> website = rows[11].find('a')['href']
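Note that the removed `scrape()` helper guarded against bad or unreachable URLs, while the new inline `requests` version does not. A minimal sketch of how the same guard could be kept with `requests` (the `fetch_document` helper name, header, and timeout value are illustrative assumptions, not part of the cheatsheet):

```python
# Hypothetical helper, assuming requests and beautifulsoup4 are installed.
import requests
from bs4 import BeautifulSoup

def fetch_document(url):
    """Returns tree of HTML elements located at URL, or None on failure."""
    try:
        page = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=10)
        page.raise_for_status()            # Raises HTTPError on 4xx/5xx responses.
    except requests.exceptions.RequestException as error:
        return print(f"Can't fetch URL: {url}.\n{error}")
    return BeautifulSoup(page.text, 'html.parser')
```

It would be used the same way as the inline version, e.g. `document = fetch_document('https://en.wikipedia.org/wiki/Python_(programming_language)')`.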