Skip to content

Commit d6b4159

Browse files
committed
tuchong
实现了获取图片的下载url
0 parents  commit d6b4159

File tree

5 files changed

+255
-0
lines changed

5 files changed

+255
-0
lines changed

tuchong/.idea/misc.xml

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tuchong/.idea/modules.xml

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tuchong/.idea/tuchong.iml

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tuchong/.idea/workspace.xml

Lines changed: 174 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tuchong/tuchongSipder.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
import requests
from bs4 import BeautifulSoup
import re

# Entry page listing all photo categories on Tuchong.
url='https://tuchong.com/explore/'
# Desktop browser User-Agent; sent with requests so the server returns
# the normal desktop HTML instead of a mobile/anti-bot variant.
header={
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36'
}
# Fetch the list of photo-category URLs
12+
def get_url_sort(url_list, url):
    """Collect photo-category URLs from the Tuchong explore page.

    :param url_list: list that category URLs are appended to (mutated in place)
    :param url: explore-page URL to request
    :return: url_list, with one href appended per category tag found
    """
    response = requests.get(url, headers=header)
    soup = BeautifulSoup(response.text, 'lxml')
    # Each category is rendered as <li class="tag-square-base"><a href="...">.
    for li in soup.find_all('li', class_='tag-square-base'):
        url_list.append(li.find('a')['href'])
    return url_list
35+
36+
def get_pic_list(url, pages=5):
    """Fetch post URLs for one category via Tuchong's REST API.

    :param url: category REST base URL (the HTML tag URL rewritten to
        contain ``rest/tags``)
    :param pages: upper bound for the page counter; pages 1 .. pages-1
        are requested, 20 posts each (default preserves the original
        hard-coded ``range(1, 5)``)
    :return: list of post URLs gathered across all requested pages
    """
    photo_urls = []
    for page in range(1, pages):
        # order=weekly mirrors the site's "weekly hot" listing.
        part_url = url + 'posts?page=%s&count=20&order=weekly' % page
        # Send the same UA header as the HTML request for consistency.
        data = requests.get(part_url, headers=header).json()
        # .get() guards against pages past the end lacking 'postList'.
        for post in data.get('postList', []):
            photo_urls.append(post['url'])
            print(post['url'])
    return photo_urls
49+
50+
if __name__ == '__main__':
    # Guarded entry point: importing this module no longer fires HTTP requests.
    url_lists = []
    get_url_sort(url_lists, url)

    for pic_url in url_lists:
        # Rewrite the HTML tag URL into its REST API counterpart.
        pic = pic_url.replace('tags', 'rest/tags')
        get_pic_list(pic)
        # Deliberately crawl only the first category for now.
        break

0 commit comments

Comments
 (0)