Skip to content

Commit bb0b70d

Browse files
authored
沪深港通aiohttp版
1 parent 8f17250 commit bb0b70d

File tree

1 file changed

+140
-0
lines changed

1 file changed

+140
-0
lines changed

hushengangtong.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created on Sun Apr 18 17:34:03 2021
5+
6+
@author: lambda
7+
"""
8+
9+
import aiohttp
10+
import asyncio
11+
import re
12+
import os
13+
import sys
14+
import json
15+
import random
16+
import pandas as pd
17+
import datetime
18+
import time
19+
20+
# Pool of User-Agent header values; fetch() picks one at random for each
# request. Entries are unique so that every value is equally likely.
user_agent = [
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
    "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
    "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)",
    "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
    "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
    "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
    "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
    "Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
    "UCWEB7.0.2.37/28/999",
    "NOKIA5700/ UCWEB7.0.2.37/28/999",
    "Openwave/ UCWEB7.0.2.37/28/999",
    "Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999",
    # iPhone 6:
    "Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25",
]
61+
62+
# The report is built for the previous calendar day.
today = datetime.date.today()
yestoday = today - datetime.timedelta(days=1)
print('今天获取的数据是: ', yestoday)

# Output workbook names, all derived from the report date.
fname = str(yestoday)+".xlsx"
fname1 = "PPOS_POTE_"+fname
fname2 = "PPOS_POTE_SZ_"+fname

# Work inside the output directory so the relative file names above are
# written there. makedirs(exist_ok=True) also creates missing parent
# directories and has no check-then-create race.
file_path = '/home/lambda/Documents/jiatou'
os.makedirs(file_path, exist_ok=True)
os.chdir(file_path)

# Nothing to do when the workbook for this date has already been written.
if os.path.exists(fname):
    print('文件已存在,明天再来吧')
    sys.exit()

# Field names kept from each record of the API response.
heads = {'HdDate', 'SCode', 'SName', 'NewPrice', 'ShareSZ_Chg_One', 'ShareSZ_Chg_Rate_One', 'LTZB_One', 'ZZB_One'}
# Filled by parser(): one dict per record, across all downloaded pages.
rows = []
80+
async def fetch(session, url):
    """Request *url* with a randomly chosen User-Agent and return the body.

    Args:
        session: Open client session used to issue the GET request (this
            script passes an ``aiohttp.ClientSession``).
        url: Address to request.

    Returns:
        The response body decoded as UTF-8 text.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP
            error status.
    """
    headers = {'User-Agent': random.choice(user_agent)}
    async with session.get(url, headers=headers) as response:
        # Stop on an HTTP error here instead of handing an error page to the
        # parser as if it were data.
        response.raise_for_status()
        return await response.text(encoding='utf-8')
85+
86+
# Captures everything between the first "data:" and the last "}" of a
# response; compiled once because parser() runs for every page.
_DATA_PATTERN = re.compile(r'data:(.*)}', re.S)


async def parser(html):
    """Extract the records of one response and append them to ``rows``.

    The text captured after ``data:`` is decoded as JSON. For every record
    only the keys listed in the module-level ``heads`` set are kept.

    Args:
        html: Response body as text.

    Raises:
        ValueError: If *html* contains no ``data:...}`` payload.
        json.JSONDecodeError: If the captured payload is not valid JSON.
        SystemExit: If the payload is empty.
    """
    match = _DATA_PATTERN.search(html)
    if match is None:
        # Without this guard the failure surfaces as an opaque
        # AttributeError on None.
        raise ValueError('response contains no "data:" payload')
    data = json.loads(match.group(1))
    if not data:
        # NOTE(review): an empty page ends the whole program, including pages
        # that are still downloading - confirm this is intended for page
        # numbers past the last page with data.
        print('日期有错误,看看是不是日期不对。。。。。。')
        sys.exit()
    rows.extend(
        {key: value for key, value in d.items() if key in heads}
        for d in data
    )
97+
98+
async def download(url, session=None):
    """Download one page and pass its text to ``parser``.

    Args:
        url: Address of the page to download.
        session: Optional open client session to reuse for the request.
            When omitted, a temporary ``aiohttp.ClientSession`` is opened
            for this call and closed afterwards.
    """
    if session is not None:
        # The caller owns the session, so it is left open.
        await parser(await fetch(session, url))
        return
    async with aiohttp.ClientSession() as own_session:
        html = await fetch(own_session, url)
        await parser(html)
103+
104+
# One request URL per result page (pages 1-30, 50 records per page) for the
# report date held in ``yestoday``.
urls = [
    'http://dcfm.eastmoney.com/EM_MutiSvcExpandInterface/api/js/get?type=HSGT20_GGTJ_SUM&token=894050c76af8597a853f5b408b759f5d&st=ShareSZ_Chg_One&sr=-1&p='
    + str(page)
    + '&ps=50&js=var%20mXyeKPjW={pages:(tp),data:(x)}&filter=(DateType=%27jd%27%20and%20HdDate=%27'
    + str(yestoday)
    + '%27)&rt=53931781'
    for page in range(1, 31)
]
105+
106+
async def main():
    """Download and parse every address in ``urls`` concurrently."""
    pending = [download(address) for address in urls]
    await asyncio.gather(*pending)
109+
110+
# Time the whole run: download, aggregation and the Excel exports.
start = time.time()
asyncio.run(main())

# API field name -> column header used in the workbooks. Selecting and
# renaming by field name keeps header and data aligned whatever the key order
# of the response is.
_COLUMN_NAMES = {
    'HdDate': '日期',
    'SCode': '代码',
    'SName': '名称',
    'NewPrice': '最新股价',
    'ShareSZ_Chg_One': '市值',
    'ShareSZ_Chg_Rate_One': '市值增幅',
    'LTZB_One': '占流通股比',
    'ZZB_One': '占总股比',
}

# Turn the collected records into a DataFrame.
df = pd.DataFrame(rows, columns=list(_COLUMN_NAMES)).rename(columns=_COLUMN_NAMES)
# Sort from largest to smallest. sort_values returns a new frame, so the
# result has to be assigned back.
df = df.sort_values(by='市值', ascending=False)
try:
    df.to_excel(fname)  # full table
except Exception as e:
    print("请关闭文件后再试", e)

df1 = df.nlargest(20, '占总股比')
df2 = df.nlargest(20, "占流通股比")
df3 = df.nlargest(10, "市值")
# Rows present in both top-20 selections (df1 and df2).
df1_df2 = pd.merge(df1, df2, on=list(df.columns), how='inner')
# Rows present in all three selections.
df1_df2_df3 = pd.merge(df1_df2, df3, on=list(df.columns), how='inner')
try:
    df1_df2.to_excel(fname1)
except Exception as e:
    print("请关闭文件后再试", e)

try:
    df1_df2_df3.to_excel(fname2)
except Exception as e:
    print("请关闭文件后再试", e)

stop = time.time()
print(f"使用aiohttp共耗时{stop-start} S")

0 commit comments

Comments
 (0)