This repository was archived by the owner on Jun 9, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 29
Expand file tree
/
Copy pathnba_stats_async.py
More file actions
143 lines (107 loc) · 4.16 KB
/
Copy pathnba_stats_async.py
File metadata and controls
143 lines (107 loc) · 4.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/env python
"""
Gathering statistics on NBA players asynchronously with the
aiohttp library
Running this on my machine, on my home network, took: (timing not recorded)
***NOTE***
On my OS-X box, a regular user is limited to 256 open files per process.
A socket is considered a file -- so this can crash out when it hits that limit.
(as of now, there are 491 players listed)
You can increase it with:
ulimit -n 2048
And see what it's set to with:
ulimit -a
***********
Borrowed from:
http://terriblecode.com/blog/asynchronous-http-requests-in-python/
"""
import pdb
import asyncio
import aiohttp
import json
import time
import requests
# Root URL of the stats API; each request appends its endpoint path to it.
base_url = 'http://stats.nba.com/stats'

# User-agent string of a desktop Chrome browser.
# NOTE(review): presumably the server rejects non-browser clients -- confirm.
_BROWSER_USER_AGENT = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/45.0.2454.101 Safari/537.36'
)

# Headers passed along with every HTTP request this script makes.
HEADERS = {'user-agent': _BROWSER_USER_AGENT}
# # this needs to be run first before we can start -- so no need for async
# # but making it async so we can use the aiohttp lib.
# async def get_players(players):
# """
# get the names of all the players we are interested in
# This request will get JSON of the players for the 2016-17 season:
# http://stats.nba.com/stats/commonallplayers?LeagueID=00&season=2016-17&isonlycurrentseason=1
# """
# endpoint = '/commonallplayers'
# params = {'leagueid': '00', 'season': '2016-17', 'isonlycurrentseason': '1'}
# url = base_url + endpoint
# print('Getting all players...')
# async with aiohttp.ClientSession() as session:
# print("got the session")
# async with session.get(url, headers=HEADERS, params=params) as resp:
# print("got the response")
# data = await resp.json()
# players.append([(item[0], item[2]) for item in data['resultSets'][0]['rowSet']])
def get_players(player_args, timeout=30):
    """
    Get the names of all the players we are interested in.

    This request will get JSON of the players for the 2016-17 season:
    http://stats.nba.com/stats/commonallplayers?LeagueID=00&season=2016-17&isonlycurrentseason=1

    This is a plain blocking call (made with ``requests``): it has to finish
    before any of the per-player requests can start.

    :param player_args: list that is extended in place with one
                        ``(item[0], item[2])`` tuple per row of the first
                        result set. The rest of the script unpacks each
                        tuple as ``(player_id, player_name)``.
    :param timeout: seconds to wait for the server before giving up, so an
                    unresponsive server cannot hang the script forever.

    :raises requests.HTTPError: if the server answers with an error status.
    :raises requests.Timeout: if the server does not answer within
                              ``timeout`` seconds.
    """
    endpoint = '/commonallplayers'
    params = {'leagueid': '00', 'season': '2016-17', 'isonlycurrentseason': '1'}
    url = base_url + endpoint
    print('Getting all players...')
    print("about to make request")
    resp = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
    # Fail here, with a clear HTTP error, rather than later while trying to
    # decode or index an error page as if it were the player list.
    resp.raise_for_status()
    print("got the response")
    data = resp.json()
    player_args.extend(
        (item[0], item[2]) for item in data['resultSets'][0]['rowSet'])
# this is what we want to make concurrent
async def get_player(player_id, player_name):
    """
    Fetch the ``commonplayerinfo`` data for one player and store it.

    The decoded JSON response is stored in the module-level ``all_players``
    dict under the key ``player_name``; the coroutine itself returns None.

    :param player_id: value sent as the ``playerid`` query parameter.
    :param player_name: key under which the result is stored; also used in
                        the progress messages.

    :raises aiohttp.ClientResponseError: if the server answers with an HTTP
                                         error status.
    """
    endpoint = '/commonplayerinfo'
    params = {'playerid': player_id}
    url = base_url + endpoint
    print("Getting player", player_name)
    # Each call opens (and closes) its own session.
    async with aiohttp.ClientSession() as session:
        print("session created")
        # skip_auto_headers stops aiohttp from generating its own User-Agent;
        # the one in HEADERS is the one that is sent.
        async with session.get(url,
                               skip_auto_headers=["User-Agent"],
                               headers=HEADERS,
                               params=params) as resp:
            print("response:", resp)
            # Without this check a JSON error body from a 4xx/5xx response
            # would be stored as if it were the player's data.
            resp.raise_for_status()
            all_players[player_name] = await resp.json()
            print("got:", player_name)
    print("Done with get_player:", player_name)
# async def get_all_stats(players):
# for id, name in players:
# print("getting:", name)
# all_players[name] = await get_player(id, name)
async def _fetch_players(player_args):
    """Run get_player() concurrently for every argument tuple given."""
    # gather() is called from inside a coroutine so that the tasks are
    # created on the event loop that is actually running them.
    await asyncio.gather(*(get_player(*args) for args in player_args))


# Filled in by get_player(): maps player name -> decoded JSON response.
all_players = {}
# Filled in by get_players(): one tuple of get_player() arguments per player.
players = []

start = time.time()

# asyncio.get_event_loop() is deprecated when no loop is running, so the
# loop is created explicitly and closed again once the requests are done.
loop = asyncio.new_event_loop()
try:
    print("getting the players")
    # loop.run_until_complete(get_players(players))
    get_players(players)
    print("got the players")
    print("there are {} players".format(len(players)))

    # print("getting the stats")
    # loop.run_until_complete(get_all_stats(players[:200]))
    # print("got the stats")

    # Only the first 10 players are requested.
    loop.run_until_complete(_fetch_players(players[:10]))
finally:
    loop.close()

# loop.run_until_complete(get_player(*players[0]))

# for id, name in players:
#     all_players[name] = get_player(id, name)

print("Done getting data: it took {:.2F} seconds".format(time.time() - start))

# write it out to a file
with open("NBA_stats_2.json", 'w') as outfile:
    json.dump(all_players, outfile, indent=2)

print("File written out")