Skip to content

Commit 46ba85b

Browse files
vdberghppigazzini
authored andcommitted
More refactorings of github_api calls
- Always pass exceptions to the caller. There are too many things that can go wrong, to silently suppress such exceptions. - More idiomatic javascript in rate_limits.mak. - Home grown LRUCache that better suits our needs. The implementation is a simple wrapper around OrderedDict. - The cacheing of GitHub api calls is now persistent across server restarts. - Recent users cannot have DEV repo that is not forked from official-stockfish.
1 parent e69734b commit 46ba85b

File tree

8 files changed

+300
-84
lines changed

8 files changed

+300
-84
lines changed

server/fishtest/github_api.py

Lines changed: 162 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,71 @@
11
import base64
2-
from functools import lru_cache
32
from pathlib import Path
43
from urllib.parse import urlparse
54

65
import requests
6+
from fishtest.lru_cache import LRUCache
77
from fishtest.schemas import sha as sha_schema
88
from vtjson import validate
99

10+
"""
11+
We treat this module as a singleton.
12+
"""
13+
"""
14+
Note: we generally don't suppress exceptions since too many things can
15+
go wrong. The caller should gracefully handle whatever comes their
16+
way.
17+
"""
18+
GITHUB_API_VERSION = 1
1019
TIMEOUT = 3
20+
INITIAL_RATELIMIT = 5000
21+
LRU_CACHE_SIZE = 6000
1122

23+
_github_rate_limit = None
24+
_lru_cache = None
25+
_kvstore = None
26+
27+
# This one is set externally
1228
_official_master_sha = None
13-
_github_rate_limit = -1
1429

1530

16-
def call(url, *args, **kwargs):
31+
def init(kvstore):
32+
global _github_rate_limit, _kvstore, _lru_cache
33+
_kvstore = kvstore
34+
_lru_cache = LRUCache(LRU_CACHE_SIZE)
35+
github_api_cache = {"version": GITHUB_API_VERSION, "lru_cache": []}
36+
try:
37+
if "github_api_cache" in _kvstore:
38+
github_api_cache = _kvstore["github_api_cache"]
39+
else:
40+
print("Initializing github_api_cache", flush=True)
41+
if github_api_cache["version"] != GITHUB_API_VERSION:
42+
raise Exception("Stored github_api_cache has different version")
43+
for k, v in github_api_cache["lru_cache"]:
44+
_lru_cache[tuple(k)] = v
45+
except Exception as e:
46+
print(f"Unable to restore github_api_cache from kvstore: {str(e)}", flush=True)
47+
try:
48+
_github_rate_limit = rate_limit()["remaining"]
49+
except Exception as e:
50+
print(
51+
f"Unable to initialize github rate limit :{str(e)}. Assuming {INITIAL_RATELIMIT}."
52+
)
53+
54+
55+
def save():
56+
global _kvstore
57+
_kvstore["github_api_cache"] = {
58+
"version": GITHUB_API_VERSION,
59+
"lru_cache": [(k, v) for k, v in _lru_cache.items()],
60+
}
61+
62+
63+
def call(url, *args, _method="GET", _ignore_rate_limit=False, **kwargs):
1764
global _github_rate_limit
18-
r = requests.get(url, *args, **kwargs)
65+
if not _ignore_rate_limit and _github_rate_limit < INITIAL_RATELIMIT / 2:
66+
raise Exception(r"Rate limit more than 50% consumed.")
67+
68+
r = requests.request(_method, url, *args, **kwargs)
1969
resource = r.headers.get("X-RateLimit-Resource", "")
2070
if resource == "core":
2171
_github_rate_limit = int(
@@ -28,135 +78,203 @@ def _download_from_github_raw(
2878
item, user="official-stockfish", repo="Stockfish", branch="master"
2979
):
3080
item_url = f"https://raw.githubusercontent.com/{user}/{repo}/{branch}/{item}"
31-
r = call(item_url, timeout=TIMEOUT)
81+
r = call(item_url, timeout=TIMEOUT, _ignore_rate_limit=True)
3282
r.raise_for_status()
3383
return r.content
3484

3585

3686
def _download_from_github_api(
37-
item, user="official-stockfish", repo="Stockfish", branch="master"
87+
item,
88+
user="official-stockfish",
89+
repo="Stockfish",
90+
branch="master",
91+
ignore_rate_limit=False,
3892
):
3993
item_url = (
4094
f"https://api.github.com/repos/{user}/{repo}/contents/{item}?ref={branch}"
4195
)
42-
r = call(item_url, timeout=TIMEOUT)
96+
r = call(item_url, timeout=TIMEOUT, _ignore_rate_limit=ignore_rate_limit)
4397
r.raise_for_status()
4498
git_url = r.json()["git_url"]
45-
r = call(git_url, timeout=TIMEOUT)
99+
r = call(git_url, timeout=TIMEOUT, _ignore_rate_limit=True)
46100
r.raise_for_status()
47101
return base64.b64decode(r.json()["content"])
48102

49103

50104
def download_from_github(
51-
item, user="official-stockfish", repo="Stockfish", branch="master", method="api"
105+
item,
106+
user="official-stockfish",
107+
repo="Stockfish",
108+
branch="master",
109+
method="api",
110+
ignore_rate_limit=False,
52111
):
53112
if method == "api":
54-
return _download_from_github_api(item, user=user, repo=repo, branch=branch)
113+
return _download_from_github_api(
114+
item,
115+
user=user,
116+
repo=repo,
117+
branch=branch,
118+
ignore_rate_limit=ignore_rate_limit,
119+
)
55120
elif method == "raw":
56121
return _download_from_github_raw(item, user=user, repo=repo, branch=branch)
57122
else:
58123
raise ValueError(f"Unknown method {method}")
59124

60125

61-
def get_commit(user="official-stockfish", repo="Stockfish", branch="master"):
126+
def get_commit(
127+
user="official-stockfish",
128+
repo="Stockfish",
129+
branch="master",
130+
ignore_rate_limit=False,
131+
):
62132
url = f"https://api.github.com/repos/{user}/{repo}/commits/{branch}"
63-
r = call(url, timeout=TIMEOUT)
133+
r = call(url, timeout=TIMEOUT, _ignore_rate_limit=ignore_rate_limit)
64134
r.raise_for_status()
65135
commit = r.json()
66136
return commit
67137

68138

69-
def get_commits(user="official-stockfish", repo="Stockfish"):
139+
def get_commits(user="official-stockfish", repo="Stockfish", ignore_rate_limit=False):
70140
url = f"https://api.github.com/repos/{user}/{repo}/commits"
71-
r = call(url, timeout=TIMEOUT)
141+
r = call(url, timeout=TIMEOUT, _ignore_rate_limit=ignore_rate_limit)
72142
r.raise_for_status()
73143
commit = r.json()
74144
return commit
75145

76146

77147
def rate_limit():
78148
url = "https://api.github.com/rate_limit"
79-
r = call(url, timeout=TIMEOUT)
149+
r = call(url, timeout=TIMEOUT, _ignore_rate_limit=True)
80150
r.raise_for_status()
81151
rate_limit = r.json()["resources"]["core"]
82152
return rate_limit
83153

84154

85-
@lru_cache(maxsize=1000)
86-
def compare_sha(user1="official-stockfish", sha1=None, user2=None, sha2=None):
155+
def compare_sha(
156+
user1="official-stockfish",
157+
sha1=None,
158+
user2=None,
159+
sha2=None,
160+
ignore_rate_limit=False,
161+
):
162+
global _lru_cache
87163
# Non sha arguments cannot be safely cached
88164
validate(sha_schema, sha1)
89165
validate(sha_schema, sha2)
90166

91-
# Protect against DOS'ing
92-
if _github_rate_limit < 2500:
93-
raise Exception(r"Rate limit more than 50% consumed.")
94-
95167
if user2 is None:
96168
user2 = user1
169+
170+
# it's not necessary to include user1, user2 as shas
171+
# are globally unique
172+
inputs = ("compare_sha", sha1, sha2)
173+
if inputs in _lru_cache:
174+
return _lru_cache[inputs]
97175
url = (
98176
"https://api.github.com/repos/official-stockfish/"
99177
f"Stockfish/compare/{user1}:{sha1}...{user2}:{sha2}"
100178
)
101-
r = call(url, headers={"Accept": "application/vnd.github+json"}, timeout=TIMEOUT)
179+
r = call(
180+
url,
181+
headers={"Accept": "application/vnd.github+json"},
182+
timeout=TIMEOUT,
183+
_ignore_rate_limit=ignore_rate_limit,
184+
)
102185
r.raise_for_status()
103-
return r.json()
186+
json = r.json()
187+
json1 = {}
188+
json1["merge_base_commit"] = {}
189+
json1["merge_base_commit"]["sha"] = json["merge_base_commit"]["sha"]
190+
_lru_cache[inputs] = json1
191+
return json1
104192

105193

106194
def parse_repo(repo_url):
107195
p = Path(urlparse(repo_url).path).parts
108196
return (p[1], p[2])
109197

110198

111-
def get_merge_base_commit(user1="official-stockfish", sha1=None, user2=None, sha2=None):
199+
def get_merge_base_commit(
200+
user1="official-stockfish",
201+
sha1=None,
202+
user2=None,
203+
sha2=None,
204+
ignore_rate_limit=False,
205+
):
112206
if user2 is None:
113207
user2 = user1
114-
master_diff = compare_sha(user1=user1, sha1=sha1, user2=user2, sha2=sha2)
208+
master_diff = compare_sha(
209+
user1=user1,
210+
sha1=sha1,
211+
user2=user2,
212+
sha2=sha2,
213+
ignore_rate_limit=ignore_rate_limit,
214+
)
115215
return master_diff["merge_base_commit"]["sha"]
116216

117217

118-
def is_ancestor(user1="official-stockfish", sha1=None, user2=None, sha2=None):
218+
def is_ancestor(
219+
user1="official-stockfish",
220+
sha1=None,
221+
user2=None,
222+
sha2=None,
223+
ignore_rate_limit=False,
224+
):
119225
if user2 is None:
120226
user2 = user1
121227
merge_base_commit = get_merge_base_commit(
122-
user1=user1, sha1=sha1, user2=user2, sha2=sha2
228+
user1=user1,
229+
sha1=sha1,
230+
user2=user2,
231+
sha2=sha2,
232+
ignore_rate_limit=ignore_rate_limit,
123233
)
124234
return merge_base_commit == sha1
125235

126236

127-
@lru_cache(maxsize=1000)
128-
def _is_master(sha, official_master_sha):
237+
def _is_master(sha, official_master_sha, ignore_rate_limit=False):
238+
global _lru_cache
239+
inputs = ("_is_master", sha, official_master_sha)
240+
if inputs in _lru_cache:
241+
return _lru_cache[inputs]
129242
try:
130-
return is_ancestor(sha1=sha, sha2=official_master_sha)
243+
ret = is_ancestor(
244+
sha1=sha, sha2=official_master_sha, ignore_rate_limit=ignore_rate_limit
245+
)
131246
except requests.HTTPError as e:
132247
if e.response.status_code == 404:
133-
return False
248+
ret = False
249+
# Positive answers are already cached in compare_sha
250+
_lru_cache[inputs] = ret
251+
return ret
134252

135253

136-
def is_master(sha):
137-
try:
138-
return _is_master(sha, _official_master_sha)
139-
except Exception as e:
140-
print(f"Unable to evaluate is_master({sha}): {str(e)}", flush=True)
141-
return False
254+
def is_master(sha, ignore_rate_limit=False):
255+
return _is_master(sha, _official_master_sha, ignore_rate_limit=ignore_rate_limit)
142256

143257

144-
def get_master_repo(user="official-stockfish", repo="Stockfish"):
258+
def get_master_repo(
259+
user="official-stockfish", repo="Stockfish", ignore_rate_limit=False
260+
):
145261
api_url = f"https://api.github.com/repos/{user}/{repo}"
146-
r = call(api_url, timeout=TIMEOUT)
262+
r = call(api_url, timeout=TIMEOUT, _ignore_rate_limit=ignore_rate_limit)
147263
r.raise_for_status()
148264
r = r.json()
149265
while True:
150-
if "fork" not in r:
151-
return None
152266
if not r["fork"]:
153-
return r.get("html_url", None)
267+
return r["html_url"]
154268
else:
155-
if "parent" not in r:
156-
return None
157269
r = r["parent"]
158270

159271

272+
def normalize_repo(repo):
273+
r = call(repo, _method="HEAD", allow_redirects=True, _ignore_rate_limit=True)
274+
r.raise_for_status()
275+
return r.url
276+
277+
160278
def compare_branches_url(
161279
user1="stockfish-chess", branch1="master", user2=None, branch2=None
162280
):

server/fishtest/kvstore.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,17 @@
1+
from datetime import UTC
2+
3+
from bson.codec_options import CodecOptions
14
from fishtest.schemas import kvstore_schema
5+
from pymongo import MongoClient
26
from vtjson import validate
37

48

59
class KeyValueStore:
6-
def __init__(self, db, collection="kvstore"):
10+
def __init__(self, db=None, db_name=None, collection="kvstore"):
11+
if db is None:
12+
conn = MongoClient("localhost")
13+
codec_options = CodecOptions(tz_aware=True, tzinfo=UTC)
14+
db = conn[db_name].with_options(codec_options=codec_options)
715
self.__kvstore = db[collection]
816

917
def __setitem__(self, key, value):
@@ -18,8 +26,33 @@ def __getitem__(self, key):
1826
else:
1927
return document["value"]
2028

29+
def __delitem__(self, key):
30+
d = self.__kvstore.delete_one({"_id": key})
31+
if d.deleted_count == 0:
32+
raise KeyError(key)
33+
34+
def __contains__(self, key):
35+
try:
36+
self[key]
37+
except KeyError:
38+
return False
39+
return True
40+
2141
def get(self, key, default):
2242
try:
2343
return self[key]
2444
except KeyError:
2545
return default
46+
47+
def items(self):
48+
documents = self.__kvstore.find()
49+
for d in documents:
50+
yield d["_id"], d["value"]
51+
52+
def keys(self):
53+
for i in self.items():
54+
yield i[0]
55+
56+
def values(self):
57+
for i in self.items():
58+
yield i[1]

server/fishtest/lru_cache.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
from collections import OrderedDict
2+
3+
4+
class LRUCache(OrderedDict):
5+
def __init__(self, size):
6+
super().__init__()
7+
self.__size = size
8+
9+
def __setitem__(self, key, value):
10+
super().__setitem__(key, value)
11+
if len(self) > self.__size:
12+
self.popitem(last=False)
13+
14+
@property
15+
def size(self):
16+
return self.__size
17+
18+
@size.setter
19+
def size(self, val):
20+
while len(self) > val:
21+
self.popitem(last=False)
22+
self.__size = val

0 commit comments

Comments
 (0)