forked from MeltanoLabs/tap-github
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathauthenticator.py
More file actions
148 lines (125 loc) · 5.42 KB
/
authenticator.py
File metadata and controls
148 lines (125 loc) · 5.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
"""Classes to assist in authenticating to the GitHub API."""
import logging
from datetime import datetime
from os import environ
from random import choice, shuffle
from typing import Any, Dict, List, Optional
import requests
from singer_sdk.authenticators import APIAuthenticatorBase
from singer_sdk.streams import RESTStream
class TokenRateLimit:
    """Track the rate-limit state last reported for a single API token.

    A new instance starts from optimistic defaults (full quota, nothing used,
    no known reset time) and is refreshed from the ``X-RateLimit-*`` response
    headers through :meth:`update_rate_limit`.
    """

    DEFAULT_RATE_LIMIT = 5000
    # The DEFAULT_RATE_LIMIT_BUFFER buffer serves two purposes:
    # - keep some leeway and rotate tokens before erroring out on rate limit.
    # - not consume all available calls when we are using an org or user token.
    DEFAULT_RATE_LIMIT_BUFFER = 1000

    def __init__(self, token: str, rate_limit_buffer: Optional[int] = None):
        """Init TokenRateLimit info.

        Args:
            token: The token whose quota is tracked.
            rate_limit_buffer: Number of calls to keep in reserve before the
                token is reported as exhausted. ``None`` selects
                ``DEFAULT_RATE_LIMIT_BUFFER``; an explicit ``0`` is honoured.
        """
        self.token = token
        self.rate_limit = self.DEFAULT_RATE_LIMIT
        self.rate_limit_remaining = self.DEFAULT_RATE_LIMIT
        # Stays None until the first successful header update.
        self.rate_limit_reset: Optional[int] = None
        self.rate_limit_used = 0
        # Compare against None (not truthiness) so that a buffer of 0 is kept.
        self.rate_limit_buffer = (
            rate_limit_buffer
            if rate_limit_buffer is not None
            else self.DEFAULT_RATE_LIMIT_BUFFER
        )

    def update_rate_limit(self, response_headers: Any) -> None:
        """Refresh the stored quota from a response's rate-limit headers.

        The values are parsed first and stored only once all of them are
        known, so a bad response never leaves the object half-updated. A
        response that carries no usable ``X-RateLimit-Limit``,
        ``X-RateLimit-Remaining`` and ``X-RateLimit-Reset`` headers is
        ignored and the previous state is kept. When only
        ``X-RateLimit-Used`` is missing, it is derived as
        ``limit - remaining``.

        Args:
            response_headers: Mapping of response header names to values.
        """
        try:
            limit = int(response_headers["X-RateLimit-Limit"])
            remaining = int(response_headers["X-RateLimit-Remaining"])
            reset = int(response_headers["X-RateLimit-Reset"])
        except (KeyError, TypeError, ValueError):
            # No (or malformed) rate-limit information in this response:
            # keep what we already know instead of crashing the caller.
            return

        try:
            used = int(response_headers["X-RateLimit-Used"])
        except (KeyError, TypeError, ValueError):
            used = limit - remaining

        self.rate_limit = limit
        self.rate_limit_remaining = remaining
        self.rate_limit_reset = reset
        self.rate_limit_used = used

    def is_valid(self) -> bool:
        """Check if token is valid.

        Returns:
            True if the token is valid and has enough api calls remaining.
        """
        # Nothing has been reported for this token yet: assume it is usable.
        if self.rate_limit_reset is None:
            return True

        over_budget = self.rate_limit_used > (
            self.rate_limit - self.rate_limit_buffer
        )
        # The reset value is compared against the current epoch timestamp;
        # once that moment has passed the token counts as usable again.
        window_still_open = self.rate_limit_reset > datetime.now().timestamp()
        return not (over_budget and window_still_open)
class GitHubTokenAuthenticator(APIAuthenticatorBase):
    """Base class for offloading API auth.

    Keeps a pool of tokens, each wrapped in a ``TokenRateLimit``, sends the
    currently active one in the ``Authorization`` header, and switches to
    another token once the active one reports that it is no longer valid.
    """

    def prepare_tokens(self) -> Dict[str, TokenRateLimit]:
        """Collect the available tokens and wrap each in a ``TokenRateLimit``.

        Tokens are read from the ``auth_token`` and ``additional_auth_tokens``
        config keys. When ``additional_auth_tokens`` is not configured, the
        values of every environment variable whose name starts with
        ``GITHUB_TOKEN`` are added as well. Empty values are skipped and
        duplicates are collapsed.

        Returns:
            A mapping from each unique token to its rate-limit tracker.
        """
        available_tokens: List[str] = []
        if "auth_token" in self._config:
            available_tokens.append(self._config["auth_token"])

        if "additional_auth_tokens" in self._config:
            # `or []` tolerates an explicit null value in the config.
            available_tokens.extend(self._config["additional_auth_tokens"] or [])
        else:
            # Accept multiple tokens using environment variables GITHUB_TOKEN*
            env_tokens = [
                value
                for key, value in environ.items()
                if key.startswith("GITHUB_TOKEN") and value
            ]
            if env_tokens:
                self.logger.info(
                    f"Found {len(env_tokens)} 'GITHUB_TOKEN' environment variables for authentication."
                )
                # Add to the pool instead of replacing it, so that a
                # configured `auth_token` is not silently discarded.
                available_tokens.extend(env_tokens)

        # Get rate_limit_buffer
        rate_limit_buffer = self._config.get("rate_limit_buffer", None)

        # Keying by token de-duplicates; falsy entries (None, "") are dropped
        # because they would produce a meaningless Authorization header.
        tokens_map = {
            token: TokenRateLimit(token, rate_limit_buffer)
            for token in available_tokens
            if token
        }
        self.logger.info(f"Tap will run with {len(tokens_map)} auth tokens")
        return tokens_map

    def __init__(self, stream: RESTStream) -> None:
        """Init authenticator.

        Args:
            stream: A stream for a RESTful endpoint.
        """
        super().__init__(stream=stream)
        self.logger: logging.Logger = stream.logger
        self.tap_name: str = stream.tap_name
        self._config: Dict[str, Any] = dict(stream.config)
        self.tokens_map = self.prepare_tokens()
        # Start from a random token of the pool; None when the pool is empty.
        self.active_token: Optional[TokenRateLimit] = (
            choice(list(self.tokens_map.values())) if self.tokens_map else None
        )

    def get_next_auth_token(self) -> None:
        """Make a randomly chosen valid token the active one.

        Raises:
            RuntimeError: If none of the tokens in the pool is valid.
        """
        candidates = list(self.tokens_map.values())
        # Shuffle so the first valid token found is a random one.
        shuffle(candidates)
        for token_rate_limit in candidates:
            if token_rate_limit.is_valid():
                self.active_token = token_rate_limit
                self.logger.info("Switching to fresh auth token")
                return

        raise RuntimeError(
            "All GitHub tokens have hit their rate limit. Stopping here."
        )

    def update_rate_limit(
        self, response_headers: requests.models.CaseInsensitiveDict
    ) -> None:
        """Record the rate-limit headers of a response on the active token.

        Args:
            response_headers: Headers of the response that was just received.
        """
        # If no token or only one token is available, return early.
        if len(self.tokens_map) <= 1 or self.active_token is None:
            return

        self.active_token.update_rate_limit(response_headers)

    @property
    def auth_headers(self) -> Dict[str, str]:
        """Return a dictionary of auth headers to be applied.

        These will be merged with any `http_headers` specified in the stream.

        Returns:
            HTTP headers for authentication.

        Raises:
            RuntimeError: If the active token is no longer valid and no other
                token in the pool is valid either.
        """
        result = super().auth_headers
        if self.active_token:
            # Make sure that our token is still valid or update it.
            if not self.active_token.is_valid():
                self.get_next_auth_token()
            result["Authorization"] = f"token {self.active_token.token}"
        else:
            self.logger.info(
                "No auth token detected. "
                "For higher rate limits, please specify `auth_token` in config."
            )
        return result