forked from MeltanoLabs/tap-github
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtap.py
More file actions
188 lines (171 loc) · 6.9 KB
/
tap.py
File metadata and controls
188 lines (171 loc) · 6.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
"""GitHub tap class."""
from __future__ import annotations
import logging
import os
from singer_sdk import Stream, Tap
from singer_sdk import typing as th # JSON schema typing helpers
from singer_sdk.helpers._classproperty import classproperty
from tap_github.streams import Streams
class TapGitHub(Tap):
    """Singer tap for the GitHub API.

    Declares the tap's configuration schema (``config_jsonschema``) and
    builds the list of streams to sync based on which query option
    (as reported by ``Streams.all_valid_queries()``) is present in the config.
    """

    name = "tap-github"
    package_name = "meltanolabs-tap-github"

    @classproperty
    def logger(cls) -> logging.Logger:  # noqa: N805
        """Get logger.

        The level is read from the ``LOGLEVEL`` environment variable
        (upper-cased before use) and defaults to ``INFO`` when unset.

        Returns:
            Logger with local LOGLEVEL. LOGLEVEL from env takes priority.

        Raises:
            ValueError: If LOGLEVEL is not a level name known to ``logging``.
        """
        log_level = os.environ.get("LOGLEVEL", "INFO").upper()
        # Raise explicitly instead of using `assert`: assertions are removed
        # when Python runs with -O, which would silently skip this validation.
        if log_level not in logging._levelToName.values():
            raise ValueError(f"Invalid LOGLEVEL configuration: {log_level}")
        logger = logging.getLogger(cls.name)
        logger.setLevel(log_level)
        return logger

    # JSON schema describing every setting the tap accepts.
    config_jsonschema = th.PropertiesList(
        th.Property(
            "user_agent",
            th.StringType,
            description="User agent to use for API requests.",
        ),
        th.Property("metrics_log_level", th.StringType),
        # Authentication options
        th.Property(
            "auth_token",
            th.StringType,
            description="GitHub token to authenticate with.",
        ),
        th.Property(
            "additional_auth_tokens",
            th.ArrayType(th.StringType),
            description="List of GitHub tokens to authenticate with. Streams will loop through them when hitting rate limits.",  # noqa: E501
        ),
        th.Property(
            "auth_app_keys",
            th.ArrayType(th.StringType),
            description=(
                "List of GitHub App credentials to authenticate with. "
                "These are organization-agnostic and will be used as "
                "fallback for all organizations. Each credential should "
                "be formatted as `:app_id:;;-----BEGIN RSA PRIVATE KEY-----"
                "\\n_YOUR_P_KEY_\\n-----END RSA PRIVATE KEY-----`."
            ),
        ),
        th.Property(
            "org_auth_app_keys",
            th.ObjectType(
                additional_properties=th.ArrayType(th.StringType),
            ),
            description=(
                "Organization-specific GitHub App credentials. "
                "Maps organization names to lists of app credentials. "
                "When processing repositories from a specific organization, "
                "the tap will prefer tokens configured for that organization. "
                "Each credential should be formatted as "
                "`:app_id:;;-----BEGIN RSA PRIVATE KEY-----"
                "\\n_YOUR_P_KEY_\\n-----END RSA PRIVATE KEY-----`."
            ),
        ),
        th.Property(
            "rate_limit_buffer",
            th.IntegerType,
            description="Add a buffer to avoid consuming all query points for the token at hand. Defaults to 1000.",  # noqa: E501
        ),
        th.Property(
            "expiry_time_buffer",
            th.IntegerType,
            description=(
                "When authenticating as a GitHub App, this buffer controls how many "
                "minutes before expiry the GitHub app tokens will be refreshed. "
                "Defaults to 10 minutes."
            ),
        ),
        th.Property(
            "backoff_max_tries",
            th.IntegerType,
            default=5,
            description="Maximum number of retry attempts for failed API requests that are retriable. Defaults to 5.",  # noqa: E501
        ),
        th.Property(
            "searches",
            th.ArrayType(
                th.ObjectType(
                    th.Property("name", th.StringType, required=True),
                    th.Property("query", th.StringType, required=True),
                )
            ),
            description=(
                "An array of search descriptor objects with the following properties:\n"
                '"name" - a human readable name for the search query.\n'
                '"query" - a github search string (generally the same as would come after ?q= in the URL)"'  # noqa: E501
            ),
        ),
        th.Property("organizations", th.ArrayType(th.StringType)),
        th.Property("repositories", th.ArrayType(th.StringType)),
        th.Property("user_usernames", th.ArrayType(th.StringType)),
        th.Property("user_ids", th.ArrayType(th.StringType)),
        th.Property(
            "start_date",
            th.DateTimeType,
            description="Start date for incremental sync.",
        ),
        th.Property("stream_maps", th.ObjectType()),
        th.Property("stream_map_config", th.ObjectType()),
        th.Property(
            "skip_parent_streams",
            th.BooleanType,
            description=(
                "Set to true to skip API calls for the parent "
                "streams (such as repositories) if it is not selected but children are"
            ),
        ),
        th.Property(
            "stream_options",
            th.ObjectType(
                th.Property(
                    "milestones",
                    th.ObjectType(
                        th.Property(
                            "state",
                            th.StringType,
                            description=(
                                "Configures which states are of interest. "
                                "Must be one of [open, closed, all], defaults to open."
                            ),
                            default="open",
                            allowed_values=["open", "closed", "all"],
                        ),
                        additional_properties=False,
                    ),
                    description="Options specific to the 'milestones' stream.",
                ),
                additional_properties=False,
            ),
            description="Options which change the behaviour of a specific stream.",
        ),
    ).to_dict()

    def discover_streams(self) -> list[Stream]:
        """Return a list of discovered streams for each query.

        Returns:
            One instance of every stream class of each stream type whose
            ``valid_queries`` overlap the config keys. With an empty config,
            every stream type is included.

        Raises:
            ValueError: If a non-empty config does not contain exactly one of
                the valid query keys, or if no streams were produced.
        """
        valid_queries = Streams.all_valid_queries()
        # If the config is empty, assume we are running --help or --capabilities.
        if self.config and len(valid_queries.intersection(self.config)) != 1:
            # Report only the config keys: the values include auth tokens and
            # GitHub App private keys, which must not end up in error output.
            raise ValueError(
                "This tap requires one and only one of the following path options: "
                f"{valid_queries}, provided config keys: {sorted(self.config)}"
            )
        streams: list[Stream] = []
        for stream_type in Streams:
            if not self.config or stream_type.valid_queries.intersection(self.config):
                streams.extend(
                    StreamClass(tap=self) for StreamClass in stream_type.streams
                )
        if not streams:
            raise ValueError("No valid streams found.")
        return streams
# CLI Execution: module-level alias for the tap's command-line entry point.
# NOTE(review): `cli` is not defined on TapGitHub in this file, so it is
# presumably inherited from singer_sdk's `Tap` base class — confirm.
cli = TapGitHub.cli