forked from MeltanoLabs/tap-github
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathorganization_streams.py
More file actions
1065 lines (972 loc) · 40.6 KB
/
organization_streams.py
File metadata and controls
1065 lines (972 loc) · 40.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""User Stream types classes for tap-github."""
from __future__ import annotations
import hashlib
from typing import TYPE_CHECKING, Any, ClassVar
from singer_sdk import typing as th # JSON Schema typing helpers
from singer_sdk.exceptions import FatalAPIError
from tap_github.client import GitHubGraphqlStream, GitHubRestStream
if TYPE_CHECKING:
from collections.abc import Iterable
from singer_sdk.helpers.types import Context
# Reusable GraphQL selection set for fields of the `Actor` interface.
# https://docs.github.com/en/graphql/reference/interfaces#actor
#
# The text is interpolated into the f-string queries in this module wherever an
# actor-typed field (e.g. `creator`) is selected. Fields are aliased to
# snake_case; the GraphQL `id` is exposed as `node_id` and `databaseId` is
# exposed as `id`. For `EnterpriseUserAccount` only `node_id` is selected.
ACTOR_FRAGMENT = """
login
resource_path: resourcePath
url
type: __typename
... on Bot {
node_id: id
id: databaseId
}
... on User {
node_id: id
id: databaseId
}
... on Organization {
node_id: id
id: databaseId
}
... on Mannequin {
node_id: id
id: databaseId
}
... on EnterpriseUserAccount {
node_id: id
}
"""
class OrganizationStream(GitHubRestStream):
    """Defines a GitHub Organization Stream.

    API Reference: https://docs.github.com/en/rest/reference/orgs#get-an-organization
    """

    name = "organizations"
    path = "/orgs/{org}"

    @property
    def partitions(self) -> list[dict] | None:
        """Return one ``{"org": ...}`` partition per configured organization."""
        return [{"org": org} for org in self.config["organizations"]]

    def get_child_context(self, record: dict, context: Context | None) -> dict:
        """Return the context handed to child streams.

        Records fetched from the API carry the organization name under
        ``login``. The placeholder record yielded by ``get_records`` when the
        API call is skipped only carries ``org``; fall back to that key so the
        skip path does not fail with ``KeyError: 'login'``.

        Raises:
            KeyError: if the record has neither ``login`` nor ``org``.
        """
        if "login" in record:
            return {"org": record["login"]}
        return {"org": record["org"]}

    def get_records(self, context: Context | None) -> Iterable[dict[str, Any]]:
        """Yield organization records, optionally without calling the API.

        Override the parent method to allow skipping API calls
        if the stream is deselected and skip_parent_streams is True in config.
        This allows running the tap with fewer API calls and preserving
        quota when only syncing a child stream. Without this,
        the API call is sent but data is discarded.
        """
        if (
            not self.selected
            and self.config.get("skip_parent_streams")
            and context is not None
        ):
            # build a minimal mock record so that self._sync_records
            # can proceed with child streams
            yield {
                "org": context["org"],
            }
        else:
            yield from super().get_records(context)

    schema = th.PropertiesList(
        th.Property("login", th.StringType),
        th.Property("id", th.IntegerType),
        th.Property("node_id", th.StringType),
        th.Property("url", th.StringType),
        th.Property("repos_url", th.StringType),
        th.Property("events_url", th.StringType),
        th.Property("hooks_url", th.StringType),
        th.Property("issues_url", th.StringType),
        th.Property("members_url", th.StringType),
        th.Property("public_members_url", th.StringType),
        th.Property("avatar_url", th.StringType),
        th.Property("description", th.StringType),
    ).to_dict()
class OrganizationMembersStream(GitHubRestStream):
    """Members of an organization, requested once per parent organization.

    API Reference: https://docs.github.com/en/rest/orgs/members?apiVersion=2022-11-28#list-organization-members
    """

    name = "organization_members"
    primary_keys: ClassVar[list[str]] = ["id"]
    # `{org}` matches the key that OrganizationStream.get_child_context returns.
    path = "/orgs/{org}/members"
    ignore_parent_replication_key = True
    parent_stream_type = OrganizationStream
    state_partitioning_keys: ClassVar[list[str]] = ["org"]

    schema = th.PropertiesList(
        # Parent keys
        th.Property("org", th.StringType),
        # Rest
        th.Property("login", th.StringType),
        th.Property("id", th.IntegerType),
        th.Property("node_id", th.StringType),
        th.Property("avatar_url", th.StringType),
        th.Property("gravatar_id", th.StringType),
        th.Property("url", th.StringType),
        th.Property("html_url", th.StringType),
        th.Property("type", th.StringType),
        th.Property("site_admin", th.BooleanType),
    ).to_dict()
class TeamsStream(GitHubRestStream):
    """Teams of an organization, requested once per parent organization.

    API Reference: https://docs.github.com/en/rest/reference/teams#list-teams
    """

    name = "teams"
    path = "/orgs/{org}/teams"
    primary_keys: ClassVar[list[str]] = ["id"]
    parent_stream_type = OrganizationStream
    ignore_parent_replication_key = True
    state_partitioning_keys: ClassVar[list[str]] = ["org"]

    def get_child_context(self, record: dict, context: Context | None) -> dict:
        """Return the incoming context extended with this team's slug."""
        team_slug = record["slug"]
        inherited = context if context else {}
        return {**inherited, "team_slug": team_slug}

    schema = th.PropertiesList(
        # Parent Keys
        th.Property("org", th.StringType),
        # Rest
        th.Property("id", th.IntegerType),
        th.Property("node_id", th.StringType),
        th.Property("url", th.StringType),
        th.Property("html_url", th.StringType),
        th.Property("name", th.StringType),
        th.Property("slug", th.StringType),
        th.Property("description", th.StringType),
        th.Property("privacy", th.StringType),
        th.Property("permission", th.StringType),
        th.Property("members_url", th.StringType),
        th.Property("repositories_url", th.StringType),
        th.Property(
            "parent",
            th.ObjectType(
                th.Property("id", th.IntegerType),
                th.Property("node_id", th.StringType),
                th.Property("url", th.StringType),
                th.Property("html_url", th.StringType),
                th.Property("name", th.StringType),
                th.Property("slug", th.StringType),
                th.Property("description", th.StringType),
                th.Property("privacy", th.StringType),
                th.Property("permission", th.StringType),
                th.Property("members_url", th.StringType),
                th.Property("repositories_url", th.StringType),
            ),
        ),
    ).to_dict()
class TeamMembersStream(GitHubRestStream):
    """Members of a team, requested once per parent team.

    API Reference: https://docs.github.com/en/rest/reference/teams#list-team-members
    """

    name = "team_members"
    path = "/orgs/{org}/teams/{team_slug}/members"
    primary_keys: ClassVar[list[str]] = ["id", "team_slug"]
    parent_stream_type = TeamsStream
    ignore_parent_replication_key = True
    state_partitioning_keys: ClassVar[list[str]] = ["team_slug", "org"]

    def get_child_context(self, record: dict, context: Context | None) -> dict:
        """Return the incoming context extended with this member's login."""
        username = record["login"]
        inherited = context if context else {}
        return {**inherited, "username": username}

    schema = th.PropertiesList(
        # Parent keys
        th.Property("org", th.StringType),
        th.Property("team_slug", th.StringType),
        # Rest
        th.Property("login", th.StringType),
        th.Property("id", th.IntegerType),
        th.Property("node_id", th.StringType),
        th.Property("avatar_url", th.StringType),
        th.Property("gravatar_id", th.StringType),
        th.Property("url", th.StringType),
        th.Property("html_url", th.StringType),
        th.Property("type", th.StringType),
        th.Property("site_admin", th.BooleanType),
    ).to_dict()
class TeamRolesStream(GitHubRestStream):
    """Team membership (role and state) of one user in one team.

    API Reference: https://docs.github.com/en/rest/reference/teams#get-team-membership-for-a-user
    """

    name = "team_roles"
    # `{org}`, `{team_slug}` and `{username}` match the keys accumulated by the
    # get_child_context methods of the ancestor streams in this module.
    path = "/orgs/{org}/teams/{team_slug}/memberships/{username}"
    ignore_parent_replication_key = True
    primary_keys: ClassVar[list[str]] = ["url"]
    parent_stream_type = TeamMembersStream
    state_partitioning_keys: ClassVar[list[str]] = ["username", "team_slug", "org"]

    schema = th.PropertiesList(
        # Parent keys
        th.Property("org", th.StringType),
        th.Property("team_slug", th.StringType),
        th.Property("username", th.StringType),
        # Rest
        th.Property("url", th.StringType),
        th.Property("role", th.StringType),
        th.Property("state", th.StringType),
    ).to_dict()
class ProjectsStream(GitHubGraphqlStream):
    """Fetches GitHub projects (new projects aka ProjectsV2) for an organization.

    API Reference: https://docs.github.com/en/graphql/reference/objects#projectv2
    """

    name = "projects"
    query_jsonpath = "$.data.organization.projectsV2.nodes[*]"
    primary_keys: ClassVar[list[str]] = ["org", "id"]
    parent_stream_type = OrganizationStream
    ignore_parent_replication_key = True
    state_partitioning_keys: ClassVar[list[str]] = ["org"]

    @property
    def query(self) -> str:
        """GraphQL query listing an organization's projects, 100 per page."""
        projects_query = f"""
query OrganizationProjects($org: String!, $nextPageCursor_0: String) {{
organization(login: $org) {{
projectsV2(first: 100, after: $nextPageCursor_0) {{
nodes {{
closed
closed_at: closedAt
created_at: createdAt
creator {{
{ACTOR_FRAGMENT}
}}
id: fullDatabaseId
node_id: id
number
owner {{
node_id: id
type: __typename
}}
public
readme
resource_path: resourcePath
short_description: shortDescription
template
title
updated_at: updatedAt
url
viewer_can_close: viewerCanClose
viewer_can_reopen: viewerCanReopen
viewer_can_update: viewerCanUpdate
}}
pageInfo {{
hasNextPage_0: hasNextPage
endCursor_0: endCursor
startCursor_0: startCursor
}}
totalCount
}}
}}
rateLimit {{
cost
}}
}}
"""
        return projects_query

    def get_child_context(self, record: dict, context: Context | None) -> dict:
        """Return the incoming context extended with the project's number."""
        project_number = record["number"]
        inherited = context if context else {}
        return {**inherited, "project_number": project_number}

    def post_process(self, row: dict, context: Context | None = None) -> dict:
        """Run the parent post-processing, then stamp the row with its org."""
        processed = super().post_process(row, context)
        if not context:
            return processed
        processed["org"] = context["org"]
        return processed

    schema = th.PropertiesList(
        # Parent keys
        th.Property("org", th.StringType),
        # Project fields
        th.Property(
            "id", th.StringType, nullable=False
        ),  # using fullDatabaseId from GraphQL as id, but is nullable in GraphQL
        th.Property(
            "node_id", th.StringType
        ),  # using id from GraphQL as node_id, it is required (ID!)
        th.Property("number", th.IntegerType),
        th.Property("title", th.StringType),
        th.Property("url", th.StringType),
        th.Property("resource_path", th.StringType),
        th.Property("created_at", th.DateTimeType),
        th.Property("updated_at", th.DateTimeType),
        th.Property("closed", th.BooleanType),
        th.Property(
            "closed_at", th.DateTimeType, required=False
        ),  # closedAt is nullable in GraphQL
        th.Property("public", th.BooleanType),
        th.Property(
            "readme", th.StringType, required=False
        ),  # readme is nullable in GraphQL
        th.Property(
            "short_description", th.StringType, required=False
        ),  # shortDescription is nullable in GraphQL
        th.Property("template", th.BooleanType),
        th.Property("viewer_can_close", th.BooleanType),
        th.Property("viewer_can_reopen", th.BooleanType),
        th.Property("viewer_can_update", th.BooleanType),
        th.Property(
            "owner",
            th.ObjectType(
                th.Property("node_id", th.StringType),
                th.Property("type", th.StringType),
            ),
        ),
        th.Property(
            "creator",
            th.ObjectType(
                th.Property("login", th.StringType),
                th.Property("resource_path", th.StringType),
                th.Property("url", th.StringType),
                th.Property("type", th.StringType),
                th.Property("node_id", th.StringType),
                th.Property("id", th.StringType, required=False),
            ),
            required=False,  # creator is nullable in GraphQL
        ),
    ).to_dict()
class ProjectFieldConfigurationsStream(GitHubGraphqlStream):
    """Fetches all fields defined within a GitHub organization's project and outputs
    a single record per project containing all its fields.

    This stream is a child of ProjectsStream. For each project, it retrieves all its
    field configurations, including configurations for iteration and single-select
    fields, and consolidates them into one record.

    API Reference: https://docs.github.com/en/graphql/reference/objects#projectv2fieldconfiguration
    """

    name = "project_field_configurations"
    primary_keys: ClassVar[list[str]] = ["org", "project_number"]
    parent_stream_type = ProjectsStream
    ignore_parent_replication_key = True
    state_partitioning_keys: ClassVar[list[str]] = ["org", "project_number"]
    query_jsonpath = "$.data.organization.projectV2.fields.nodes[*]"

    @property
    def query(self) -> str:
        """GraphQL query to fetch a page of project fields."""
        return """
query ProjectFieldsPage(
$org: String!,
$project_number: Int!,
$nextPageCursor_0: String
) {
organization(login: $org) {
projectV2(number: $project_number) {
fields(first: 100, after: $nextPageCursor_0) {
nodes {
... on ProjectV2Field {
id: databaseId
node_id: id
name
data_type: dataType
created_at: createdAt
updated_at: updatedAt
}
... on ProjectV2IterationField {
id: databaseId
node_id: id
name
data_type: dataType
created_at: createdAt
updated_at: updatedAt
configuration {
duration
start_day: startDay
iterations {
id
title
start_date: startDate
duration
}
completed_iterations: completedIterations {
id
title
start_date: startDate
duration
}
}
}
... on ProjectV2SingleSelectField {
id: databaseId
node_id: id
name
data_type: dataType
created_at: createdAt
updated_at: updatedAt
options {
id
name
color
description
}
}
}
pageInfo {
hasNextPage_0: hasNextPage
endCursor_0: endCursor
}
totalCount
}
}
}
rateLimit {
cost
}
}
"""

    @staticmethod
    def _extract_page_info(payload: Any) -> dict:  # noqa: ANN401
        """Return ``data.organization.projectV2.fields.pageInfo`` or ``{}``.

        GraphQL reports a missing object as an explicit ``null``. A chained
        ``dict.get(key, {})`` does not protect against that, because the
        default only applies when the key is absent. Every level is therefore
        checked before descending.
        """
        node = payload
        for key in ("data", "organization", "projectV2", "fields", "pageInfo"):
            if not isinstance(node, dict):
                return {}
            node = node.get(key)
        return node if isinstance(node, dict) else {}

    def get_records(self, context: Context | None) -> Iterable[dict[str, Any]]:
        """Fetch all fields of a project across pages and yield a single record.

        Nothing is yielded (a warning is logged) when the context is missing or
        lacks ``org`` / ``project_number``.

        Yields:
            One dict with ``org``, ``project_number`` and
            ``all_field_configurations`` (the field nodes of every page).
        """
        if not context:
            self.logger.warning("Received no context, skipping.")
            return

        org = context.get("org")
        project_number = context.get("project_number")
        if not org or project_number is None:
            self.logger.warning(f"Missing org or project_number in context: {context}")
            return

        all_field_configurations: list[dict] = []
        next_page_token: Any = None
        previous_cursor: Any = None

        # Can't use BaseAPIPaginator - here we need to aggregate all pages of
        # fields of a project into one record, while BaseAPIPaginator yields
        # records incrementally as pages are fetched.
        while True:
            prepared_request = self.prepare_request(
                context=context, next_page_token=next_page_token
            )
            resp = self._request(prepared_request, context)
            all_field_configurations.extend(self.parse_response(resp))

            page_info = self._extract_page_info(resp.json())
            if not page_info.get("hasNextPage_0"):
                break

            end_cursor = page_info.get("endCursor_0")
            if end_cursor is None or end_cursor == previous_cursor:
                # Without a new cursor the next request would return the same
                # page again, so stop instead of looping forever.
                self.logger.warning(
                    "Pagination cursor did not advance for project fields; "
                    "stopping. Context: %s",
                    context,
                )
                break

            previous_cursor = end_cursor
            next_page_token = {"nextPageCursor_0": end_cursor}

        yield {
            "org": org,
            "project_number": project_number,
            "all_field_configurations": all_field_configurations,
        }

    def get_child_context(self, record: dict, context: Context | None) -> dict:
        """Return context for child streams.

        The parent context is copied and the aggregated field configurations
        are added under ``project_field_configurations``.
        """
        child_context = dict(context or {})  # Includes org, project_number
        child_context["project_field_configurations"] = record.get(
            "all_field_configurations", []
        )
        return child_context

    schema = th.PropertiesList(
        th.Property("org", th.StringType),
        th.Property("project_number", th.IntegerType),
        th.Property(
            "all_field_configurations",
            th.ArrayType(
                th.ObjectType(
                    # Schema for a single field definition
                    th.Property(
                        "id", th.StringType, nullable=False
                    ),  # using databaseId from GraphQL as id, nullable in GraphQL
                    th.Property(
                        "node_id", th.StringType
                    ),  # using id from GraphQL as node_id, it is required (ID!)
                    th.Property("name", th.StringType),
                    th.Property("data_type", th.StringType),
                    th.Property("created_at", th.DateTimeType),
                    th.Property("updated_at", th.DateTimeType),
                    th.Property(
                        "configuration",
                        th.ObjectType(
                            th.Property("duration", th.IntegerType),
                            th.Property("start_day", th.IntegerType),
                            th.Property(
                                "iterations",
                                th.ArrayType(
                                    th.ObjectType(
                                        th.Property("id", th.StringType),
                                        th.Property("title", th.StringType),
                                        th.Property("start_date", th.DateType),
                                        th.Property("duration", th.IntegerType),
                                    )
                                ),
                            ),
                            th.Property(
                                "completed_iterations",
                                th.ArrayType(
                                    th.ObjectType(
                                        th.Property("id", th.StringType),
                                        th.Property("title", th.StringType),
                                        th.Property("start_date", th.DateType),
                                        th.Property("duration", th.IntegerType),
                                    )
                                ),
                            ),
                        ),
                        required=False,  # Only present for ProjectV2IterationField
                    ),
                    th.Property(
                        "options",
                        th.ArrayType(
                            th.ObjectType(
                                th.Property("id", th.StringType),
                                th.Property("name", th.StringType),
                                th.Property("color", th.StringType),
                                th.Property("description", th.StringType),
                            )
                        ),
                        required=False,  # Only present for ProjectV2SingleSelectField
                    ),
                )
            ),
        ),
    ).to_dict()
class ProjectItemsStream(GitHubGraphqlStream):
"""Fetches items for a project and their field values.
This stream is a child of ProjectFieldConfigurationsStream. For each project,
it fetches all items and then for each item, it queries the values of all
known fields.
API Reference: https://docs.github.com/en/graphql/reference/objects#projectv2item
"""
name = "project_items"
primary_keys: ClassVar[list[str]] = ["org", "project_number", "node_id"]
parent_stream_type = ProjectFieldConfigurationsStream
ignore_parent_replication_key = True
state_partitioning_keys: ClassVar[list[str]] = ["org", "project_number"]
tolerated_http_errors: ClassVar[list[int]] = [414]
query_jsonpath = "$.data.organization.projectV2.items.nodes[*]"
# A project's custom fields support the types Text, Number, Date, SingleSelect
# and Iteration, so we fetch values from the corresponding value types.
#
# Note: the other value types below are available in issues/pull requests, so
# they are not included.
# - ProjectV2ItemFieldRepositoryValue,
# - ProjectV2ItemFieldUserValue,
# - ProjectV2ItemFieldLabelValue,
# - ProjectV2ItemFieldReviewerValue,
# - ProjectV2ItemFieldPullRequestValue,
# - ProjectV2ItemFieldMilestoneValue
_supported_project_item_field_value_types: ClassVar[tuple[str, ...]] = (
"ProjectV2ItemFieldTextValue",
"ProjectV2ItemFieldDateValue",
"ProjectV2ItemFieldNumberValue",
"ProjectV2ItemFieldSingleSelectValue",
"ProjectV2ItemFieldIterationValue",
)
# These fields are automatically created by GitHub and are expected to be
# present in the project items.
_common_fields: ClassVar[dict[str, dict[str, str]]] = {
"Title": {"column": "title", "type": "ProjectV2ItemFieldTextValue"},
"Status": {"column": "status", "type": "ProjectV2ItemFieldSingleSelectValue"},
}
def __init__(self, *args, **kwargs) -> None:  # noqa: ANN002, ANN003
    """Initialize the stream; all arguments are forwarded to the parent class."""
    super().__init__(*args, **kwargs)
    # Field configurations of the project currently being synced. Overwritten
    # from the context in get_url_params and read by query and post_process.
    self._current_project_field_configurations: list[dict] = []
def request_records(self, context: Context | None) -> Iterable[dict]:
    """Yield records from the parent implementation, tolerating two API errors.

    A ``FatalAPIError`` whose first argument contains ``FORBIDDEN`` or
    ``Timeout on validation of query`` ends the iteration for this context
    with a warning; any other ``FatalAPIError`` is re-raised.

    TODO: should rewrite to use validate_response once
    https://github.com/meltano/sdk/issues/280 is implemented.
    """
    try:
        yield from super().request_records(context)
    except FatalAPIError as exc:
        detail = str(exc.args[0]) if exc.args else ""
        if "FORBIDDEN" in detail:
            # Raised when the organization has security settings that block
            # access to the nodes of an item of a project, e.g. allowed IP list.
            reason = "FORBIDDEN"
        elif "Timeout on validation of query" in detail:
            reason = "query validation timeout"
        else:
            raise
        self.logger.warning(
            f"Skipping project due to {reason} error. "
            f"Context: {context}. Error: {exc}"
        )
def _escape_graphql_string(self, value: str) -> str:
"""
Escape special characters in a string for use in GraphQL queries.
"""
# Escape backslashes first, then quotes
return value.replace("\\", "\\\\").replace('"', '\\"')
def _generate_gql_alias(self, field_name: str) -> str:
"""
Generate a unique GraphQL-safe alias from a field name.
"""
# Create a hash of the field name
hash_obj = hashlib.sha256(field_name.encode("utf-8"))
# Take first 8 characters of hex digest for a short but unique identifier
hash_suffix = hash_obj.hexdigest()[:8]
# GraphQL aliases must start with a letter or underscore
# Prefix with 'field_' to ensure it's always valid
return f"field_{hash_suffix}"
@property
def query(self) -> str:
    """Build the GraphQL query for project items and their field values.

    One aliased ``fieldValueByName`` selection is generated per named entry
    in ``self._current_project_field_configurations`` and appended to the
    item selection set.
    """
    configured_names = [
        config.get("name") for config in self._current_project_field_configurations
    ]

    per_field_selections = []
    for field_name in configured_names:
        if not field_name:
            # Nameless configurations cannot be looked up by name.
            continue

        alias = self._generate_gql_alias(field_name)
        safe_name = self._escape_graphql_string(field_name)

        # Inline fragments cover the value types a custom project field can
        # have: Text, Date, Number, SingleSelect and Iteration.
        per_field_selections.append(f'''
{alias}: fieldValueByName(name: "{safe_name}") {{
__typename
... on ProjectV2ItemFieldTextValue {{
node_id: id
id: databaseId
text
creator {{
{ACTOR_FRAGMENT}
}}
created_at: createdAt
updated_at: updatedAt
}}
... on ProjectV2ItemFieldDateValue {{
node_id: id
id: databaseId
date
creator {{
{ACTOR_FRAGMENT}
}}
created_at: createdAt
updated_at: updatedAt
}}
... on ProjectV2ItemFieldNumberValue {{
node_id: id
id: databaseId
number
creator {{
{ACTOR_FRAGMENT}
}}
created_at: createdAt
updated_at: updatedAt
}}
... on ProjectV2ItemFieldSingleSelectValue {{
node_id: id
id: databaseId
color
description
name
option_id: optionId
creator {{
{ACTOR_FRAGMENT}
}}
created_at: createdAt
updated_at: updatedAt
}}
... on ProjectV2ItemFieldIterationValue {{
node_id: id
id: databaseId
duration
start_date: startDate
iteration_id: iterationId
title
creator {{
{ACTOR_FRAGMENT}
}}
created_at: createdAt
updated_at: updatedAt
}}
}}''')

    field_value_selections = "\n".join(per_field_selections)

    return f"""
query ProjectItemsWithFieldValues(
$org: String!,
$project_number: Int!,
$nextPageCursor_0: String
) {{
organization(login: $org) {{
projectV2(number: $project_number) {{
items(first: 100, after: $nextPageCursor_0) {{
nodes {{
node_id: id
id: fullDatabaseId
created_at: createdAt
updated_at: updatedAt
is_archived: isArchived
type
creator {{
{ACTOR_FRAGMENT}
}}
content {{
... on Issue {{
type: __typename
node_id: id
}}
... on DraftIssue {{
type: __typename
node_id: id
}}
... on PullRequest {{
type: __typename
node_id: id
}}
}}
{field_value_selections}
}}
pageInfo {{
hasNextPage_0: hasNextPage
endCursor_0: endCursor
}}
totalCount
}}
}}
}}
rateLimit {{
cost
}}
}}
"""
def get_url_params(
    self,
    context: Context | None,
    next_page_token: Any | None,  # noqa: ANN401
) -> dict[str, Any]:
    """Return the request parameters and cache the project's field configurations.

    The field configurations passed down in the context are stored on the
    instance, where ``query`` and ``post_process`` read them.

    Returns:
        The parent implementation's parameters, or ``{}`` when no context is
        given (a warning is logged in that case).
    """
    if not context:
        # This should not happen if parent_stream_type is correctly set.
        # Log the actual class name; the previous message named a class that
        # does not exist.
        self.logger.warning("%s received no context.", type(self).__name__)
        return {}

    # `or []` also covers an explicit None, which would otherwise break the
    # iteration over the configurations in `query` and `post_process`.
    self._current_project_field_configurations = (
        context.get("project_field_configurations") or []
    )

    # org and project_number are already in params via context from parent
    return super().get_url_params(context, next_page_token)
def post_process(self, row: dict, context: Context | None = None) -> dict:
    """Turn the aliased per-field selections of an item into structured values.

    After the parent post-processing, the row is stamped with ``org`` and
    ``project_number``. Each aliased field value is popped from the row and
    converted to an entry. Entries for the fields in ``_common_fields`` go to
    their dedicated column (``None`` when absent); all others are collected
    in ``field_values``. Without a context the row is returned right after
    the parent post-processing.
    """
    row = super().post_process(row, context)
    if not context:
        return row

    # Add context fields
    row["org"] = context["org"]
    row["project_number"] = context["project_number"]

    # Dedicated columns always exist, even when the project lacks the field.
    for common in self._common_fields.values():
        row[common["column"]] = None

    # value type -> (key holding the value, extra keys copied as they are)
    extraction_plan: dict[str, tuple[str, tuple[str, ...]]] = {
        "ProjectV2ItemFieldTextValue": ("text", ()),
        "ProjectV2ItemFieldDateValue": ("date", ()),
        "ProjectV2ItemFieldNumberValue": ("number", ()),
        "ProjectV2ItemFieldSingleSelectValue": (
            "name",
            ("option_id", "color", "description"),
        ),
        "ProjectV2ItemFieldIterationValue": (
            "title",
            ("iteration_id", "start_date", "duration"),
        ),
    }
    copied_keys = ("node_id", "id", "created_at", "updated_at", "creator")

    other_field_values: list[dict[str, Any]] = []
    for config in self._current_project_field_configurations:
        field_name = config.get("name")
        if not field_name:
            continue

        # Always remove the aliased selection from the row.
        raw = row.pop(self._generate_gql_alias(field_name), None)
        if not raw:
            continue

        value_type = raw.get("__typename")
        if value_type not in self._supported_project_item_field_value_types:
            continue

        entry: dict[str, Any] = {
            "field_name": field_name,
            "value_type": value_type,
        }
        entry.update({key: raw[key] for key in copied_keys if key in raw})

        plan = extraction_plan.get(value_type)
        if plan is not None:
            value_key, extra_keys = plan
            raw_value = raw.get(value_key)
            # The value is always emitted as a string (or None).
            entry["value"] = None if raw_value is None else str(raw_value)
            for extra_key in extra_keys:
                entry[extra_key] = raw.get(extra_key)

        # A common field only gets its dedicated column when the value type
        # matches the expected one; otherwise it is treated like any other.
        common = self._common_fields.get(field_name)
        if common and common["type"] == value_type:
            row[common["column"]] = entry
        else:
            other_field_values.append(entry)

    row["field_values"] = other_field_values
    return row
@property
def schema(self) -> dict:
"""Define schema with dynamic_fields as an array of name/value objects."""
properties = th.PropertiesList(
th.Property("org", th.StringType),
th.Property("project_number", th.IntegerType),
th.Property("node_id", th.StringType), # id from GraphQL
th.Property(
"id", th.StringType, nullable=False
), # fullDatabaseId from GraphQL, nullable
th.Property("created_at", th.DateTimeType),
th.Property("updated_at", th.DateTimeType),
th.Property("is_archived", th.BooleanType),
th.Property("type", th.StringType),
# Dedicated fields for common project fields
th.Property(
"title",
th.ObjectType(
th.Property("value_type", th.StringType),
th.Property("node_id", th.StringType),
th.Property(
"id", th.StringType, required=False
), # databaseId is nullable
th.Property("created_at", th.DateTimeType),
th.Property("updated_at", th.DateTimeType),
th.Property(
"value", th.StringType, required=False
), # text value is nullable
th.Property(
"creator",
th.ObjectType(
th.Property("login", th.StringType),
th.Property("resource_path", th.StringType),
th.Property("url", th.StringType),
th.Property("type", th.StringType),
th.Property("node_id", th.StringType),
th.Property("id", th.StringType, required=False),
),
required=False, # creator is nullable
),
),
required=False,
),
th.Property(
"status",
th.ObjectType(
th.Property("value_type", th.StringType),
th.Property("node_id", th.StringType),
th.Property(
"id", th.StringType, required=False
), # databaseId is nullable
th.Property("created_at", th.DateTimeType),
th.Property("updated_at", th.DateTimeType),
th.Property(
"value", th.StringType, required=False
), # name value is nullable
th.Property("option_id", th.StringType, required=False), # nullable
th.Property("color", th.StringType, required=False), # nullable
th.Property(
"description", th.StringType, required=False
), # nullable
th.Property(
"creator",
th.ObjectType(
th.Property("login", th.StringType),
th.Property("resource_path", th.StringType),
th.Property("url", th.StringType),
th.Property("type", th.StringType),
th.Property("node_id", th.StringType),
th.Property("id", th.StringType, required=False),
),
required=False, # creator is nullable
),
),
required=False,
),
th.Property(
"creator",
th.ObjectType(
th.Property("login", th.StringType),
th.Property("resource_path", th.StringType),
th.Property("url", th.StringType),