-
Notifications
You must be signed in to change notification settings - Fork 1.7k
chore: Add heuristics for issue tracker #12703
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1105931
83e9445
e3f47ad
ebc4a64
94cd442
3952eb4
65ce226
970cd9e
a26b680
2e724bd
10a1718
3dc545a
cab780e
7da8495
0ab99f2
782329f
e2eaae9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,9 +16,14 @@ | |
|
|
||
| import os | ||
| import requests | ||
| import logging | ||
| from typing import List, Optional | ||
| from dataclasses import dataclass | ||
|
|
||
| # Configure logging to output messages to console | ||
| logging.basicConfig(level=logging.INFO) # Set the desired logging level | ||
|
|
||
| import re | ||
|
|
||
| class MissingGithubToken(ValueError): | ||
| """Raised when the GITHUB_TOKEN environment variable is not set""" | ||
|
|
@@ -57,9 +62,20 @@ class MissingGithubToken(ValueError): | |
| # BASE_API defines the base API for Github. | ||
| BASE_API = "https://api.github.com" | ||
|
|
||
| # GITHUB_ISSUES defines the issues URL for a repository on GitHub. | ||
| GITHUB_ISSUES = "https://github.com/{repo}/issues" | ||
|
|
||
| # BASE_ISSUE_TRACKER defines the base URL for issue tracker. | ||
| BASE_ISSUE_TRACKER = "https://issuetracker.google.com" | ||
|
|
||
| # This issue-tracker component is part of some saved searches for listing API-side issues. | ||
| # However, when we construct URLs for filing new issues (which in some cases we do by analyzing | ||
| # the query string for a saved search), we want to ensure we DON'T file a new issue against | ||
| # this generic component but against a more specific one. | ||
| GENERIC_ISSUE_TRACKER_COMPONENT = "187065" | ||
|
|
||
| # This sentinel value is used to mark cache fields that have not been computed yet. | ||
| NOT_COMPUTED = -1 | ||
|
|
||
| class CloudClient: | ||
| repo: str = None | ||
|
|
@@ -68,13 +84,97 @@ class CloudClient: | |
| distribution_name: str = None | ||
| issue_tracker: str = None | ||
|
|
||
|
|
||
def __init__(self, repo: dict):
    """Populate the client from one repository-metadata mapping.

    Args:
        repo: Mapping that must contain the keys ``repo``, ``name_pretty``,
            ``release_level`` and ``distribution_name``; ``issue_tracker``
            is optional and is stored as ``None`` when absent.
    """
    self.repo = repo["repo"]

    # For now, strip out the "Google " and "Cloud " words to standardize
    # the titles.
    pretty_name = repo["name_pretty"]
    for vendor_word in ("Google ", "Cloud "):
        pretty_name = pretty_name.replace(vendor_word, "")
    self.title = pretty_name

    self.release_level = repo["release_level"]
    self.distribution_name = repo["distribution_name"]
    self.issue_tracker = repo.get("issue_tracker")

    # The derived IDs are computed on first access; until then each cache
    # slot holds the NOT_COMPUTED sentinel.
    self._cached_saved_search_id = NOT_COMPUTED
    self._cached_template_id = NOT_COMPUTED
    self._cached_component_id = NOT_COMPUTED
|
|
||
@property
def saved_search_id(self):
    """Return the digits following ``savedsearches/`` in ``issue_tracker``.

    The result is ``None`` when ``issue_tracker`` is empty or contains no
    such segment. The value is computed once and then served from
    ``_cached_saved_search_id``.
    """
    if self._cached_saved_search_id == NOT_COMPUTED:
        found = None
        if self.issue_tracker:
            found = re.search(r'savedsearches/(\d+)', self.issue_tracker)
        self._cached_saved_search_id = found.group(1) if found else None
    return self._cached_saved_search_id
|
|
||
| @property | ||
| def saved_search_response_text(self): | ||
| if not self.saved_search_id: | ||
| return None | ||
| url = f"{BASE_ISSUE_TRACKER}/action/saved_searches/{self.saved_search_id}" | ||
| response = _fetch_response(url) | ||
| return response.text if response else None | ||
|
|
||
| @property | ||
| def issue_tracker_component_id(self): | ||
| if self._cached_component_id != NOT_COMPUTED: | ||
| return self._cached_component_id | ||
|
|
||
| # First, check if the issue tracker is a saved search: | ||
| query_string = self.saved_search_response_text or self.issue_tracker | ||
| if not query_string: | ||
| self._cached_component_id = None | ||
| else: | ||
| # Try to match 'component=' in the query string | ||
| query_match = re.search(r'\bcomponent=(\d+)', query_string) | ||
| if query_match: | ||
| self._cached_component_id = query_match.group(1) | ||
| else: | ||
| # If not found, try to match 'componentid:' in the query string | ||
| query_match = re.findall(r'\bcomponentid:(\d+)', query_string) | ||
| for component_id in query_match: | ||
| if component_id == GENERIC_ISSUE_TRACKER_COMPONENT: | ||
| continue | ||
| if self._cached_component_id != NOT_COMPUTED: | ||
| self._cached_component_id = None | ||
| logging.error(f"More than one component ID found for issue tracker: {self.issue_tracker}") | ||
| break | ||
| self._cached_component_id = component_id | ||
| self._cached_component_id = self._cached_component_id if self._cached_component_id != NOT_COMPUTED else None | ||
| return self._cached_component_id | ||
|
|
||
@property
def issue_tracker_template_id(self):
    """Return the ``template`` query-parameter digits from ``issue_tracker``.

    ``None`` is returned when ``issue_tracker`` is empty or carries no
    ``?template=``/``&template=`` parameter. The value is computed once
    and then served from ``_cached_template_id``.
    """
    if self._cached_template_id == NOT_COMPUTED:
        template_id = None
        if self.issue_tracker:
            found = re.search(r'(?:\?|&)template=(\d+)', self.issue_tracker)
            if found:
                template_id = found.group(1)
        self._cached_template_id = template_id
    return self._cached_template_id
|
|
||
@property
def show_client_issues(self):
    """Return the GitHub issues URL for this client's repository."""
    repo_slug = self.repo
    return GITHUB_ISSUES.format(repo=repo_slug)
|
|
||
@property
def file_api_issue(self):
    """Return the URL for filing a new API issue, or ``None``.

    A link is produced only when a component ID is known; the template ID,
    when there is one, is appended as an extra query parameter.
    """
    component_id = self.issue_tracker_component_id
    if not component_id:
        return None

    link = f"{BASE_ISSUE_TRACKER}/issues/new?component={component_id}"
    template_id = self.issue_tracker_template_id
    if template_id:
        link += f"&template={template_id}"
    return link
|
|
||
@property
def show_api_issues(self):
    """Return the URL that lists existing API issues, or ``None``.

    When ``issue_tracker`` points at a saved search it is returned
    unchanged, since it already links to that search. Otherwise a
    ``componentid:`` query URL is built from the component ID, if any.
    """
    if self.saved_search_id:
        return self.issue_tracker

    component_id = self.issue_tracker_component_id
    if component_id:
        return f"{BASE_ISSUE_TRACKER}/issues?q=componentid:{component_id}"
    return None
|
|
||
| # For sorting, we want to sort by release level, then API pretty_name | ||
| def __lt__(self, other): | ||
|
|
@@ -95,15 +195,35 @@ class Extractor: | |
def client_for_repo(self, repo_slug) -> Optional[CloudClient]:
    """Build a CloudClient from the metadata file of one repository.

    Args:
        repo_slug: Value substituted into ``self.path_format`` to locate
            the repository's metadata file.

    Returns:
        A CloudClient built from the parsed metadata, or ``None`` when the
        metadata could not be fetched or parsed (or parsed to an empty
        value).
    """
    path = self.path_format.format(repo_slug=repo_slug)
    url = f"{RAW_CONTENT_BASE_URL}/{path}/{REPO_METADATA_FILENAME}"
    # Only the parsed payload is needed here; the raw response is discarded.
    _, metadata = _fetch_and_parse_response(url)
    if not metadata:
        return None
    return CloudClient(metadata)
|
|
||
def get_clients_from_batch_response(self, response_json) -> List[CloudClient]:
    """Build clients for every allowed repository in a batch response.

    Args:
        response_json: Iterable of repository entries; each entry is
            checked with ``allowed_repo`` and indexed by
            ``self.response_key`` to obtain the slug passed to
            ``client_for_repo``.

    Returns:
        The successfully built clients. Repositories for which
        ``client_for_repo`` returns ``None`` are left out, so the list
        never contains ``None``.
    """
    clients = []
    for repo in response_json:
        if not allowed_repo(repo):
            continue
        client = self.client_for_repo(repo[self.response_key])
        # client_for_repo returns None when the metadata is unavailable;
        # skip those so the result matches the List[CloudClient] contract.
        if client is not None:
            clients.append(client)
    return clients
|
|
||
def _fetch_response(
    url: str,
    headers: Optional[dict] = None,
    params: Optional[dict] = None,
    timeout: float = 30,
) -> Optional[requests.Response]:
    """Issue a GET request and return the response, or ``None`` on failure.

    Args:
        url: Address to request.
        headers: Optional HTTP headers to send.
        params: Optional query parameters to send.
        timeout: Seconds to wait before giving up. Without a timeout a
            stalled server would block the caller indefinitely.

    Returns:
        The response when the request succeeds with a non-error status.
        Any ``requests.RequestException`` (including the one raised by
        ``raise_for_status`` and timeouts) is logged and yields ``None``.
    """
    try:
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        response.raise_for_status()
        return response
    except requests.RequestException as e:
        logging.error(f"Request failed for URL {url}: {e}")
        return None
|
|
||
def _parse_response(response: requests.Response) -> Optional[dict]:
    """Decode the JSON body of ``response``.

    Returns the decoded payload, or ``None`` (after logging an error) when
    decoding raises ``ValueError``.

    NOTE(review): the annotation says ``dict``, but callers in this file
    also iterate the result as a list of entries -- confirm and widen the
    annotation if so.
    """
    try:
        payload = response.json()
    except ValueError as e:
        logging.error(f"JSON decoding failed for URL {response.url}: {e}")
        payload = None
    return payload
|
|
||
def _fetch_and_parse_response(url: str, headers: dict = None, params: Optional[dict] = None):
    """Fetch ``url`` and decode its JSON body in one step.

    Returns:
        A ``(response, parsed)`` pair. Both elements are ``None`` when the
        fetch yields no usable response; ``parsed`` alone is ``None`` when
        the body cannot be decoded.
    """
    response = _fetch_response(url, headers, params)
    if response:
        return response, _parse_response(response)
    return None, None
|
|
||
| def replace_content_in_readme(content_rows: List[str]) -> None: | ||
| START_MARKER = ".. API_TABLE_START" | ||
|
|
@@ -135,16 +255,17 @@ def client_row(client: CloudClient) -> str: | |
| url = f"https://github.com/{client.repo}" | ||
| if client.repo == MONO_REPO: | ||
| url += f"/tree/main/packages/{client.distribution_name}" | ||
|
|
||
| _show_api_issues = client.show_api_issues | ||
| _file_api_issue = client.file_api_issue | ||
|
Comment on lines
+258
to
+259
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (No action needed -- just a comment) |
||
| content_row = [ | ||
| f" * - `{client.title} <{url}>`_\n", | ||
| f" - " + client.release_level + "\n", | ||
| f" - |PyPI-{client.distribution_name}|\n", | ||
| f" - {client.release_level}\n", | ||
| f" - |PyPI-{client.distribution_name}|\n", | ||
| f" - `API Issues <{_show_api_issues}>`_\n" if _show_api_issues else " -\n", | ||
| f" - `File an API Issue <{_file_api_issue}>`_\n" if _file_api_issue else " -\n", | ||
| f" - `Client Library Issues <{client.show_client_issues}>`_\n" | ||
| ] | ||
|
|
||
| if client.issue_tracker: | ||
| content_row.append(f" - `API Issues <{client.issue_tracker}>`_\n") | ||
|
|
||
| return (content_row, pypi_badge) | ||
|
|
||
|
|
||
|
|
@@ -157,7 +278,9 @@ def generate_table_contents(clients: List[CloudClient]) -> List[str]: | |
| " * - Client\n", | ||
| " - Release Level\n", | ||
| " - Version\n", | ||
| " - API Issue Tracker\n", | ||
| " - API Issues\n", | ||
| " - File an API Issue\n", | ||
| " - Client Library Issues\n", | ||
| ] | ||
|
|
||
| pypi_links = ["\n"] | ||
|
|
@@ -181,30 +304,33 @@ def mono_repo_clients(token: str) -> List[CloudClient]: | |
| # all mono repo clients | ||
| url = f"{BASE_API}/repos/{MONO_REPO}/contents/packages" | ||
| headers = {'Authorization': f'token {token}'} | ||
| response = requests.get(url=url, headers=headers) | ||
| _, packages = _fetch_and_parse_response(url, headers) | ||
| if not packages: | ||
| return [] | ||
| mono_repo_extractor = Extractor(path_format=MONO_REPO_PATH_FORMAT, response_key=PACKAGE_RESPONSE_KEY) | ||
|
|
||
| return mono_repo_extractor.get_clients_from_batch_response(response.json()) | ||
| return mono_repo_extractor.get_clients_from_batch_response(packages) | ||
|
|
||
|
|
||
def split_repo_clients(token: str) -> List[CloudClient]:
    """Collect clients for the split repositories, following pagination.

    Repositories are found through the GitHub repository-search endpoint
    and each page of results is converted to clients with an Extractor.

    Args:
        token: GitHub token sent in the ``Authorization`` header.

    Returns:
        The clients gathered from all pages that were fetched. Collection
        stops at the first page that cannot be fetched or parsed, that has
        no items, or that has no ``next`` link.
    """
    clients = []
    url = f"{BASE_API}/search/repositories?page=1"
    headers = {'Authorization': f'token {token}'}
    params = {'per_page': 100, "q": "python- in:name org:googleapis"}
    # The extractor does not depend on the page, so build it once.
    split_repo_extractor = Extractor(path_format=SPLIT_REPO_PATH_FORMAT, response_key=REPO_RESPONSE_KEY)

    while url:
        response, metadata = _fetch_and_parse_response(url, headers, params)
        if not metadata:
            break
        repositories = metadata.get("items", [])
        if not repositories:
            break

        clients.extend(split_repo_extractor.get_clients_from_batch_response(repositories))

        # Check for the 'next' link in the response headers for pagination.
        url = response.links.get('next', {}).get('url')
        # The 'next' URL already carries the full query string, so the
        # parameters must not be appended a second time.
        params = None

    return clients
|
|
||
|
|
||
| def get_token(): | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.