Skip to content

Commit e6d2bb2

Browse files
committed
Merge pull request tylertreat#38 from paulw54jrn/master
FIX: _get_all_tables fetches all tables by following nextPageToken
2 parents 59c2127 + 43e61c1 commit e6d2bb2

File tree

2 files changed

+141
-8
lines changed

2 files changed

+141
-8
lines changed

bigquery/client.py

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import calendar
22
from collections import defaultdict
3-
from datetime import datetime
3+
from datetime import datetime, timedelta
44
from time import sleep
55
from time import time
66
from hashlib import sha256
@@ -20,6 +20,8 @@
2020
BIGQUERY_SCOPE = 'https://www.googleapis.com/auth/bigquery'
2121
BIGQUERY_SCOPE_READ_ONLY = 'https://www.googleapis.com/auth/bigquery.readonly'
2222

23+
CACHE_TIMEOUT = timedelta(seconds=30)
24+
2325
JOB_CREATE_IF_NEEDED = 'CREATE_IF_NEEDED'
2426
JOB_CREATE_NEVER = 'CREATE_NEVER'
2527
JOB_WRITE_TRUNCATE = 'WRITE_TRUNCATE'
@@ -107,6 +109,7 @@ def __init__(self, bq_service, project_id, swallow_results=True):
107109
self.bigquery = bq_service
108110
self.project_id = project_id
109111
self.swallow_results = swallow_results
112+
self.cache = {}
110113

111114
def query(self, query, max_results=None, timeout=0, dry_run=False):
112115
"""Submit a query to BigQuery.
@@ -790,19 +793,37 @@ def push_rows(self, dataset, table, rows, insert_id_key=None):
790793
}]
791794
}
792795

793-
def _get_all_tables(self, dataset_id):
796+
def _get_all_tables(self, dataset_id, cache=False):
794797
"""Retrieve a list of all tables for the dataset.
795798
796799
Args:
797800
dataset_id: the dataset to retrieve table names for.
798-
801+
cache: whether to reuse a previously fetched table list. A cached
802+
list is reused only if it is newer than CACHE_TIMEOUT.
799803
Returns:
800804
a dictionary of app ids mapped to their table names.
801805
"""
802-
803-
result = self.bigquery.tables().list(
804-
projectId=self.project_id,
805-
datasetId=dataset_id).execute()
806+
do_fetch = True
807+
if cache and self.cache.get(dataset_id):
808+
time, result = self.cache.get(dataset_id)
809+
if datetime.now() - time < CACHE_TIMEOUT:
810+
do_fetch = False
811+
812+
if do_fetch:
813+
result = self.bigquery.tables().list(
814+
projectId=self.project_id,
815+
datasetId=dataset_id).execute()
816+
817+
page_token = result.get('nextPageToken')
818+
while page_token:
819+
res = self.bigquery.tables().list(
820+
projectId=self.project_id,
821+
datasetId=dataset_id,
822+
pageToken=page_token
823+
).execute()
824+
page_token = res.get('nextPageToken')
825+
result['tables'] += res.get('tables', [])
826+
self.cache[dataset_id] = (datetime.now(), result)
806827

807828
return self._parse_table_list_response(result)
808829

bigquery/tests/test_client.py

Lines changed: 113 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1028,6 +1028,42 @@ def test_not_inside_range(self):
10281028
self.assertEqual([], tables)
10291029

10301030

1031+
NEXT_TABLE_LIST_RESPONSE = {
1032+
"kind": "bigquery#tableList",
1033+
"etag": "\"t_UlB9a9mrx5sjQInRGzeDrLrS0/TsIP_i4gAeLegj84WzkPzBPIkjo\"",
1034+
"nextPageToken": "2013_05_appspot_1",
1035+
"tables": [
1036+
{
1037+
"kind": "bigquery#table",
1038+
"id": "project:dataset.2013_06_appspot_10",
1039+
"tableReference": {
1040+
"projectId": "project",
1041+
"datasetId": "dataset",
1042+
"tableId": "2013_06_appspot_10"
1043+
}
1044+
},
1045+
{
1046+
"kind": "bigquery#table",
1047+
"id": "project:dataset.2013_06_appspot_11",
1048+
"tableReference": {
1049+
"projectId": "project",
1050+
"datasetId": "dataset",
1051+
"tableId": "2013_06_appspot_11"
1052+
}
1053+
},
1054+
{
1055+
"kind": "bigquery#table",
1056+
"id": "project:dataset.2013_06_appspot_12",
1057+
"tableReference": {
1058+
"projectId": "project",
1059+
"datasetId": "dataset",
1060+
"tableId": "2013_06_appspot_12"
1061+
}
1062+
},
1063+
],
1064+
"totalItems": 3
1065+
}
1066+
10311067
FULL_TABLE_LIST_RESPONSE = {
10321068
"kind": "bigquery#tableList",
10331069
"etag": "\"GSclnjk0zID1ucM3F-xYinOm1oE/cn58Rpu8v8pB4eoJQaiTe11lPQc\"",
@@ -1663,7 +1699,83 @@ def test_get_tables(self):
16631699
'appspot': {'2013_05_appspot': 1367366400}
16641700
}
16651701

1666-
tables = bq._get_all_tables('dataset')
1702+
tables = bq._get_all_tables('dataset', cache=False)
1703+
self.assertEquals(expected_result, tables)
1704+
1705+
def test_get_all_tables_with_page_token(self):
1706+
"""Ensure get_all_tables fetches all table names from BigQuery"""
1707+
1708+
mock_execute = mock.Mock()
1709+
mock_execute.execute.side_effect = [NEXT_TABLE_LIST_RESPONSE,
1710+
FULL_TABLE_LIST_RESPONSE]
1711+
1712+
mock_tables = mock.Mock()
1713+
mock_tables.list.return_value = mock_execute
1714+
1715+
mock_bq_service = mock.Mock()
1716+
mock_bq_service.tables.return_value = mock_tables
1717+
1718+
bq = client.BigQueryClient(mock_bq_service, 'project')
1719+
1720+
expected_result = {
1721+
'appspot-3': {'2013_06_appspot_3': 1370044800},
1722+
'appspot-2': {'2013_06_appspot_2': 1370044800},
1723+
'appspot-1': {'2013_06_appspot_1': 1370044800},
1724+
'appspot-6': {'appspot_6_2013_06': 1370044800},
1725+
'appspot-5': {'2013_06_appspot_5': 1370044800},
1726+
'appspot-4': {'2013_06_appspot_4': 1370044800},
1727+
'appspot': {'2013_05_appspot': 1367366400},
1728+
'appspot-10': {'2013_06_appspot_10': 1370044800},
1729+
'appspot-12': {'2013_06_appspot_12': 1370044800},
1730+
'appspot-11': {'2013_06_appspot_11': 1370044800},
1731+
}
1732+
tables = bq._get_all_tables('dataset', cache=False)
1733+
self.assertEquals(expected_result, tables)
1734+
1735+
def test_get_all_tables_with_cache(self):
1736+
"""Ensure get_all_tables uses cache when fetching"""
1737+
mock_execute = mock.Mock()
1738+
mock_execute.execute.return_value = FULL_TABLE_LIST_RESPONSE
1739+
1740+
mock_tables = mock.Mock()
1741+
mock_tables.list.return_value = mock_execute
1742+
1743+
mock_bq_service = mock.Mock()
1744+
mock_bq_service.tables.return_value = mock_tables
1745+
1746+
bq = client.BigQueryClient(mock_bq_service, 'project')
1747+
1748+
expected_result = {
1749+
'appspot-3': {'2013_06_appspot_3': 1370044800},
1750+
'appspot-2': {'2013_06_appspot_2': 1370044800},
1751+
'appspot-1': {'2013_06_appspot_1': 1370044800},
1752+
'appspot-6': {'appspot_6_2013_06': 1370044800},
1753+
'appspot-5': {'2013_06_appspot_5': 1370044800},
1754+
'appspot-4': {'2013_06_appspot_4': 1370044800},
1755+
'appspot': {'2013_05_appspot': 1367366400}
1756+
}
1757+
1758+
tables = bq._get_all_tables('dataset', cache=True)
1759+
self.assertEquals(expected_result, tables)
1760+
1761+
mock_execute.execute.side_effect = [NEXT_TABLE_LIST_RESPONSE,
1762+
FULL_TABLE_LIST_RESPONSE]
1763+
tables = bq._get_all_tables('dataset', cache=True)
1764+
self.assertEquals(expected_result, tables)
1765+
1766+
expected_result = {
1767+
'appspot-3': {'2013_06_appspot_3': 1370044800},
1768+
'appspot-2': {'2013_06_appspot_2': 1370044800},
1769+
'appspot-1': {'2013_06_appspot_1': 1370044800},
1770+
'appspot-6': {'appspot_6_2013_06': 1370044800},
1771+
'appspot-5': {'2013_06_appspot_5': 1370044800},
1772+
'appspot-4': {'2013_06_appspot_4': 1370044800},
1773+
'appspot': {'2013_05_appspot': 1367366400},
1774+
'appspot-10': {'2013_06_appspot_10': 1370044800},
1775+
'appspot-12': {'2013_06_appspot_12': 1370044800},
1776+
'appspot-11': {'2013_06_appspot_11': 1370044800},
1777+
}
1778+
tables = bq._get_all_tables('dataset', cache=False)
16671779
self.assertEquals(expected_result, tables)
16681780

16691781

0 commit comments

Comments
 (0)