Merge pull request tylertreat#38 from paulw54jrn/master

tylertreat · tylertreat · commit e6d2bb25c2d0 · 2014-12-22T23:02:31.000-06:00
FIX: _get_all_tables fetches all tables by following nextPageToken
diff --git a/bigquery/client.py b/bigquery/client.py
@@ -1,6 +1,6 @@
 import calendar
 from collections import defaultdict
-from datetime import datetime
+from datetime import datetime, timedelta
 from time import sleep
 from time import time
 from hashlib import sha256
@@ -20,6 +20,8 @@
 BIGQUERY_SCOPE = 'https://www.googleapis.com/auth/bigquery'
 BIGQUERY_SCOPE_READ_ONLY = 'https://www.googleapis.com/auth/bigquery.readonly'
 
+CACHE_TIMEOUT = timedelta(seconds=30)
+
 JOB_CREATE_IF_NEEDED = 'CREATE_IF_NEEDED'
 JOB_CREATE_NEVER = 'CREATE_NEVER'
 JOB_WRITE_TRUNCATE = 'WRITE_TRUNCATE'
@@ -107,6 +109,7 @@ def __init__(self, bq_service, project_id, swallow_results=True):
         self.bigquery = bq_service
         self.project_id = project_id
         self.swallow_results = swallow_results
+        self.cache = {}
 
     def query(self, query, max_results=None, timeout=0, dry_run=False):
         """Submit a query to BigQuery.
@@ -790,19 +793,37 @@ def push_rows(self, dataset, table, rows, insert_id_key=None):
                     }]
                 }
 
-    def _get_all_tables(self, dataset_id):
+    def _get_all_tables(self, dataset_id, cache=False):
         """Retrieve a list of all tables for the dataset.
 
         Args:
             dataset_id: the dataset to retrieve table names for.
-
+            cache: whether to reuse a cached table list. Cached entries
+                   older than CACHE_TIMEOUT are refetched.
         Returns:
             a dictionary of app ids mapped to their table names.
         """
-
-        result = self.bigquery.tables().list(
-            projectId=self.project_id,
-            datasetId=dataset_id).execute()
+        do_fetch = True
+        if cache and self.cache.get(dataset_id):
+            time, result = self.cache.get(dataset_id)
+            if datetime.now() - time < CACHE_TIMEOUT:
+                do_fetch = False
+
+        if do_fetch:
+            result = self.bigquery.tables().list(
+                projectId=self.project_id,
+                datasetId=dataset_id).execute()
+
+            page_token = result.get('nextPageToken')
+            while page_token:
+                res = self.bigquery.tables().list(
+                    projectId=self.project_id,
+                    datasetId=dataset_id,
+                    pageToken=page_token
+                    ).execute()
+                page_token = res.get('nextPageToken')
+                result['tables'] += res.get('tables', [])
+            self.cache[dataset_id] = (datetime.now(), result)
 
         return self._parse_table_list_response(result)
 
diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py
@@ -1028,6 +1028,42 @@ def test_not_inside_range(self):
         self.assertEqual([], tables)
 
 
+NEXT_TABLE_LIST_RESPONSE = {
+    "kind": "bigquery#tableList",
+    "etag": "\"t_UlB9a9mrx5sjQInRGzeDrLrS0/TsIP_i4gAeLegj84WzkPzBPIkjo\"",
+    "nextPageToken": "2013_05_appspot_1",
+    "tables": [
+        {
+            "kind": "bigquery#table",
+            "id": "project:dataset.2013_06_appspot_10",
+            "tableReference": {
+                "projectId": "project",
+                "datasetId": "dataset",
+                "tableId": "2013_06_appspot_10"
+            }
+        },
+        {
+            "kind": "bigquery#table",
+            "id": "project:dataset.2013_06_appspot_11",
+            "tableReference": {
+                "projectId": "project",
+                "datasetId": "dataset",
+                "tableId": "2013_06_appspot_11"
+            }
+        },
+        {
+            "kind": "bigquery#table",
+            "id": "project:dataset.2013_06_appspot_12",
+            "tableReference": {
+                "projectId": "project",
+                "datasetId": "dataset",
+                "tableId": "2013_06_appspot_12"
+            }
+        },
+    ],
+    "totalItems": 3
+}
+
 FULL_TABLE_LIST_RESPONSE = {
     "kind": "bigquery#tableList",
     "etag": "\"GSclnjk0zID1ucM3F-xYinOm1oE/cn58Rpu8v8pB4eoJQaiTe11lPQc\"",
@@ -1663,7 +1699,83 @@ def test_get_tables(self):
             'appspot': {'2013_05_appspot': 1367366400}
         }
 
-        tables = bq._get_all_tables('dataset')
+        tables = bq._get_all_tables('dataset', cache=False)
+        self.assertEquals(expected_result, tables)
+
+    def test_get_all_tables_with_page_token(self):
+        """Ensure get_all_tables fetches all table names from BigQuery"""
+
+        mock_execute = mock.Mock()
+        mock_execute.execute.side_effect = [NEXT_TABLE_LIST_RESPONSE,
+                                            FULL_TABLE_LIST_RESPONSE]
+
+        mock_tables = mock.Mock()
+        mock_tables.list.return_value = mock_execute
+
+        mock_bq_service = mock.Mock()
+        mock_bq_service.tables.return_value = mock_tables
+
+        bq = client.BigQueryClient(mock_bq_service, 'project')
+
+        expected_result = {
+            'appspot-3': {'2013_06_appspot_3': 1370044800},
+            'appspot-2': {'2013_06_appspot_2': 1370044800},
+            'appspot-1': {'2013_06_appspot_1': 1370044800},
+            'appspot-6': {'appspot_6_2013_06': 1370044800},
+            'appspot-5': {'2013_06_appspot_5': 1370044800},
+            'appspot-4': {'2013_06_appspot_4': 1370044800},
+            'appspot': {'2013_05_appspot': 1367366400},
+            'appspot-10': {'2013_06_appspot_10': 1370044800},
+            'appspot-12': {'2013_06_appspot_12': 1370044800},
+            'appspot-11': {'2013_06_appspot_11': 1370044800},
+        }
+        tables = bq._get_all_tables('dataset', cache=False)
+        self.assertEquals(expected_result, tables)
+
+    def test_get_all_tables_with_cache(self):
+        """Ensure get_all_tables uses cache when fetching"""
+        mock_execute = mock.Mock()
+        mock_execute.execute.return_value = FULL_TABLE_LIST_RESPONSE
+
+        mock_tables = mock.Mock()
+        mock_tables.list.return_value = mock_execute
+
+        mock_bq_service = mock.Mock()
+        mock_bq_service.tables.return_value = mock_tables
+
+        bq = client.BigQueryClient(mock_bq_service, 'project')
+
+        expected_result = {
+            'appspot-3': {'2013_06_appspot_3': 1370044800},
+            'appspot-2': {'2013_06_appspot_2': 1370044800},
+            'appspot-1': {'2013_06_appspot_1': 1370044800},
+            'appspot-6': {'appspot_6_2013_06': 1370044800},
+            'appspot-5': {'2013_06_appspot_5': 1370044800},
+            'appspot-4': {'2013_06_appspot_4': 1370044800},
+            'appspot': {'2013_05_appspot': 1367366400}
+        }
+
+        tables = bq._get_all_tables('dataset', cache=True)
+        self.assertEquals(expected_result, tables)
+
+        mock_execute.execute.side_effect = [NEXT_TABLE_LIST_RESPONSE,
+                                            FULL_TABLE_LIST_RESPONSE]
+        tables = bq._get_all_tables('dataset', cache=True)
+        self.assertEquals(expected_result, tables)
+
+        expected_result = {
+            'appspot-3': {'2013_06_appspot_3': 1370044800},
+            'appspot-2': {'2013_06_appspot_2': 1370044800},
+            'appspot-1': {'2013_06_appspot_1': 1370044800},
+            'appspot-6': {'appspot_6_2013_06': 1370044800},
+            'appspot-5': {'2013_06_appspot_5': 1370044800},
+            'appspot-4': {'2013_06_appspot_4': 1370044800},
+            'appspot': {'2013_05_appspot': 1367366400},
+            'appspot-10': {'2013_06_appspot_10': 1370044800},
+            'appspot-12': {'2013_06_appspot_12': 1370044800},
+            'appspot-11': {'2013_06_appspot_11': 1370044800},
+        }
+        tables = bq._get_all_tables('dataset', cache=False)
         self.assertEquals(expected_result, tables)