@@ -121,7 +121,6 @@ def __init__(self, bq_service, project_id, swallow_results=True):
121121 self .cache = {}
122122
123123 def _submit_query_job (self , query_data ):
124-
125124 """ Submit a query job to BigQuery.
126125
127126 This is similar to BigQueryClient.query, but gives the user
@@ -172,7 +171,6 @@ def _submit_query_job(self, query_data):
172171 return job_id , [self ._transform_row (row , schema ) for row in rows ]
173172
174173 def _insert_job (self , body_object ):
175-
176174 """ Submit a job to BigQuery
177175
178176 Direct proxy to the insert() method of the offical BigQuery
@@ -243,9 +241,7 @@ def get_query_schema(self, job_id):
243241 A list of dictionaries that represent the schema.
244242 """
245243
246- job_collection = self .bigquery .jobs ()
247- query_reply = self ._get_query_results (
248- job_collection , self .project_id , job_id , offset = 0 , limit = 0 )
244+ query_reply = self .get_query_results (job_id , offset = 0 , limit = 0 )
249245
250246 if not query_reply ['jobComplete' ]:
251247 logging .warning ('BigQuery job %s not complete' % job_id )
def check_job(self, job_id):
    """Check whether the given BigQuery job has completed.

    Args:
        job_id: The job id of the query to check.

    Returns:
        A tuple (complete, total_rows): `complete` is a bool indicating
        whether the job has finished, and `total_rows` is the number of
        rows included in the query table if it has completed (0 when the
        reply carries no row count).
    """

    # offset=0/limit=0 fetches job status without pulling any rows.
    query_reply = self.get_query_results(job_id, offset=0, limit=0)

    return (query_reply.get('jobComplete', False),
            int(query_reply.get('totalRows', 0)))
298292
def get_query_rows(self, job_id, offset=None, limit=None, timeout=0):
    """Retrieve a list of rows from a query table by job id.

    This method will append results from multiple pages together. If you
    want to manually page through results, you can use the
    `get_query_results` method directly.

    Args:
        job_id: The job id that references a BigQuery query.
        offset: The offset of the rows to pull from BigQuery.
        limit: The number of rows to retrieve from a query table.
        timeout: Timeout in seconds.

    Returns:
        A list of dictionaries that represent table rows.

    Raises:
        UnfinishedQueryException: If the referenced job has not completed.
    """

    # First page of results (often the only one).
    query_reply = self.get_query_results(job_id, offset=offset,
                                         limit=limit, timeout=timeout)
    if not query_reply['jobComplete']:
        logging.warning('BigQuery job %s not complete' % job_id)
        raise UnfinishedQueryException()

    schema = query_reply["schema"]["fields"]
    rows = query_reply.get('rows', [])
    page_token = query_reply.get("pageToken")
    records = [self._transform_row(row, schema) for row in rows]

    # Append to records while there are additional pages of results.
    while page_token:
        query_reply = self.get_query_results(
            job_id, offset=offset, limit=limit, page_token=page_token,
            timeout=timeout)
        page_token = query_reply.get("pageToken")
        rows = query_reply.get('rows', [])
        records += [self._transform_row(row, schema) for row in rows]
    return records
327+
def check_dataset(self, dataset_id):
    """Check to see if a dataset exists.

    Args:
        dataset_id: Dataset unique id.

    Returns:
        bool indicating if the dataset exists.
    """
    # get_dataset returns {} when the dataset is missing, which is falsy.
    dataset = self.get_dataset(dataset_id)
    return bool(dataset)
322337
323- return [self ._transform_row (row , schema ) for row in rows ]
def get_dataset(self, dataset_id):
    """Retrieve a dataset if it exists, otherwise return an empty dict.

    Args:
        dataset_id: Dataset unique id.

    Returns:
        Dictionary containing the dataset object if it exists, otherwise
        an empty dictionary.
    """
    try:
        dataset = self.bigquery.datasets().get(
            projectId=self.project_id, datasetId=dataset_id).execute()
    except HttpError:
        # The API raises HttpError (e.g. 404) for a missing dataset;
        # treat that as "does not exist" rather than propagating.
        dataset = {}

    return dataset
324354
325355 def check_table (self , dataset , table ):
326356 """Check to see if a table exists.
@@ -1039,27 +1069,28 @@ def _in_range(self, start_time, end_time, time):
10391069 time <= start_time <= time + ONE_MONTH or \
10401070 time <= end_time <= time + ONE_MONTH
10411071
def get_query_results(self, job_id, offset=None, limit=None,
                      page_token=None, timeout=0):
    """Execute the query job indicated by the given job id.

    This is a direct mapping to the BigQuery API:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs/getQueryResults

    Args:
        job_id: The job id of the query to check.
        offset: The index the result set should start at.
        limit: The maximum number of results to retrieve.
        page_token: Page token, returned by a previous call, to request
            the next page of results.
        timeout: Timeout in seconds.

    Returns:
        The query reply.
    """

    job_collection = self.bigquery.jobs()
    return job_collection.getQueryResults(
        projectId=self.project_id,
        jobId=job_id,
        startIndex=offset,
        maxResults=limit,
        pageToken=page_token,
        timeoutMs=timeout * 1000).execute()
10631094
10641095 def _transform_row (self , row , schema ):
10651096 """Apply the given schema to the given BigQuery data row.
0 commit comments