@@ -120,17 +120,22 @@ def __init__(self, bq_service, project_id, swallow_results=True):
         self.swallow_results = swallow_results
         self.cache = {}
 
-    def query(self, query, max_results=None, timeout=0, dry_run=False):
-        """Submit a query to BigQuery.
+    def _submit_query_job(self, query_data):
+
+        """Submit a query job to BigQuery.
+
+        This is similar to BigQueryClient.query, but gives the user
+        direct access to the query method on the official BigQuery
+        Python client.
+
+        For fine-grained control over a query job, see:
+        https://google-api-client-libraries.appspot.com/documentation/bigquery/v2/python/latest/bigquery_v2.jobs.html#query
+
+
 
         Args:
-            query: BigQuery query string.
-            max_results: maximum number of rows to return per page of results.
-            timeout: how long to wait for the query to complete, in seconds,
-                before the request times out and returns.
-            dry_run: if True, the query isn't actually run. A valid query will
-                return an empty response, while an invalid one will return
-                the same error message it would if it wasn't a dry run.
+            query_data: query object as per "configuration.query" in
+                https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query
 
         Returns:
             job id and query results if query completed. If dry_run is True,
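For readers unfamiliar with the query request format, the following is a rough sketch of the payload the new _submit_query_job accepts. It is not taken from the patch: client stands for an instance of this class, and the field names follow the jobs.query reference linked in the docstring (the query() wrapper shown later in this diff sets only query, timeoutMs, dryRun, and maxResults).

    # Sketch only: a query_data payload for _submit_query_job, mirroring the
    # jobs.query request fields referenced in the docstring above. "client" is
    # assumed to be an instance of this class; useQueryCache is shown purely
    # for illustration and is not set by the query() wrapper.
    query_data = {
        'query': 'SELECT word FROM publicdata:samples.shakespeare LIMIT 10',
        'timeoutMs': 10 * 1000,
        'dryRun': False,
        'maxResults': 100,
        'useQueryCache': True,
    }
    job_id, results = client._submit_query_job(query_data)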
@@ -141,21 +146,15 @@ def query(self, query, max_results=None, timeout=0, dry_run=False):
             BigQueryTimeoutException on timeout
         """
 
-        logging.debug('Executing query: %s' % query)
+        logging.debug('Submitting query job: %s' % query_data)
 
         job_collection = self.bigquery.jobs()
-        query_data = {
-            'query': query,
-            'timeoutMs': timeout * 1000,
-            'dryRun': dry_run,
-            'maxResults': max_results,
-        }
 
         try:
             query_reply = job_collection.query(
                 projectId=self.project_id, body=query_data).execute()
         except HttpError as e:
-            if dry_run:
+            if query_data.get("dryRun", False):
                 return None, json.loads(e.content)
             raise
 
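As the hunk above shows, a dry-run failure is now detected from the payload itself via query_data.get("dryRun", False) rather than from a keyword argument. A hedged illustration of the behaviour the docstring describes, again assuming client is an instance of this class:

    # Sketch only: dry run through the public query() wrapper (re-added further
    # down in this diff). Per the docstring, a valid query yields (None, []) and
    # an invalid one yields (None, error_dict) instead of raising.
    job_id, results = client.query(
        'SELECT word FROM publicdata:samples.shakespeare', dry_run=True)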
@@ -166,12 +165,75 @@ def query(self, query, max_results=None, timeout=0, dry_run=False):
 
         # raise exceptions if it's not an async query
         # and job is not completed after timeout
-        if not job_complete and timeout:
+        if not job_complete and query_data.get("timeoutMs", False):
             logging.error('BigQuery job %s timeout' % job_id)
             raise BigQueryTimeoutException()
 
         return job_id, [self._transform_row(row, schema) for row in rows]
 
+    def _insert_job(self, body_object):
+
+        """Submit a job to BigQuery.
+
+        Direct proxy to the insert() method of the official BigQuery
+        Python client.
+
+        It can submit load, link, query, copy, or extract jobs.
+
+        For more details, see:
+        https://google-api-client-libraries.appspot.com/documentation/bigquery/v2/python/latest/bigquery_v2.jobs.html#insert
+
+
+        Args:
+            body_object: body object passed to bigquery.jobs().insert()
+
+        Returns:
+            response of the bigquery.jobs().insert().execute() call
+
+        Raises:
+            BigQueryTimeoutException on timeout
+        """
+
+        logging.debug('Submitting job: %s' % body_object)
+
+        job_collection = self.bigquery.jobs()
+
+        return job_collection.insert(
+            projectId=self.project_id,
+            body=body_object
+        ).execute()
+
+    def query(self, query, max_results=None, timeout=0, dry_run=False):
+        """Submit a query to BigQuery.
+
+        Args:
+            query: BigQuery query string.
+            max_results: maximum number of rows to return per page of results.
+            timeout: how long to wait for the query to complete, in seconds,
+                before the request times out and returns.
+            dry_run: if True, the query isn't actually run. A valid query will
+                return an empty response, while an invalid one will return
+                the same error message it would if it wasn't a dry run.
+
+        Returns:
+            job id and query results if query completed. If dry_run is True,
+            job id will be None and results will be empty if the query is valid
+            or a dict containing the response if invalid.
+
+        Raises:
+            BigQueryTimeoutException on timeout
+        """
+
+        logging.debug('Executing query: %s' % query)
+
+        query_data = {
+            'query': query,
+            'timeoutMs': timeout * 1000,
+            'dryRun': dry_run,
+            'maxResults': max_results,
+        }
+        return self._submit_query_job(query_data)
+
 
     def get_query_schema(self, job_id):
         """Retrieve the schema of a query by job id.
@@ -534,9 +596,7 @@ def import_data_from_uris(
         }
 
         logging.debug("Creating load job %s" % body)
-        job_resource = self.bigquery.jobs() \
-            .insert(projectId=self.project_id, body=body) \
-            .execute()
+        job_resource = self._insert_job(body)
         self._raise_insert_exception_if_error(job_resource)
         return job_resource
 
@@ -620,9 +680,7 @@ def export_data_to_uris(
         }
 
         logging.info("Creating export job %s" % body)
-        job_resource = self.bigquery.jobs() \
-            .insert(projectId=self.project_id, body=body) \
-            .execute()
+        job_resource = self._insert_job(body)
         self._raise_insert_exception_if_error(job_resource)
         return job_resource
 
@@ -696,9 +754,7 @@ def write_to_table(
         }
 
         logging.info("Creating write to table job %s" % body)
-        job_resource = self.bigquery.jobs() \
-            .insert(projectId=self.project_id, body=body) \
-            .execute()
+        job_resource = self._insert_job(body)
         self._raise_insert_exception_if_error(job_resource)
         return job_resource
 
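The three call-site hunks above are a pure refactor: import_data_from_uris, export_data_to_uris, and write_to_table keep building the same body dictionaries and now hand them to the shared helper, so their behaviour and error handling are unchanged. The before/after shapes, for reference:

    # Before: each method issued the insert request inline.
    job_resource = self.bigquery.jobs() \
        .insert(projectId=self.project_id, body=body) \
        .execute()

    # After: the same request goes through the shared _insert_job helper.
    job_resource = self._insert_job(body)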