Skip to content

Commit 6513ff3

Browse files
committed
Merge pull request tylertreat#56 from cpdean/refactor-unified-service-calls
factor querying and job insertion into shared method
2 parents d8c5964 + 408f6ec commit 6513ff3

File tree

1 file changed

+83
-27
lines changed

1 file changed

+83
-27
lines changed

bigquery/client.py

Lines changed: 83 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -120,17 +120,22 @@ def __init__(self, bq_service, project_id, swallow_results=True):
120120
self.swallow_results = swallow_results
121121
self.cache = {}
122122

123-
def query(self, query, max_results=None, timeout=0, dry_run=False):
124-
"""Submit a query to BigQuery.
123+
def _submit_query_job(self, query_data):
124+
125+
""" Submit a query job to BigQuery.
126+
127+
This is similar to BigQueryClient.query, but gives the user
128+
direct access to the query method on the official BigQuery
129+
python client.
130+
131+
For fine-grained control over a query job, see:
132+
https://google-api-client-libraries.appspot.com/documentation/bigquery/v2/python/latest/bigquery_v2.jobs.html#query
133+
134+
125135
126136
Args:
127-
query: BigQuery query string.
128-
max_results: maximum number of rows to return per page of results.
129-
timeout: how long to wait for the query to complete, in seconds,
130-
before the request times out and returns.
131-
dry_run: if True, the query isn't actually run. A valid query will
132-
return an empty response, while an invalid one will return
133-
the same error message it would if it wasn't a dry run.
137+
query_data: query object as per "configuration.query" in
138+
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query
134139
135140
Returns:
136141
job id and query results if query completed. If dry_run is True,
@@ -141,21 +146,15 @@ def query(self, query, max_results=None, timeout=0, dry_run=False):
141146
BigQueryTimeoutException on timeout
142147
"""
143148

144-
logging.debug('Executing query: %s' % query)
149+
logging.debug('Submitting query job: %s' % query_data)
145150

146151
job_collection = self.bigquery.jobs()
147-
query_data = {
148-
'query': query,
149-
'timeoutMs': timeout * 1000,
150-
'dryRun': dry_run,
151-
'maxResults': max_results,
152-
}
153152

154153
try:
155154
query_reply = job_collection.query(
156155
projectId=self.project_id, body=query_data).execute()
157156
except HttpError as e:
158-
if dry_run:
157+
if query_data.get("dryRun", False):
159158
return None, json.loads(e.content)
160159
raise
161160

@@ -166,12 +165,75 @@ def query(self, query, max_results=None, timeout=0, dry_run=False):
166165

167166
# raise exceptions if it's not an async query
168167
# and job is not completed after timeout
169-
if not job_complete and timeout:
168+
if not job_complete and query_data.get("timeoutMs", False):
170169
logging.error('BigQuery job %s timeout' % job_id)
171170
raise BigQueryTimeoutException()
172171

173172
return job_id, [self._transform_row(row, schema) for row in rows]
174173

174+
def _insert_job(self, body_object):
175+
176+
""" Submit a job to BigQuery
177+
178+
Direct proxy to the insert() method of the official BigQuery
179+
python client.
180+
181+
Able to submit load, link, query, copy, or extract jobs.
182+
183+
For more details, see:
184+
https://google-api-client-libraries.appspot.com/documentation/bigquery/v2/python/latest/bigquery_v2.jobs.html#insert
185+
186+
187+
Args:
188+
body_object: body object passed to bigquery.jobs().insert()
189+
190+
Returns:
191+
response of the bigquery.jobs().insert().execute() call
192+
193+
Raises:
194+
HttpError if the underlying jobs().insert() request fails
195+
"""
196+
197+
logging.debug('Submitting job: %s' % body_object)
198+
199+
job_collection = self.bigquery.jobs()
200+
201+
return job_collection.insert(
202+
projectId=self.project_id,
203+
body=body_object
204+
).execute()
205+
206+
def query(self, query, max_results=None, timeout=0, dry_run=False):
207+
"""Submit a query to BigQuery.
208+
209+
Args:
210+
query: BigQuery query string.
211+
max_results: maximum number of rows to return per page of results.
212+
timeout: how long to wait for the query to complete, in seconds,
213+
before the request times out and returns.
214+
dry_run: if True, the query isn't actually run. A valid query will
215+
return an empty response, while an invalid one will return
216+
the same error message it would if it wasn't a dry run.
217+
218+
Returns:
219+
job id and query results if query completed. If dry_run is True,
220+
job id will be None and results will be empty if the query is valid
221+
or a dict containing the response if invalid.
222+
223+
Raises:
224+
BigQueryTimeoutException on timeout
225+
"""
226+
227+
logging.debug('Executing query: %s' % query)
228+
229+
query_data = {
230+
'query': query,
231+
'timeoutMs': timeout * 1000,
232+
'dryRun': dry_run,
233+
'maxResults': max_results,
234+
}
235+
return self._submit_query_job(query_data)
236+
175237
def get_query_schema(self, job_id):
176238
"""Retrieve the schema of a query by job id.
177239
@@ -534,9 +596,7 @@ def import_data_from_uris(
534596
}
535597

536598
logging.debug("Creating load job %s" % body)
537-
job_resource = self.bigquery.jobs() \
538-
.insert(projectId=self.project_id, body=body) \
539-
.execute()
599+
job_resource = self._insert_job(body)
540600
self._raise_insert_exception_if_error(job_resource)
541601
return job_resource
542602

@@ -620,9 +680,7 @@ def export_data_to_uris(
620680
}
621681

622682
logging.info("Creating export job %s" % body)
623-
job_resource = self.bigquery.jobs() \
624-
.insert(projectId=self.project_id, body=body) \
625-
.execute()
683+
job_resource = self._insert_job(body)
626684
self._raise_insert_exception_if_error(job_resource)
627685
return job_resource
628686

@@ -696,9 +754,7 @@ def write_to_table(
696754
}
697755

698756
logging.info("Creating write to table job %s" % body)
699-
job_resource = self.bigquery.jobs() \
700-
.insert(projectId=self.project_id, body=body) \
701-
.execute()
757+
job_resource = self._insert_job(body)
702758
self._raise_insert_exception_if_error(job_resource)
703759
return job_resource
704760

0 commit comments

Comments
 (0)