
Commit f09f4da

Merge branch 'master' into render_query_modifications
2 parents: 2d3a08b + 9deda4e

File tree: 4 files changed, +342 −8 lines

README.md

Lines changed: 35 additions & 0 deletions
@@ -26,6 +26,11 @@ key = 'key.pem'
 client = get_client(project_id, service_account=service_account,
                     private_key_file=key, readonly=True)
 
+# JSON key provided by Google
+json_key = 'key.json'
+
+client = get_client(project_id, json_key_file=json_key, readonly=True)
+
 # Submit an async query.
 job_id, _results = client.query('SELECT * FROM dataset.my_table LIMIT 1000')
 
@@ -182,6 +187,34 @@ try:
 except BigQueryTimeoutException:
     print "Timeout"
 
+# write to permanent table with UDF in query string
+external_udf_uris = ["gs://bigquery-sandbox-udf/url_decode.js"]
+query = """SELECT requests, title
+            FROM
+                urlDecode(
+                    SELECT
+                        title, sum(requests) AS num_requests
+                    FROM
+                        [fh-bigquery:wikipedia.pagecounts_201504]
+                    WHERE language = 'fr'
+                    GROUP EACH BY title
+                )
+            WHERE title LIKE '%ç%'
+            ORDER BY requests DESC
+            LIMIT 100
+        """
+job = client.write_to_table(
+    query,
+    'dataset',
+    'table',
+    external_udf_uris=external_udf_uris
+)
+
+try:
+    job_resource = client.wait_for_job(job, timeout=60)
+    print job_resource
+except BigQueryTimeoutException:
+    print "Timeout"
 
 # write to temporary table
 job = client.write_to_table('SELECT * FROM dataset.original_table LIMIT 100')
@@ -190,6 +223,8 @@ try:
     print job_resource
 except BigQueryTimeoutException:
     print "Timeout"
+
+
 ```
 
 # Import data from Google cloud storage
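
The README addition above authenticates with a JSON key file. The updated `get_client` (see bigquery/client.py below) also accepts an already-parsed key via the `json_key` argument, from which it pulls `client_email` and `private_key`. A minimal sketch of that variant, assuming `project_id` is defined as earlier in the README and `key.json` is the service-account key downloaded from Google:

```python
import json

# Parse the Google-issued JSON key yourself and pass the dict directly.
with open('key.json') as key_file:
    json_key = json.load(key_file)

client = get_client(project_id, json_key=json_key, readonly=True)
```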

bigquery/client.py

Lines changed: 119 additions & 6 deletions
@@ -44,8 +44,9 @@
 
 
 def get_client(project_id, credentials=None, service_account=None,
-               private_key=None, private_key_file=None, readonly=True,
-               swallow_results=True):
+               private_key=None, private_key_file=None,
+               json_key=None, json_key_file=None,
+               readonly=True, swallow_results=True):
     """Return a singleton instance of BigQueryClient. Either
     AssertionCredentials or a service account and private key combination need
     to be provided in order to authenticate requests to BigQuery.
@@ -60,6 +61,9 @@ def get_client(project_id, credentials=None, service_account=None,
         private_key_file: the name of the file containing the private key
                           associated with the service account in PKCS12 or PEM
                           format.
+        json_key: the JSON key associated with the service account
+        json_key_file: the name of the JSON key file associated with
+                       the service account
         readonly: bool indicating if BigQuery access is read-only. Has no
                   effect if credentials are provided.
         swallow_results: If set to false then return the actual response value
@@ -70,13 +74,21 @@ def get_client(project_id, credentials=None, service_account=None,
     """
 
     if not credentials:
-        assert service_account and (private_key or private_key_file), \
-            'Must provide AssertionCredentials or service account and key'
+        assert (service_account and (private_key or private_key_file)) or (json_key or json_key_file), \
+            'Must provide AssertionCredentials or service account and P12 key or JSON key'
 
     if private_key_file:
         with open(private_key_file, 'rb') as key_file:
             private_key = key_file.read()
 
+    if json_key_file:
+        with open(json_key_file, 'r') as key_file:
+            json_key = json.load(key_file)
+
+    if json_key:
+        service_account = json_key['client_email']
+        private_key = json_key['private_key']
+
     bq_service = _get_bq_service(credentials=credentials,
                                  service_account=service_account,
                                  private_key=private_key,
@@ -315,13 +327,13 @@ def get_query_rows(self, job_id, offset=None, limit=None, timeout=0):
         records = [self._transform_row(row, schema) for row in rows]
 
         # Append to records if there are multiple pages for query results
-        while page_token:
+        while page_token and (not limit or len(records) < limit):
             query_reply = self.get_query_results(job_id, offset=offset, limit=limit,
                                                  page_token=page_token, timeout=timeout)
             page_token = query_reply.get("pageToken")
             rows = query_reply.get('rows', [])
             records += [self._transform_row(row, schema) for row in rows]
-        return records
+        return records[:limit] if limit else records
 
     def check_dataset(self, dataset_id):
         """Check to see if a dataset exists.
@@ -529,6 +541,90 @@ def create_table(self, dataset, table, schema, expiration_time=None):
         else:
             return {}
 
+    def update_table(self, dataset, table, schema):
+        """Update an existing table in the dataset.
+
+        Args:
+            dataset: the dataset to update the table in.
+            table: the name of table to update.
+            schema: table schema dict.
+
+        Returns:
+            bool indicating if the table was successfully updated or not,
+            or response from BigQuery if swallow_results is set for False.
+        """
+
+        body = {
+            'schema': {'fields': schema},
+            'tableReference': {
+                'tableId': table,
+                'projectId': self.project_id,
+                'datasetId': dataset
+            }
+        }
+
+        try:
+            result = self.bigquery.tables().update(
+                projectId=self.project_id,
+                datasetId=dataset,
+                body=body
+            ).execute()
+            if self.swallow_results:
+                return True
+            else:
+                return result
+
+        except HttpError as e:
+            logging.error(('Cannot update table {0}.{1}\n'
+                           'Http Error: {2}').format(dataset, table,
+                                                     e.content))
+            if self.swallow_results:
+                return False
+            else:
+                return {}
+
+    def patch_table(self, dataset, table, schema):
+        """Patch an existing table in the dataset.
+
+        Args:
+            dataset: the dataset to patch the table in.
+            table: the name of table to patch.
+            schema: table schema dict.
+
+        Returns:
+            bool indicating if the table was successfully patched or not,
+            or response from BigQuery if swallow_results is set for False.
+        """
+
+        body = {
+            'schema': {'fields': schema},
+            'tableReference': {
+                'tableId': table,
+                'projectId': self.project_id,
+                'datasetId': dataset
+            }
+        }
+
+        try:
+            result = self.bigquery.tables().patch(
+                projectId=self.project_id,
+                datasetId=dataset,
+                body=body
+            ).execute()
+            if self.swallow_results:
+                return True
+            else:
+                return result
+
+        except HttpError as e:
+            logging.error(('Cannot patch table {0}.{1}\n'
+                           'Http Error: {2}').format(dataset, table,
+                                                     e.content))
+            if self.swallow_results:
+                return False
+            else:
+                return {}
+
     def create_view(self, dataset, view, query):
         """Create a new view in the dataset.
 
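
A usage sketch for the new `update_table` and `patch_table` methods, assuming `client` comes from `get_client` as in the README and the schema uses the same list-of-field-dicts format accepted by `create_table` (`'dataset'` and `'table'` are placeholders):

```python
schema = [
    {'name': 'title', 'type': 'STRING', 'mode': 'NULLABLE'},
    {'name': 'requests', 'type': 'INTEGER', 'mode': 'NULLABLE'},
]

# Replace the table definition (tables().update semantics).
updated = client.update_table('dataset', 'table', schema)

# Apply an incremental schema change (tables().patch semantics).
patched = client.patch_table('dataset', 'table', schema)

# With the default swallow_results=True both calls return True or False;
# with swallow_results=False they return the raw BigQuery response dict.
```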
@@ -864,6 +960,7 @@ def write_to_table(
             query,
             dataset=None,
             table=None,
+            external_udf_uris=[],
             allow_large_results=None,
             use_query_cache=None,
             priority=None,
@@ -877,6 +974,11 @@ def write_to_table(
             query: required BigQuery query string.
             dataset: optional string id of the dataset
             table: optional string id of the table
+            external_udf_uris: optional list of external UDF URIs
+                               (if given,
+                                URIs must be Google Cloud Storage
+                                and have .js extensions
+                               )
             allow_large_results: optional boolean
             use_query_cache: optional boolean
             priority: optional string
@@ -922,6 +1024,14 @@ def write_to_table(
         if write_disposition:
             configuration['writeDisposition'] = write_disposition
 
+        configuration['userDefinedFunctionResources'] = []
+        for external_udf_uri in external_udf_uris:
+            configuration['userDefinedFunctionResources'].append(
+                {
+                    "resourceUri": external_udf_uri
+                }
+            )
+
         body = {
             "configuration": {
                 'query': configuration
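
For reference, the loop above maps each entry of `external_udf_uris` to a `userDefinedFunctionResources` item in the query configuration, so the README's UDF example would contribute roughly this fragment to the job body (sketch only; the other configuration keys are omitted):

```python
configuration = {
    # ... 'query', 'writeDisposition', etc. set earlier in write_to_table ...
    'userDefinedFunctionResources': [
        {'resourceUri': 'gs://bigquery-sandbox-udf/url_decode.js'},
    ],
}
```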
@@ -1234,6 +1344,9 @@ def _transform_row(self, row, schema):
             elif col_dict['type'] == 'BOOLEAN':
                 row_value = row_value in ('True', 'true', 'TRUE')
 
+            elif col_dict['type'] == 'TIMESTAMP':
+                row_value = float(row_value)
+
             log[col_name] = row_value
 
         return log
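
With this addition, TIMESTAMP columns come back from `get_query_rows` as floats (epoch seconds, as BigQuery reports them) rather than strings. A caller wanting a datetime can convert the value itself; a minimal sketch with a hypothetical `created_at` column:

```python
from datetime import datetime

for row in client.get_query_rows(job_id):
    # row['created_at'] is now a float of epoch seconds rather than a string
    created_at = datetime.utcfromtimestamp(row['created_at'])
```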
