1919# To install the latest published package dependency, execute the following:
2020# pip install google-cloud-vision
2121
22+ # sample-metadata
23+ # title:
24+ # description: Perform batch file annotation
25+ # usage: python3 samples/v1/vision_batch_annotate_files_gcs.py [--storage_uri "gs://cloud-samples-data/vision/document_understanding/kafka.pdf"]
2226import sys
2327
2428# [START vision_batch_annotate_files_gcs]
2731from google .cloud .vision_v1 import enums
2832import six
2933
30- def sample_batch_annotate_files (gcs_uri ):
31- """Perform batch file annotation"""
34+ def sample_batch_annotate_files (storage_uri ):
35+ """
36+ Perform batch file annotation
37+
38+ Args:
39+ storage_uri Cloud Storage URI of the source document file (e.g. a PDF),
40+ in the format gs://[bucket]/[file]
41+ """
3242 # [START vision_batch_annotate_files_gcs_core]
3343
3444 client = vision_v1 .ImageAnnotatorClient ()
3545
36- # gcs_uri = 'gs://cloud-samples-data/vision/document_understanding/kafka.pdf'
46+ # storage_uri = 'gs://cloud-samples-data/vision/document_understanding/kafka.pdf'
3747
38- if isinstance (gcs_uri , six .binary_type ):
39- gcs_uri = gcs_uri .decode ('utf-8' )
40- gcs_source = {'uri' : gcs_uri }
48+ if isinstance (storage_uri , six .binary_type ):
49+ storage_uri = storage_uri .decode ('utf-8' )
50+ gcs_source = {'uri' : storage_uri }
4151 input_config = {'gcs_source' : gcs_source }
4252 type_ = enums .Feature .Type .DOCUMENT_TEXT_DETECTION
4353 features_element = {'type' : type_ }
4454 features = [features_element ]
4555
46- # The service can process up to 5 pages per document file. Here we specify the
47- # first, second, and last page of the document to be processed.
56+ # The service can process up to 5 pages per document file.
57+ # Here we specify the first, second, and last page of the document to be
58+ # processed.
4859 pages_element = 1
4960 pages_element_2 = 2
5061 pages_element_3 = - 1
@@ -57,7 +68,6 @@ def sample_batch_annotate_files(gcs_uri):
5768 print ('Full text: {}' .format (image_response .full_text_annotation .text ))
5869 for page in image_response .full_text_annotation .pages :
5970 for block in page .blocks :
60- # The service also returns the bounding boxes for blocks, paragraphs, words, and symbols.
6171 print ('\n Block confidence: {}' .format (block .confidence ))
6272 for par in block .paragraphs :
6373 print ('\t Paragraph confidence: {}' .format (par .confidence ))
@@ -73,10 +83,10 @@ def main():
7383 import argparse
7484
7585 parser = argparse .ArgumentParser ()
76- parser .add_argument ('--gcs_uri ' , type = str , default = 'gs://cloud-samples-data/vision/document_understanding/kafka.pdf' )
86+ parser .add_argument ('--storage_uri ' , type = str , default = 'gs://cloud-samples-data/vision/document_understanding/kafka.pdf' )
7787 args = parser .parse_args ()
7888
79- sample_batch_annotate_files (args .gcs_uri )
89+ sample_batch_annotate_files (args .storage_uri )
8090
8191if __name__ == '__main__' :
8292 main ()
0 commit comments