feat: [google-cloud-speech] add API for writing BatchRecognize transcripts in SRT and VTT formats (googleapis#12344)

gcf-owl-bot[bot] · ohmayr · web-flow · commit b76625a7f009 · 2024-02-21T18:38:43.000-05:00
BEGIN_COMMIT_OVERRIDE feat: add API for writing BatchRecognize transcripts in SRT and VTT formats docs: update field documentation based on field behavior updates END_COMMIT_OVERRIDE - [ ] Regenerate this pull request now. docs: update field documentation based on field behavior updates PiperOrigin-RevId: 609024258 Source-Link: googleapis/googleapis@4d70a58 Source-Link: https://github.com/googleapis/googleapis-gen/commit/aeb46892d9543cde1b51d9eebf553d9026056292 Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLXNwZWVjaC8uT3dsQm90LnlhbWwiLCJoIjoiYWViNDY4OTJkOTU0M2NkZTFiNTFkOWVlYmY1NTNkOTAyNjA1NjI5MiJ9 --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com> Co-authored-by: ohmayr <omairnaveed@ymail.com>
diff --git a/packages/google-cloud-speech/google/cloud/speech_v2/__init__.py b/packages/google-cloud-speech/google/cloud/speech_v2/__init__.py
@@ -51,7 +51,9 @@
     ListPhraseSetsResponse,
     ListRecognizersRequest,
     ListRecognizersResponse,
+    NativeOutputFileFormatConfig,
     OperationMetadata,
+    OutputFormatConfig,
     PhraseSet,
     RecognitionConfig,
     RecognitionFeatures,
@@ -64,6 +66,7 @@
     SpeechAdaptation,
     SpeechRecognitionAlternative,
     SpeechRecognitionResult,
+    SrtOutputFileFormatConfig,
     StreamingRecognitionConfig,
     StreamingRecognitionFeatures,
     StreamingRecognitionResult,
@@ -77,6 +80,7 @@
     UpdateCustomClassRequest,
     UpdatePhraseSetRequest,
     UpdateRecognizerRequest,
+    VttOutputFileFormatConfig,
     WordInfo,
 )
 
@@ -113,7 +117,9 @@
     "ListPhraseSetsResponse",
     "ListRecognizersRequest",
     "ListRecognizersResponse",
+    "NativeOutputFileFormatConfig",
     "OperationMetadata",
+    "OutputFormatConfig",
     "PhraseSet",
     "RecognitionConfig",
     "RecognitionFeatures",
@@ -127,6 +133,7 @@
     "SpeechClient",
     "SpeechRecognitionAlternative",
     "SpeechRecognitionResult",
+    "SrtOutputFileFormatConfig",
     "StreamingRecognitionConfig",
     "StreamingRecognitionFeatures",
     "StreamingRecognitionResult",
@@ -140,5 +147,6 @@
     "UpdateCustomClassRequest",
     "UpdatePhraseSetRequest",
     "UpdateRecognizerRequest",
+    "VttOutputFileFormatConfig",
     "WordInfo",
 )
diff --git a/packages/google-cloud-speech/google/cloud/speech_v2/types/__init__.py b/packages/google-cloud-speech/google/cloud/speech_v2/types/__init__.py
@@ -45,7 +45,9 @@
     ListPhraseSetsResponse,
     ListRecognizersRequest,
     ListRecognizersResponse,
+    NativeOutputFileFormatConfig,
     OperationMetadata,
+    OutputFormatConfig,
     PhraseSet,
     RecognitionConfig,
     RecognitionFeatures,
@@ -58,6 +60,7 @@
     SpeechAdaptation,
     SpeechRecognitionAlternative,
     SpeechRecognitionResult,
+    SrtOutputFileFormatConfig,
     StreamingRecognitionConfig,
     StreamingRecognitionFeatures,
     StreamingRecognitionResult,
@@ -71,6 +74,7 @@
     UpdateCustomClassRequest,
     UpdatePhraseSetRequest,
     UpdateRecognizerRequest,
+    VttOutputFileFormatConfig,
     WordInfo,
 )
 
@@ -106,7 +110,9 @@
     "ListPhraseSetsResponse",
     "ListRecognizersRequest",
     "ListRecognizersResponse",
+    "NativeOutputFileFormatConfig",
     "OperationMetadata",
+    "OutputFormatConfig",
     "PhraseSet",
     "RecognitionConfig",
     "RecognitionFeatures",
@@ -119,6 +125,7 @@
     "SpeechAdaptation",
     "SpeechRecognitionAlternative",
     "SpeechRecognitionResult",
+    "SrtOutputFileFormatConfig",
     "StreamingRecognitionConfig",
     "StreamingRecognitionFeatures",
     "StreamingRecognitionResult",
@@ -132,5 +139,6 @@
     "UpdateCustomClassRequest",
     "UpdatePhraseSetRequest",
     "UpdateRecognizerRequest",
+    "VttOutputFileFormatConfig",
     "WordInfo",
 )
diff --git a/packages/google-cloud-speech/google/cloud/speech_v2/types/cloud_speech.py b/packages/google-cloud-speech/google/cloud/speech_v2/types/cloud_speech.py
@@ -54,6 +54,10 @@
         "BatchRecognizeRequest",
         "GcsOutputConfig",
         "InlineOutputConfig",
+        "NativeOutputFileFormatConfig",
+        "VttOutputFileFormatConfig",
+        "SrtOutputFileFormatConfig",
+        "OutputFormatConfig",
         "RecognitionOutputConfig",
         "BatchRecognizeResponse",
         "BatchRecognizeResults",
@@ -581,7 +585,8 @@ class Recognizer(proto.Message):
 
     Attributes:
         name (str):
-            Output only. The resource name of the Recognizer. Format:
+            Output only. Identifier. The resource name of the
+            Recognizer. Format:
             ``projects/{project}/locations/{location}/recognizers/{recognizer}``.
         uid (str):
             Output only. System-assigned unique
@@ -1801,6 +1806,64 @@ class InlineOutputConfig(proto.Message):
     r"""Output configurations for inline response."""
 
 
+class NativeOutputFileFormatConfig(proto.Message):
+    r"""Output configurations for serialized ``BatchRecognizeResults``
+    protos.
+
+    """
+
+
+class VttOutputFileFormatConfig(proto.Message):
+    r"""Output configurations for
+    `WebVTT <https://www.w3.org/TR/webvtt1/>`__ formatted subtitle file.
+
+    """
+
+
+class SrtOutputFileFormatConfig(proto.Message):
+    r"""Output configurations for `SubRip
+    Text <https://www.matroska.org/technical/subtitles.html#srt-subtitles>`__
+    formatted subtitle file.
+
+    """
+
+
+class OutputFormatConfig(proto.Message):
+    r"""Configuration for the format of the results stored to ``output``.
+
+    Attributes:
+        native (google.cloud.speech_v2.types.NativeOutputFileFormatConfig):
+            Configuration for the native output format.
+            If this field is set or if no other output
+            format field is set then transcripts will be
+            written to the sink in the native format.
+        vtt (google.cloud.speech_v2.types.VttOutputFileFormatConfig):
+            Configuration for the vtt output format. If
+            this field is set then transcripts will be
+            written to the sink in the vtt format.
+        srt (google.cloud.speech_v2.types.SrtOutputFileFormatConfig):
+            Configuration for the srt output format. If
+            this field is set then transcripts will be
+            written to the sink in the srt format.
+    """
+
+    native: "NativeOutputFileFormatConfig" = proto.Field(
+        proto.MESSAGE,
+        number=1,
+        message="NativeOutputFileFormatConfig",
+    )
+    vtt: "VttOutputFileFormatConfig" = proto.Field(
+        proto.MESSAGE,
+        number=2,
+        message="VttOutputFileFormatConfig",
+    )
+    srt: "SrtOutputFileFormatConfig" = proto.Field(
+        proto.MESSAGE,
+        number=3,
+        message="SrtOutputFileFormatConfig",
+    )
+
+
 class RecognitionOutputConfig(proto.Message):
     r"""Configuration options for the output(s) of recognition.
 
@@ -1828,6 +1891,10 @@ class RecognitionOutputConfig(proto.Message):
             with just one audio file.
 
             This field is a member of `oneof`_ ``output``.
+        output_format_config (google.cloud.speech_v2.types.OutputFormatConfig):
+            Optional. Configuration for the format of the results stored
+            to ``output``. If unspecified, transcripts will be written in
+            the ``NATIVE`` format only.
     """
 
     gcs_output_config: "GcsOutputConfig" = proto.Field(
@@ -1842,6 +1909,11 @@ class RecognitionOutputConfig(proto.Message):
         oneof="output",
         message="InlineOutputConfig",
     )
+    output_format_config: "OutputFormatConfig" = proto.Field(
+        proto.MESSAGE,
+        number=3,
+        message="OutputFormatConfig",
+    )
 
 
 class BatchRecognizeResponse(proto.Message):
@@ -1905,12 +1977,28 @@ class CloudStorageResult(proto.Message):
         uri (str):
             The Cloud Storage URI to which recognition
             results were written.
+        vtt_format_uri (str):
+            The Cloud Storage URI to which recognition results were
+            written as VTT formatted captions. This is populated only
+            when ``VTT`` output is requested.
+        srt_format_uri (str):
+            The Cloud Storage URI to which recognition results were
+            written as SRT formatted captions. This is populated only
+            when ``SRT`` output is requested.
     """
 
     uri: str = proto.Field(
         proto.STRING,
         number=1,
     )
+    vtt_format_uri: str = proto.Field(
+        proto.STRING,
+        number=2,
+    )
+    srt_format_uri: str = proto.Field(
+        proto.STRING,
+        number=3,
+    )
 
 
 class InlineResult(proto.Message):
@@ -1919,13 +2007,27 @@ class InlineResult(proto.Message):
     Attributes:
         transcript (google.cloud.speech_v2.types.BatchRecognizeResults):
             The transcript for the audio file.
+        vtt_captions (str):
+            The transcript for the audio file as VTT formatted captions.
+            This is populated only when ``VTT`` output is requested.
+        srt_captions (str):
+            The transcript for the audio file as SRT formatted captions.
+            This is populated only when ``SRT`` output is requested.
     """
 
     transcript: "BatchRecognizeResults" = proto.Field(
         proto.MESSAGE,
         number=1,
         message="BatchRecognizeResults",
     )
+    vtt_captions: str = proto.Field(
+        proto.STRING,
+        number=2,
+    )
+    srt_captions: str = proto.Field(
+        proto.STRING,
+        number=3,
+    )
 
 
 class BatchRecognizeFileResult(proto.Message):
@@ -2308,9 +2410,9 @@ class Config(proto.Message):
 
     Attributes:
         name (str):
-            Output only. The name of the config resource. There is
-            exactly one config resource per project per location. The
-            expected format is
+            Output only. Identifier. The name of the config resource.
+            There is exactly one config resource per project per
+            location. The expected format is
             ``projects/{project}/locations/{location}/config``.
         kms_key_name (str):
             Optional. An optional `KMS key
@@ -2393,14 +2495,16 @@ class CustomClass(proto.Message):
 
     Attributes:
         name (str):
-            Output only. The resource name of the CustomClass. Format:
+            Output only. Identifier. The resource name of the
+            CustomClass. Format:
             ``projects/{project}/locations/{location}/customClasses/{custom_class}``.
         uid (str):
             Output only. System-assigned unique
             identifier for the CustomClass.
         display_name (str):
-            User-settable, human-readable name for the
-            CustomClass. Must be 63 characters or less.
+            Optional. User-settable, human-readable name
+            for the CustomClass. Must be 63 characters or
+            less.
         items (MutableSequence[google.cloud.speech_v2.types.CustomClass.ClassItem]):
             A collection of class items.
         state (google.cloud.speech_v2.types.CustomClass.State):
@@ -2417,9 +2521,9 @@ class CustomClass(proto.Message):
             Output only. The time at which this resource
             will be purged.
         annotations (MutableMapping[str, str]):
-            Allows users to store small amounts of
-            arbitrary data. Both the key and the value must
-            be 63 characters or less each. At most 100
+            Optional. Allows users to store small amounts
+            of arbitrary data. Both the key and the value
+            must be 63 characters or less each. At most 100
             annotations.
         etag (str):
             Output only. This checksum is computed by the
@@ -2545,7 +2649,8 @@ class PhraseSet(proto.Message):
 
     Attributes:
         name (str):
-            Output only. The resource name of the PhraseSet. Format:
+            Output only. Identifier. The resource name of the PhraseSet.
+            Format:
             ``projects/{project}/locations/{location}/phraseSets/{phrase_set}``.
         uid (str):
             Output only. System-assigned unique