Skip to content

Commit b76625a

Browse files
feat: [google-cloud-speech] add API for writing BatchRecognize transcripts in SRT and VTT formats (googleapis#12344)
BEGIN_COMMIT_OVERRIDE feat: add API for writing BatchRecognize transcripts in SRT and VTT formats docs: update field documentation based on field behavior updates END_COMMIT_OVERRIDE - [ ] Regenerate this pull request now. docs: update field documentation based on field behavior updates PiperOrigin-RevId: 609024258 Source-Link: googleapis/googleapis@4d70a58 Source-Link: https://github.com/googleapis/googleapis-gen/commit/aeb46892d9543cde1b51d9eebf553d9026056292 Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLXNwZWVjaC8uT3dsQm90LnlhbWwiLCJoIjoiYWViNDY4OTJkOTU0M2NkZTFiNTFkOWVlYmY1NTNkOTAyNjA1NjI5MiJ9 --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com> Co-authored-by: ohmayr <omairnaveed@ymail.com>
1 parent 8ff0f9e commit b76625a

3 files changed

Lines changed: 132 additions & 11 deletions

File tree

packages/google-cloud-speech/google/cloud/speech_v2/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,9 @@
5151
ListPhraseSetsResponse,
5252
ListRecognizersRequest,
5353
ListRecognizersResponse,
54+
NativeOutputFileFormatConfig,
5455
OperationMetadata,
56+
OutputFormatConfig,
5557
PhraseSet,
5658
RecognitionConfig,
5759
RecognitionFeatures,
@@ -64,6 +66,7 @@
6466
SpeechAdaptation,
6567
SpeechRecognitionAlternative,
6668
SpeechRecognitionResult,
69+
SrtOutputFileFormatConfig,
6770
StreamingRecognitionConfig,
6871
StreamingRecognitionFeatures,
6972
StreamingRecognitionResult,
@@ -77,6 +80,7 @@
7780
UpdateCustomClassRequest,
7881
UpdatePhraseSetRequest,
7982
UpdateRecognizerRequest,
83+
VttOutputFileFormatConfig,
8084
WordInfo,
8185
)
8286

@@ -113,7 +117,9 @@
113117
"ListPhraseSetsResponse",
114118
"ListRecognizersRequest",
115119
"ListRecognizersResponse",
120+
"NativeOutputFileFormatConfig",
116121
"OperationMetadata",
122+
"OutputFormatConfig",
117123
"PhraseSet",
118124
"RecognitionConfig",
119125
"RecognitionFeatures",
@@ -127,6 +133,7 @@
127133
"SpeechClient",
128134
"SpeechRecognitionAlternative",
129135
"SpeechRecognitionResult",
136+
"SrtOutputFileFormatConfig",
130137
"StreamingRecognitionConfig",
131138
"StreamingRecognitionFeatures",
132139
"StreamingRecognitionResult",
@@ -140,5 +147,6 @@
140147
"UpdateCustomClassRequest",
141148
"UpdatePhraseSetRequest",
142149
"UpdateRecognizerRequest",
150+
"VttOutputFileFormatConfig",
143151
"WordInfo",
144152
)

packages/google-cloud-speech/google/cloud/speech_v2/types/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@
4545
ListPhraseSetsResponse,
4646
ListRecognizersRequest,
4747
ListRecognizersResponse,
48+
NativeOutputFileFormatConfig,
4849
OperationMetadata,
50+
OutputFormatConfig,
4951
PhraseSet,
5052
RecognitionConfig,
5153
RecognitionFeatures,
@@ -58,6 +60,7 @@
5860
SpeechAdaptation,
5961
SpeechRecognitionAlternative,
6062
SpeechRecognitionResult,
63+
SrtOutputFileFormatConfig,
6164
StreamingRecognitionConfig,
6265
StreamingRecognitionFeatures,
6366
StreamingRecognitionResult,
@@ -71,6 +74,7 @@
7174
UpdateCustomClassRequest,
7275
UpdatePhraseSetRequest,
7376
UpdateRecognizerRequest,
77+
VttOutputFileFormatConfig,
7478
WordInfo,
7579
)
7680

@@ -106,7 +110,9 @@
106110
"ListPhraseSetsResponse",
107111
"ListRecognizersRequest",
108112
"ListRecognizersResponse",
113+
"NativeOutputFileFormatConfig",
109114
"OperationMetadata",
115+
"OutputFormatConfig",
110116
"PhraseSet",
111117
"RecognitionConfig",
112118
"RecognitionFeatures",
@@ -119,6 +125,7 @@
119125
"SpeechAdaptation",
120126
"SpeechRecognitionAlternative",
121127
"SpeechRecognitionResult",
128+
"SrtOutputFileFormatConfig",
122129
"StreamingRecognitionConfig",
123130
"StreamingRecognitionFeatures",
124131
"StreamingRecognitionResult",
@@ -132,5 +139,6 @@
132139
"UpdateCustomClassRequest",
133140
"UpdatePhraseSetRequest",
134141
"UpdateRecognizerRequest",
142+
"VttOutputFileFormatConfig",
135143
"WordInfo",
136144
)

packages/google-cloud-speech/google/cloud/speech_v2/types/cloud_speech.py

Lines changed: 116 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@
5454
"BatchRecognizeRequest",
5555
"GcsOutputConfig",
5656
"InlineOutputConfig",
57+
"NativeOutputFileFormatConfig",
58+
"VttOutputFileFormatConfig",
59+
"SrtOutputFileFormatConfig",
60+
"OutputFormatConfig",
5761
"RecognitionOutputConfig",
5862
"BatchRecognizeResponse",
5963
"BatchRecognizeResults",
@@ -581,7 +585,8 @@ class Recognizer(proto.Message):
581585
582586
Attributes:
583587
name (str):
584-
Output only. The resource name of the Recognizer. Format:
588+
Output only. Identifier. The resource name of the
589+
Recognizer. Format:
585590
``projects/{project}/locations/{location}/recognizers/{recognizer}``.
586591
uid (str):
587592
Output only. System-assigned unique
@@ -1801,6 +1806,64 @@ class InlineOutputConfig(proto.Message):
18011806
r"""Output configurations for inline response."""
18021807

18031808

1809+
class NativeOutputFileFormatConfig(proto.Message):
1810+
r"""Output configurations for serialized ``BatchRecognizeResults``
1811+
protos.
1812+
1813+
"""
1814+
1815+
1816+
class VttOutputFileFormatConfig(proto.Message):
1817+
r"""Output configurations for
1818+
`WebVTT <https://www.w3.org/TR/webvtt1/>`__ formatted subtitle file.
1819+
1820+
"""
1821+
1822+
1823+
class SrtOutputFileFormatConfig(proto.Message):
1824+
r"""Output configurations for `SubRip
1825+
Text <https://www.matroska.org/technical/subtitles.html#srt-subtitles>`__
1826+
formatted subtitle file.
1827+
1828+
"""
1829+
1830+
1831+
class OutputFormatConfig(proto.Message):
1832+
r"""Configuration for the format of the results stored to ``output``.
1833+
1834+
Attributes:
1835+
native (google.cloud.speech_v2.types.NativeOutputFileFormatConfig):
1836+
Configuration for the native output format.
1837+
If this field is set or if no other output
1838+
format field is set then transcripts will be
1839+
written to the sink in the native format.
1840+
vtt (google.cloud.speech_v2.types.VttOutputFileFormatConfig):
1841+
Configuration for the vtt output format. If
1842+
this field is set then transcripts will be
1843+
written to the sink in the vtt format.
1844+
srt (google.cloud.speech_v2.types.SrtOutputFileFormatConfig):
1845+
Configuration for the srt output format. If
1846+
this field is set then transcripts will be
1847+
written to the sink in the srt format.
1848+
"""
1849+
1850+
native: "NativeOutputFileFormatConfig" = proto.Field(
1851+
proto.MESSAGE,
1852+
number=1,
1853+
message="NativeOutputFileFormatConfig",
1854+
)
1855+
vtt: "VttOutputFileFormatConfig" = proto.Field(
1856+
proto.MESSAGE,
1857+
number=2,
1858+
message="VttOutputFileFormatConfig",
1859+
)
1860+
srt: "SrtOutputFileFormatConfig" = proto.Field(
1861+
proto.MESSAGE,
1862+
number=3,
1863+
message="SrtOutputFileFormatConfig",
1864+
)
1865+
1866+
18041867
class RecognitionOutputConfig(proto.Message):
18051868
r"""Configuration options for the output(s) of recognition.
18061869
@@ -1828,6 +1891,10 @@ class RecognitionOutputConfig(proto.Message):
18281891
with just one audio file.
18291892
18301893
This field is a member of `oneof`_ ``output``.
1894+
output_format_config (google.cloud.speech_v2.types.OutputFormatConfig):
1895+
Optional. Configuration for the format of the results stored
1896+
to ``output``. If unspecified, transcripts will be written in
1897+
the ``NATIVE`` format only.
18311898
"""
18321899

18331900
gcs_output_config: "GcsOutputConfig" = proto.Field(
@@ -1842,6 +1909,11 @@ class RecognitionOutputConfig(proto.Message):
18421909
oneof="output",
18431910
message="InlineOutputConfig",
18441911
)
1912+
output_format_config: "OutputFormatConfig" = proto.Field(
1913+
proto.MESSAGE,
1914+
number=3,
1915+
message="OutputFormatConfig",
1916+
)
18451917

18461918

18471919
class BatchRecognizeResponse(proto.Message):
@@ -1905,12 +1977,28 @@ class CloudStorageResult(proto.Message):
19051977
uri (str):
19061978
The Cloud Storage URI to which recognition
19071979
results were written.
1980+
vtt_format_uri (str):
1981+
The Cloud Storage URI to which recognition results were
1982+
written as VTT formatted captions. This is populated only
1983+
when ``VTT`` output is requested.
1984+
srt_format_uri (str):
1985+
The Cloud Storage URI to which recognition results were
1986+
written as SRT formatted captions. This is populated only
1987+
when ``SRT`` output is requested.
19081988
"""
19091989

19101990
uri: str = proto.Field(
19111991
proto.STRING,
19121992
number=1,
19131993
)
1994+
vtt_format_uri: str = proto.Field(
1995+
proto.STRING,
1996+
number=2,
1997+
)
1998+
srt_format_uri: str = proto.Field(
1999+
proto.STRING,
2000+
number=3,
2001+
)
19142002

19152003

19162004
class InlineResult(proto.Message):
@@ -1919,13 +2007,27 @@ class InlineResult(proto.Message):
19192007
Attributes:
19202008
transcript (google.cloud.speech_v2.types.BatchRecognizeResults):
19212009
The transcript for the audio file.
2010+
vtt_captions (str):
2011+
The transcript for the audio file as VTT formatted captions.
2012+
This is populated only when ``VTT`` output is requested.
2013+
srt_captions (str):
2014+
The transcript for the audio file as SRT formatted captions.
2015+
This is populated only when ``SRT`` output is requested.
19222016
"""
19232017

19242018
transcript: "BatchRecognizeResults" = proto.Field(
19252019
proto.MESSAGE,
19262020
number=1,
19272021
message="BatchRecognizeResults",
19282022
)
2023+
vtt_captions: str = proto.Field(
2024+
proto.STRING,
2025+
number=2,
2026+
)
2027+
srt_captions: str = proto.Field(
2028+
proto.STRING,
2029+
number=3,
2030+
)
19292031

19302032

19312033
class BatchRecognizeFileResult(proto.Message):
@@ -2308,9 +2410,9 @@ class Config(proto.Message):
23082410
23092411
Attributes:
23102412
name (str):
2311-
Output only. The name of the config resource. There is
2312-
exactly one config resource per project per location. The
2313-
expected format is
2413+
Output only. Identifier. The name of the config resource.
2414+
There is exactly one config resource per project per
2415+
location. The expected format is
23142416
``projects/{project}/locations/{location}/config``.
23152417
kms_key_name (str):
23162418
Optional. An optional `KMS key
@@ -2393,14 +2495,16 @@ class CustomClass(proto.Message):
23932495
23942496
Attributes:
23952497
name (str):
2396-
Output only. The resource name of the CustomClass. Format:
2498+
Output only. Identifier. The resource name of the
2499+
CustomClass. Format:
23972500
``projects/{project}/locations/{location}/customClasses/{custom_class}``.
23982501
uid (str):
23992502
Output only. System-assigned unique
24002503
identifier for the CustomClass.
24012504
display_name (str):
2402-
User-settable, human-readable name for the
2403-
CustomClass. Must be 63 characters or less.
2505+
Optional. User-settable, human-readable name
2506+
for the CustomClass. Must be 63 characters or
2507+
less.
24042508
items (MutableSequence[google.cloud.speech_v2.types.CustomClass.ClassItem]):
24052509
A collection of class items.
24062510
state (google.cloud.speech_v2.types.CustomClass.State):
@@ -2417,9 +2521,9 @@ class CustomClass(proto.Message):
24172521
Output only. The time at which this resource
24182522
will be purged.
24192523
annotations (MutableMapping[str, str]):
2420-
Allows users to store small amounts of
2421-
arbitrary data. Both the key and the value must
2422-
be 63 characters or less each. At most 100
2524+
Optional. Allows users to store small amounts
2525+
of arbitrary data. Both the key and the value
2526+
must be 63 characters or less each. At most 100
24232527
annotations.
24242528
etag (str):
24252529
Output only. This checksum is computed by the
@@ -2545,7 +2649,8 @@ class PhraseSet(proto.Message):
25452649
25462650
Attributes:
25472651
name (str):
2548-
Output only. The resource name of the PhraseSet. Format:
2652+
Output only. Identifier. The resource name of the PhraseSet.
2653+
Format:
25492654
``projects/{project}/locations/{location}/phraseSets/{phrase_set}``.
25502655
uid (str):
25512656
Output only. System-assigned unique

0 commit comments

Comments
 (0)