Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ def streaming_synthesize(
metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> Awaitable[AsyncIterable[cloud_tts.StreamingSynthesizeResponse]]:
r"""Performs bidirectional streaming speech synthesis:
receive audio while sending text.
receives audio while sending text.

.. code-block:: python

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -961,7 +961,7 @@ def streaming_synthesize(
metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> Iterable[cloud_tts.StreamingSynthesizeResponse]:
r"""Performs bidirectional streaming speech synthesis:
receive audio while sending text.
receives audio while sending text.

.. code-block:: python

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ def streaming_synthesize(
r"""Return a callable for the streaming synthesize method over gRPC.

Performs bidirectional streaming speech synthesis:
receive audio while sending text.
receives audio while sending text.

Returns:
Callable[[~.StreamingSynthesizeRequest],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ def streaming_synthesize(
r"""Return a callable for the streaming synthesize method over gRPC.

Performs bidirectional streaming speech synthesis:
receive audio while sending text.
receives audio while sending text.

Returns:
Callable[[~.StreamingSynthesizeRequest],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,11 @@ class AudioEncoding(proto.Enum):
MP3 audio at 32kbps.
OGG_OPUS (3):
Opus encoded audio wrapped in an ogg
container. The result will be a file which can
be played natively on Android, and in browsers
(at least Chrome and Firefox). The quality of
the encoding is considerably higher than MP3
while using approximately the same bitrate.
container. The result is a file which can be
played natively on Android, and in browsers (at
least Chrome and Firefox). The quality of the
encoding is considerably higher than MP3 while
using approximately the same bitrate.
MULAW (5):
8-bit samples that compand 14-bit audio
samples using G.711 PCMU/mu-law. Audio content
Expand All @@ -107,7 +107,7 @@ class AudioEncoding(proto.Enum):
PCM (7):
Uncompressed 16-bit signed little-endian
samples (Linear PCM). Note that as opposed to
LINEAR16, audio will not be wrapped in a WAV (or
LINEAR16, audio won't be wrapped in a WAV (or
any other) header.
"""
AUDIO_ENCODING_UNSPECIFIED = 0
Expand Down Expand Up @@ -202,7 +202,7 @@ class AdvancedVoiceOptions(proto.Message):
Attributes:
low_latency_journey_synthesis (bool):
Only for Journey voices. If false, the
synthesis will be context aware and have higher
synthesis is context-aware and has higher
latency.

This field is a member of `oneof`_ ``_low_latency_journey_synthesis``.
Expand Down Expand Up @@ -268,10 +268,10 @@ class CustomPronunciationParams(proto.Message):

Attributes:
phrase (str):
The phrase to which the customization will be
applied. The phrase can be multiple words (in
the case of proper nouns etc), but should not
span to a whole sentence.
The phrase to which the customization is
applied. The phrase can be multiple words, such
as proper nouns, but shouldn't span the length
of the sentence.

This field is a member of `oneof`_ ``_phrase``.
phonetic_encoding (google.cloud.texttospeech_v1.types.CustomPronunciationParams.PhoneticEncoding):
Expand All @@ -292,10 +292,10 @@ class PhoneticEncoding(proto.Enum):
PHONETIC_ENCODING_UNSPECIFIED (0):
Not specified.
PHONETIC_ENCODING_IPA (1):
IPA. (e.g. apple -> ˈæpəl )
IPA, such as apple -> ˈæpəl.
https://en.wikipedia.org/wiki/International_Phonetic_Alphabet
PHONETIC_ENCODING_X_SAMPA (2):
X-SAMPA (e.g. apple -> "{p@l" )
X-SAMPA, such as apple -> "{p@l".
https://en.wikipedia.org/wiki/X-SAMPA
"""
PHONETIC_ENCODING_UNSPECIFIED = 0
Expand Down Expand Up @@ -325,8 +325,7 @@ class CustomPronunciations(proto.Message):

Attributes:
pronunciations (MutableSequence[google.cloud.texttospeech_v1.types.CustomPronunciationParams]):
The pronunciation customizations to be
applied.
The pronunciation customizations to apply.
"""

pronunciations: MutableSequence["CustomPronunciationParams"] = proto.RepeatedField(
Expand All @@ -345,7 +344,7 @@ class MultiSpeakerMarkup(proto.Message):
"""

class Turn(proto.Message):
r"""A Multi-speaker turn.
r"""A multi-speaker turn.

Attributes:
speaker (str):
Expand Down Expand Up @@ -405,21 +404,19 @@ class SynthesisInput(proto.Message):

This field is a member of `oneof`_ ``input_source``.
custom_pronunciations (google.cloud.texttospeech_v1.types.CustomPronunciations):
Optional. The pronunciation customizations to
be applied to the input. If this is set, the
input will be synthesized using the given
Optional. The pronunciation customizations
to apply to the input. If this is set, the
input is synthesized using the given
pronunciation customizations.

The initial support will be for EFIGS (English,
French, Italian, German, Spanish) languages, as
provided in VoiceSelectionParams. Journey and
Instant Clone voices are not supported yet.
The initial support is for en-us, with plans to
expand to other locales in the future. Instant
Clone voices aren't supported.

In order to customize the pronunciation of a
phrase, there must be an exact match of the
phrase in the input types. If using SSML, the
phrase must not be inside a phoneme tag
(entirely or partially).
phrase must not be inside a phoneme tag.
"""

text: str = proto.Field(
Expand Down Expand Up @@ -481,8 +478,9 @@ class VoiceSelectionParams(proto.Message):
the custom voice matching the specified configuration.
voice_clone (google.cloud.texttospeech_v1.types.VoiceCloneParams):
Optional. The configuration for a voice clone. If
[VoiceCloneParams.voice_clone_key] is set, the service will
choose the voice clone matching the specified configuration.
[VoiceCloneParams.voice_clone_key] is set, the service
chooses the voice clone matching the specified
configuration.
"""

language_code: str = proto.Field(
Expand Down Expand Up @@ -519,10 +517,10 @@ class AudioConfig(proto.Message):
stream.
speaking_rate (float):
Optional. Input only. Speaking rate/speed, in the range
[0.25, 4.0]. 1.0 is the normal native speed supported by the
[0.25, 2.0]. 1.0 is the normal native speed supported by the
specific voice. 2.0 is twice as fast, and 0.5 is half as
fast. If unset (0.0), defaults to the native 1.0 speed. Any
other values < 0.25 or > 4.0 will return an error.
other values < 0.25 or > 2.0 will return an error.
pitch (float):
Optional. Input only. Speaking pitch, in the range [-20.0,
20.0]. 20 means increase 20 semitones from the original
Expand Down Expand Up @@ -669,12 +667,18 @@ class StreamingAudioConfig(proto.Message):

Attributes:
audio_encoding (google.cloud.texttospeech_v1.types.AudioEncoding):
Required. The format of the audio byte stream. For now,
streaming only supports PCM and OGG_OPUS. All other
encodings will return an error.
Required. The format of the audio byte stream. Streaming
supports PCM, ALAW, MULAW and OGG_OPUS. All other encodings
return an error.
sample_rate_hertz (int):
Optional. The synthesis sample rate (in
hertz) for this audio.
speaking_rate (float):
Optional. Input only. Speaking rate/speed, in the range
[0.25, 2.0]. 1.0 is the normal native speed supported by the
specific voice. 2.0 is twice as fast, and 0.5 is half as
fast. If unset (0.0), defaults to the native 1.0 speed. Any
other values < 0.25 or > 2.0 will return an error.
"""

audio_encoding: "AudioEncoding" = proto.Field(
Expand All @@ -686,6 +690,10 @@ class StreamingAudioConfig(proto.Message):
proto.INT32,
number=2,
)
speaking_rate: float = proto.Field(
proto.DOUBLE,
number=3,
)


class StreamingSynthesizeConfig(proto.Message):
Expand All @@ -699,6 +707,20 @@ class StreamingSynthesizeConfig(proto.Message):
streaming_audio_config (google.cloud.texttospeech_v1.types.StreamingAudioConfig):
Optional. The configuration of the
synthesized audio.
custom_pronunciations (google.cloud.texttospeech_v1.types.CustomPronunciations):
Optional. The pronunciation customizations
to apply to the input. If this is set, the
input is synthesized using the given
pronunciation customizations.

The initial support is for en-us, with plans to
expand to other locales in the future. Instant
Clone voices aren't supported.

In order to customize the pronunciation of a
phrase, there must be an exact match of the
phrase in the input types. If using SSML, the
phrase must not be inside a phoneme tag.
"""

voice: "VoiceSelectionParams" = proto.Field(
Expand All @@ -711,6 +733,11 @@ class StreamingSynthesizeConfig(proto.Message):
number=4,
message="StreamingAudioConfig",
)
custom_pronunciations: "CustomPronunciations" = proto.Field(
proto.MESSAGE,
number=5,
message="CustomPronunciations",
)


class StreamingSynthesisInput(proto.Message):
Expand All @@ -722,10 +749,8 @@ class StreamingSynthesisInput(proto.Message):
text (str):
The raw text to be synthesized. It is
recommended that each input contains complete,
terminating sentences, as this will likely
result in better prosody in the output audio.
That being said, users are free to input text
however they please.
terminating sentences, which results in better
prosody in the output audio.

This field is a member of `oneof`_ ``input_source``.
"""
Expand Down