Skip to content
This repository was archived by the owner on Nov 29, 2023. It is now read-only.

Commit f7f68ae

Browse files
feat: Support MULAW audio encoding
feat: Support MP3_64_KBPS audio encoding
feat: Support timepointing via SSML <mark> tag
PiperOrigin-RevId: 323424211
Source-Author: Google APIs <noreply@google.com>
Source-Date: Mon Jul 27 13:05:41 2020 -0700
Source-Repo: googleapis/googleapis
Source-Sha: a94df49e8f208649f2f5cb39a84668c6a3434ce8
Source-Link: googleapis/googleapis@a94df49
1 parent e72ca15 commit f7f68ae

File tree

5 files changed

+50
-3
lines changed

5 files changed

+50
-3
lines changed

google/cloud/texttospeech_v1beta1/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from .types.cloud_tts import SynthesisInput
2525
from .types.cloud_tts import SynthesizeSpeechRequest
2626
from .types.cloud_tts import SynthesizeSpeechResponse
27+
from .types.cloud_tts import Timepoint
2728
from .types.cloud_tts import Voice
2829
from .types.cloud_tts import VoiceSelectionParams
2930

@@ -37,6 +38,7 @@
3738
"SynthesisInput",
3839
"SynthesizeSpeechRequest",
3940
"SynthesizeSpeechResponse",
41+
"Timepoint",
4042
"Voice",
4143
"VoiceSelectionParams",
4244
"TextToSpeechClient",

google/cloud/texttospeech_v1beta1/types/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
VoiceSelectionParams,
2525
AudioConfig,
2626
SynthesizeSpeechResponse,
27+
Timepoint,
2728
)
2829

2930

@@ -36,4 +37,5 @@
3637
"VoiceSelectionParams",
3738
"AudioConfig",
3839
"SynthesizeSpeechResponse",
40+
"Timepoint",
3941
)

google/cloud/texttospeech_v1beta1/types/cloud_tts.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
"VoiceSelectionParams",
3232
"AudioConfig",
3333
"SynthesizeSpeechResponse",
34+
"Timepoint",
3435
},
3536
)
3637

@@ -52,7 +53,9 @@ class AudioEncoding(proto.Enum):
5253
AUDIO_ENCODING_UNSPECIFIED = 0
5354
LINEAR16 = 1
5455
MP3 = 2
56+
MP3_64_KBPS = 4
5557
OGG_OPUS = 3
58+
MULAW = 5
5659

5760

5861
class ListVoicesRequest(proto.Message):
@@ -128,14 +131,26 @@ class SynthesizeSpeechRequest(proto.Message):
128131
audio_config (~.cloud_tts.AudioConfig):
129132
Required. The configuration of the
130133
synthesized audio.
134+
enable_time_pointing (Sequence[~.cloud_tts.SynthesizeSpeechRequest.TimepointType]):
135+
Whether and what timepoints should be
136+
returned in the response.
131137
"""
132138

139+
class TimepointType(proto.Enum):
140+
r"""The type of timepoint information that is returned in the
141+
response.
142+
"""
143+
TIMEPOINT_TYPE_UNSPECIFIED = 0
144+
SSML_MARK = 1
145+
133146
input = proto.Field(proto.MESSAGE, number=1, message="SynthesisInput")
134147

135148
voice = proto.Field(proto.MESSAGE, number=2, message="VoiceSelectionParams")
136149

137150
audio_config = proto.Field(proto.MESSAGE, number=3, message="AudioConfig")
138151

152+
enable_time_pointing = proto.RepeatedField(proto.ENUM, number=4, enum=TimepointType)
153+
139154

140155
class SynthesisInput(proto.Message):
141156
r"""Contains text input to be synthesized. Either ``text`` or ``ssml``
@@ -270,9 +285,37 @@ class SynthesizeSpeechResponse(proto.Message):
270285
include the WAV header. Note: as with all bytes fields,
271286
protobuffers use a pure binary representation, whereas JSON
272287
representations use base64.
288+
timepoints (Sequence[~.cloud_tts.Timepoint]):
289+
A link between a position in the original request input and
290+
a corresponding time in the output audio. It's only
291+
supported via ``<mark>`` of SSML input.
292+
audio_config (~.cloud_tts.AudioConfig):
293+
The audio metadata of ``audio_content``.
273294
"""
274295

275296
audio_content = proto.Field(proto.BYTES, number=1)
276297

298+
timepoints = proto.RepeatedField(proto.MESSAGE, number=2, message="Timepoint")
299+
300+
audio_config = proto.Field(proto.MESSAGE, number=4, message=AudioConfig)
301+
302+
303+
class Timepoint(proto.Message):
304+
r"""This contains a mapping between a certain point in the input
305+
text and a corresponding time in the output audio.
306+
307+
Attributes:
308+
mark_name (str):
309+
Timepoint name as received from the client within ``<mark>``
310+
tag.
311+
time_seconds (float):
312+
Time offset in seconds from the start of the
313+
synthesized audio.
314+
"""
315+
316+
mark_name = proto.Field(proto.STRING, number=4)
317+
318+
time_seconds = proto.Field(proto.DOUBLE, number=3)
319+
277320

278321
__all__ = tuple(sorted(__protobuf__.manifest))

scripts/fixup_texttospeech_v1beta1_keywords.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ class texttospeechCallTransformer(cst.CSTTransformer):
4141
CTRL_PARAMS: Tuple[str] = ('retry', 'timeout', 'metadata')
4242
METHOD_TO_PARAMS: Dict[str, Tuple[str]] = {
4343
'list_voices': ('language_code', ),
44-
'synthesize_speech': ('input', 'voice', 'audio_config', ),
44+
'synthesize_speech': ('input', 'voice', 'audio_config', 'enable_time_pointing', ),
4545

4646
}
4747

synth.metadata

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
"git": {
1212
"name": "googleapis",
1313
"remote": "https://github.com/googleapis/googleapis.git",
14-
"sha": "55094be6405640329ddc93730962b9f7e68a0fc1",
15-
"internalRef": "314438331"
14+
"sha": "a94df49e8f208649f2f5cb39a84668c6a3434ce8",
15+
"internalRef": "323424211"
1616
}
1717
},
1818
{

0 commit comments

Comments
 (0)