Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1180,6 +1180,12 @@ public record AudioParameters(
public enum Voice {
/** Alloy voice */
@JsonProperty("alloy") ALLOY,
/** Ash voice */
@JsonProperty("ash") ASH,
/** Ballad voice */
@JsonProperty("ballad") BALLAD,
/** Coral voice */
@JsonProperty("coral") CORAL,
/** Echo voice */
@JsonProperty("echo") ECHO,
/** Fable voice */
Expand All @@ -1188,6 +1194,8 @@ public enum Voice {
@JsonProperty("onyx") ONYX,
/** Nova voice */
@JsonProperty("nova") NOVA,
/** Sage voice */
@JsonProperty("sage") SAGE,
/** Shimmer voice */
@JsonProperty("shimmer") SHIMMER
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,12 @@ public enum TtsModel {
* The latest text to speech model, optimized for quality.
*/
@JsonProperty("tts-1-hd")
TTS_1_HD("tts-1-hd");
TTS_1_HD("tts-1-hd"),
/**
* Text-to-speech model powered by GPT-4o mini.
*/
@JsonProperty("gpt-4o-mini-tts")
GPT_4_O_MINI_TTS("gpt-4o-mini-tts");
// @formatter:on

public final String value;
Expand Down Expand Up @@ -330,14 +335,15 @@ public Class<?> getResponseType() {
* Speech</a>
*
* @param model The model to use for generating the audio. One of the available TTS
* models: tts-1 or tts-1-hd.
* models: tts-1, tts-1-hd, or gpt-4o-mini-tts.
* @param input The input text to synthesize. Must be at most 4096 characters long.
* @param voice The voice to use for synthesis. One of the available voices for the
* chosen model: 'alloy', 'echo', 'fable', 'onyx', 'nova', and 'shimmer'.
* chosen model: 'alloy', 'ash', 'ballad', 'coral', 'echo', 'fable', 'onyx', 'nova',
* 'sage', 'shimmer', and 'verse'.
* @param responseFormat The format of the generated audio. Supported formats are mp3, opus, aac,
* and flac. Defaults to mp3.
* flac, wav, and pcm. Defaults to mp3.
* @param speed The speed of the voice synthesis. The acceptable range is from 0.25
* (slowest) to 4.0 (fastest).
* (slowest) to 4.0 (fastest). Does not work with gpt-4o-mini-tts.
*/
@JsonInclude(Include.NON_NULL)
public record SpeechRequest(
Expand All @@ -361,6 +367,8 @@ public enum Voice {
// @formatter:off
@JsonProperty("alloy")
ALLOY("alloy"),
@JsonProperty("ballad")
BALLAD("ballad"),
@JsonProperty("echo")
ECHO("echo"),
@JsonProperty("fable")
Expand All @@ -376,7 +384,9 @@ public enum Voice {
@JsonProperty("coral")
CORAL("coral"),
@JsonProperty("ash")
ASH("ash");
ASH("ash"),
@JsonProperty("verse")
VERSE("verse");
// @formatter:on

public final String value;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,8 @@ void shouldStreamNonEmptyResponsesForValidSpeechPrompts() {
}

@ParameterizedTest(name = "{0} : {displayName} ")
@ValueSource(strings = { "alloy", "echo", "fable", "onyx", "nova", "shimmer", "sage", "coral", "ash" })
@ValueSource(strings = { "alloy", "echo", "fable", "onyx", "nova", "shimmer", "sage", "coral", "ash", "verse",
"ballad" })
void speechVoicesTest(String voice) {
OpenAiAudioSpeechOptions speechOptions = OpenAiAudioSpeechOptions.builder()
.voice(voice)
Expand Down