spring-projects · sobychacko · Jun 5, 2025 · Jun 2, 2025 · Jun 5, 2025
diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiApi.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiApi.java
@@ -1180,6 +1180,12 @@ public record AudioParameters(
 public enum Voice {
 /** Alloy voice */
 @JsonProperty("alloy") ALLOY,
+/** Ash voice */
+@JsonProperty("ash") ASH,
+/** Ballad voice */
+@JsonProperty("ballad") BALLAD,
+/** Coral voice */
+@JsonProperty("coral") CORAL,
 /** Echo voice */
 @JsonProperty("echo") ECHO,
 /** Fable voice */
@@ -1188,6 +1194,8 @@ public enum Voice {
 @JsonProperty("onyx") ONYX,
 /** Nova voice */
 @JsonProperty("nova") NOVA,
+/** Sage voice */
+@JsonProperty("sage") SAGE,
 /** Shimmer voice */
 @JsonProperty("shimmer") SHIMMER
 }

diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiAudioApi.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiAudioApi.java
@@ -240,7 +240,12 @@ public enum TtsModel {
  * The latest text to speech model, optimized for quality.
  */
 @JsonProperty("tts-1-hd")
-TTS_1_HD("tts-1-hd");
+TTS_1_HD("tts-1-hd"),
+/**
+ * Text-to-speech model powered by GPT-4o mini.
+ */
+@JsonProperty("gpt-4o-mini-tts")
+GPT_4_O_MINI_TTS("gpt-4o-mini-tts");
 // @formatter:on
 
 public final String value;
@@ -330,14 +335,15 @@ public Class<?> getResponseType() {
  * Speech</a>
  *
  * @param model The model to use for generating the audio. One of the available TTS
- * models: tts-1 or tts-1-hd.
+ * models: tts-1, tts-1-hd, or gpt-4o-mini-tts.
  * @param input The input text to synthesize. Must be at most 4096 tokens long.
  * @param voice The voice to use for synthesis. One of the available voices for the
- * chosen model: 'alloy', 'echo', 'fable', 'onyx', 'nova', and 'shimmer'.
+ * chosen model: 'alloy', 'ash', 'ballad', 'coral', 'echo', 'fable', 'onyx', 'nova',
+ * 'sage', 'shimmer', and 'verse'.
  * @param responseFormat The format to audio in. Supported formats are mp3, opus, aac,
- * and flac. Defaults to mp3.
+ * flac, wav, and pcm. Defaults to mp3.
  * @param speed The speed of the voice synthesis. The acceptable range is from 0.25
- * (slowest) to 4.0 (fastest).
+ * (slowest) to 4.0 (fastest). Does not work with gpt-4o-mini-tts.
  */
 @JsonInclude(Include.NON_NULL)
 public record SpeechRequest(
@@ -361,6 +367,8 @@ public enum Voice {
 // @formatter:off
 @JsonProperty("alloy")
 ALLOY("alloy"),
+@JsonProperty("ballad")
+BALLAD("ballad"),
 @JsonProperty("echo")
 ECHO("echo"),
 @JsonProperty("fable")
@@ -376,7 +384,9 @@ public enum Voice {
 @JsonProperty("coral")
 CORAL("coral"),
 @JsonProperty("ash")
-ASH("ash");
+ASH("ash"),
+@JsonProperty("verse")
+VERSE("verse");
 // @formatter:on
 
 public final String value;

diff --git a/...-openai/src/test/java/org/springframework/ai/openai/audio/speech/OpenAiSpeechModelIT.java b/...-openai/src/test/java/org/springframework/ai/openai/audio/speech/OpenAiSpeechModelIT.java
@@ -136,7 +136,8 @@ void shouldStreamNonEmptyResponsesForValidSpeechPrompts() {
 }
 
 @ParameterizedTest(name = "{0} : {displayName} ")
-@ValueSource(strings = { "alloy", "echo", "fable", "onyx", "nova", "shimmer", "sage", "coral", "ash" })
+@ValueSource(strings = { "alloy", "echo", "fable", "onyx", "nova", "shimmer", "sage", "coral", "ash", "verse",
+"ballad" })
 void speechVoicesTest(String voice) {
 OpenAiAudioSpeechOptions speechOptions = OpenAiAudioSpeechOptions.builder()
 .voice(voice)