Skip to content

Commit b716b59

Browse files
committed
feat(google-genai): add Gemini Text-to-Speech (TTS) implementation
This commit adds comprehensive Text-to-Speech support for Google GenAI (Gemini) with the following components:

## API Client Layer
- GeminiTtsApi: Low-level REST client for Gemini TTS API
- Support for single-speaker and multi-speaker (conversational) TTS
- PCM audio format (s16le, 24kHz, mono)
- Request/response POJOs with Jackson annotations
- Convenience factory methods for simpler API usage

## Model Layer
- GeminiTtsModel: Spring AI TextToSpeechModel implementation
- GeminiTtsOptions: Builder-based configuration with runtime overrides
- Support for 30+ voices across 24+ languages
- Prompt-based style control (accent, pace, delivery)

## Spring Boot Integration
- Auto-configuration with properties binding
- Dedicated starter: spring-ai-starter-model-google-genai-tts
- Configuration prefix: spring.ai.google.genai.tts
- Conditional bean creation based on spring.ai.model.audio.speech property

Signed-off-by: Alexandros Pappas <apappascs@gmail.com>
1 parent 95bf79a commit b716b59

File tree

23 files changed

+2429
-2
lines changed

23 files changed

+2429
-2
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/*
2+
* Copyright 2023-2025 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.springframework.ai.model.google.genai.autoconfigure.tts;
18+
19+
import org.springframework.ai.google.genai.tts.GeminiTtsModel;
20+
import org.springframework.ai.google.genai.tts.api.GeminiTtsApi;
21+
import org.springframework.ai.model.SpringAIModelProperties;
22+
import org.springframework.ai.model.SpringAIModels;
23+
import org.springframework.ai.retry.autoconfigure.SpringAiRetryAutoConfiguration;
24+
import org.springframework.boot.autoconfigure.AutoConfiguration;
25+
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
26+
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
27+
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
28+
import org.springframework.boot.context.properties.EnableConfigurationProperties;
29+
import org.springframework.context.annotation.Bean;
30+
import org.springframework.core.retry.RetryTemplate;
31+
import org.springframework.util.Assert;
32+
import org.springframework.util.StringUtils;
33+
34+
/**
35+
* Auto-configuration for Google GenAI Text-to-Speech.
36+
*
37+
* @author Alexandros Pappas
38+
* @since 1.1.0
39+
*/
40+
@AutoConfiguration(after = SpringAiRetryAutoConfiguration.class)
41+
@ConditionalOnClass({ GeminiTtsApi.class, GeminiTtsModel.class })
42+
@ConditionalOnProperty(name = SpringAIModelProperties.AUDIO_SPEECH_MODEL, havingValue = SpringAIModels.GOOGLE_GEN_AI,
43+
matchIfMissing = false)
44+
@EnableConfigurationProperties({ GoogleGenAiTtsProperties.class, GoogleGenAiTtsConnectionProperties.class })
45+
public class GoogleGenAiTtsAutoConfiguration {
46+
47+
@Bean
48+
@ConditionalOnMissingBean
49+
public GeminiTtsApi geminiTtsApi(GoogleGenAiTtsConnectionProperties connectionProperties) {
50+
Assert.hasText(connectionProperties.getApiKey(), "Google GenAI API key must be set!");
51+
52+
if (StringUtils.hasText(connectionProperties.getBaseUrl())) {
53+
return new GeminiTtsApi(connectionProperties.getApiKey(), connectionProperties.getBaseUrl());
54+
}
55+
56+
return new GeminiTtsApi(connectionProperties.getApiKey());
57+
}
58+
59+
@Bean
60+
@ConditionalOnMissingBean
61+
public GeminiTtsModel geminiTtsModel(GeminiTtsApi geminiTtsApi, GoogleGenAiTtsProperties ttsProperties,
62+
RetryTemplate retryTemplate) {
63+
64+
return GeminiTtsModel.builder()
65+
.geminiTtsApi(geminiTtsApi)
66+
.defaultOptions(ttsProperties.getOptions())
67+
.retryTemplate(retryTemplate)
68+
.build();
69+
}
70+
71+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/*
2+
* Copyright 2023-2025 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.springframework.ai.model.google.genai.autoconfigure.tts;
18+
19+
import org.springframework.boot.context.properties.ConfigurationProperties;
20+
21+
/**
22+
* Connection properties for Google GenAI TTS.
23+
*
24+
* @author Alexandros Pappas
25+
* @since 1.1.0
26+
*/
27+
@ConfigurationProperties(GoogleGenAiTtsConnectionProperties.CONFIG_PREFIX)
28+
public class GoogleGenAiTtsConnectionProperties {
29+
30+
public static final String CONFIG_PREFIX = "spring.ai.google.genai";
31+
32+
/**
33+
* Google GenAI API Key for TTS.
34+
*/
35+
private String apiKey;
36+
37+
/**
38+
* Base URL for the Google GenAI TTS API.
39+
*/
40+
private String baseUrl = "https://generativelanguage.googleapis.com";
41+
42+
public String getApiKey() {
43+
return this.apiKey;
44+
}
45+
46+
public void setApiKey(String apiKey) {
47+
this.apiKey = apiKey;
48+
}
49+
50+
public String getBaseUrl() {
51+
return this.baseUrl;
52+
}
53+
54+
public void setBaseUrl(String baseUrl) {
55+
this.baseUrl = baseUrl;
56+
}
57+
58+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Copyright 2025-2025 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.springframework.ai.model.google.genai.autoconfigure.tts;
18+
19+
import org.springframework.ai.google.genai.tts.GeminiTtsOptions;
20+
import org.springframework.boot.context.properties.ConfigurationProperties;
21+
import org.springframework.boot.context.properties.NestedConfigurationProperty;
22+
23+
/**
24+
* Configuration properties for Google GenAI Text-to-Speech.
25+
*
26+
* @author Alexandros Pappas
27+
* @since 1.1.0
28+
*/
29+
@ConfigurationProperties(GoogleGenAiTtsProperties.CONFIG_PREFIX)
30+
public class GoogleGenAiTtsProperties {
31+
32+
public static final String CONFIG_PREFIX = "spring.ai.google.genai.tts";
33+
34+
public static final String DEFAULT_MODEL = "gemini-2.5-flash-preview-tts";
35+
36+
public static final String DEFAULT_VOICE = "Kore";
37+
38+
/**
39+
* Google GenAI TTS options.
40+
*/
41+
@NestedConfigurationProperty
42+
private final GeminiTtsOptions options = GeminiTtsOptions.builder()
43+
.model(DEFAULT_MODEL)
44+
.voice(DEFAULT_VOICE)
45+
.build();
46+
47+
public GeminiTtsOptions getOptions() {
48+
return this.options;
49+
}
50+
51+
}

auto-configurations/models/spring-ai-autoconfigure-model-google-genai/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@
1616
org.springframework.ai.model.google.genai.autoconfigure.chat.GoogleGenAiChatAutoConfiguration
1717
org.springframework.ai.model.google.genai.autoconfigure.embedding.GoogleGenAiEmbeddingConnectionAutoConfiguration
1818
org.springframework.ai.model.google.genai.autoconfigure.embedding.GoogleGenAiTextEmbeddingAutoConfiguration
19+
org.springframework.ai.model.google.genai.autoconfigure.tts.GoogleGenAiTtsAutoConfiguration
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/*
2+
* Copyright 2025-2025 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.springframework.ai.model.google.genai.autoconfigure.tts;
18+
19+
import org.junit.jupiter.api.Test;
20+
import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
21+
22+
import org.springframework.ai.google.genai.tts.GeminiTtsModel;
23+
import org.springframework.ai.utils.SpringAiTestAutoConfigurations;
24+
import org.springframework.boot.test.context.runner.ApplicationContextRunner;
25+
26+
import static org.assertj.core.api.Assertions.assertThat;
27+
28+
/**
29+
* Integration tests for the {@link GoogleGenAiTtsAutoConfiguration}.
30+
*
31+
* @author Alexandros Pappas
32+
*/
33+
@EnabledIfEnvironmentVariable(named = "GOOGLE_API_KEY", matches = ".*")
34+
public class GoogleGenAiTtsAutoConfigurationIT {
35+
36+
private static final org.apache.commons.logging.Log logger = org.apache.commons.logging.LogFactory
37+
.getLog(GoogleGenAiTtsAutoConfigurationIT.class);
38+
39+
private final ApplicationContextRunner contextRunner = new ApplicationContextRunner()
40+
.withPropertyValues("spring.ai.google.genai.api-key=" + System.getenv("GOOGLE_API_KEY"),
41+
"spring.ai.model.audio.speech=google-genai")
42+
.withConfiguration(SpringAiTestAutoConfigurations.of(GoogleGenAiTtsAutoConfiguration.class));
43+
44+
@Test
45+
void ttsModelBeanCreation() {
46+
this.contextRunner.run(context -> {
47+
assertThat(context).hasSingleBean(GeminiTtsModel.class);
48+
GeminiTtsModel ttsModel = context.getBean(GeminiTtsModel.class);
49+
assertThat(ttsModel).isNotNull();
50+
});
51+
}
52+
53+
@Test
54+
void ttsModelApiCall() {
55+
this.contextRunner.run(context -> {
56+
GeminiTtsModel ttsModel = context.getBean(GeminiTtsModel.class);
57+
byte[] response = ttsModel.call("Hello");
58+
59+
assertThat(response).isNotNull();
60+
assertThat(response).isNotEmpty();
61+
// PCM audio should be substantial (24kHz * 2 bytes/sample * ~1 second)
62+
assertThat(response.length).isGreaterThan(1000);
63+
64+
logger.debug("PCM audio response size: " + response.length + " bytes");
65+
});
66+
}
67+
68+
@Test
69+
void ttsModelWithCustomVoice() {
70+
this.contextRunner
71+
.withPropertyValues("spring.ai.google.genai.tts.options.voice=Puck",
72+
"spring.ai.google.genai.tts.options.model=gemini-2.5-flash-preview-tts")
73+
.run(context -> {
74+
GeminiTtsModel ttsModel = context.getBean(GeminiTtsModel.class);
75+
byte[] response = ttsModel.call("Testing custom voice");
76+
77+
assertThat(response).isNotNull();
78+
assertThat(response).isNotEmpty();
79+
assertThat(response.length).isGreaterThan(1000);
80+
});
81+
}
82+
83+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/*
2+
* Copyright 2025-2025 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.springframework.ai.model.google.genai.autoconfigure.tts;
18+
19+
import org.junit.jupiter.api.Test;
20+
21+
import org.springframework.boot.context.properties.EnableConfigurationProperties;
22+
import org.springframework.boot.test.context.runner.ApplicationContextRunner;
23+
import org.springframework.context.annotation.Configuration;
24+
25+
import static org.assertj.core.api.Assertions.assertThat;
26+
27+
/**
28+
* Unit tests for Google GenAI TTS properties binding.
29+
*
30+
* @author Alexandros Pappas
31+
*/
32+
public class GoogleGenAiTtsPropertiesTests {
33+
34+
private final ApplicationContextRunner contextRunner = new ApplicationContextRunner()
35+
.withUserConfiguration(PropertiesTestConfiguration.class);
36+
37+
@Test
38+
void connectionPropertiesBinding() {
39+
this.contextRunner
40+
.withPropertyValues("spring.ai.google.genai.api-key=test-tts-key",
41+
"spring.ai.google.genai.base-url=https://test.api.google.com")
42+
.run(context -> {
43+
GoogleGenAiTtsConnectionProperties connectionProperties = context
44+
.getBean(GoogleGenAiTtsConnectionProperties.class);
45+
assertThat(connectionProperties.getApiKey()).isEqualTo("test-tts-key");
46+
assertThat(connectionProperties.getBaseUrl()).isEqualTo("https://test.api.google.com");
47+
});
48+
}
49+
50+
@Test
51+
void ttsPropertiesBinding() {
52+
this.contextRunner
53+
.withPropertyValues("spring.ai.google.genai.tts.options.model=gemini-2.5-pro-preview-tts",
54+
"spring.ai.google.genai.tts.options.voice=Puck", "spring.ai.google.genai.tts.options.speed=1.2")
55+
.run(context -> {
56+
GoogleGenAiTtsProperties ttsProperties = context.getBean(GoogleGenAiTtsProperties.class);
57+
assertThat(ttsProperties.getOptions().getModel()).isEqualTo("gemini-2.5-pro-preview-tts");
58+
assertThat(ttsProperties.getOptions().getVoice()).isEqualTo("Puck");
59+
assertThat(ttsProperties.getOptions().getSpeed()).isEqualTo(1.2);
60+
});
61+
}
62+
63+
@Test
64+
void ttsDefaultValuesBinding() {
65+
// Test that defaults are applied when not specified
66+
this.contextRunner.run(context -> {
67+
GoogleGenAiTtsProperties ttsProperties = context.getBean(GoogleGenAiTtsProperties.class);
68+
assertThat(ttsProperties.getOptions().getModel()).isEqualTo("gemini-2.5-flash-preview-tts");
69+
assertThat(ttsProperties.getOptions().getVoice()).isEqualTo("Kore");
70+
assertThat(ttsProperties.getOptions().getFormat()).isEqualTo("pcm");
71+
});
72+
}
73+
74+
@Test
75+
void connectionDefaultBaseUrl() {
76+
this.contextRunner.withPropertyValues("spring.ai.google.genai.api-key=test-key").run(context -> {
77+
GoogleGenAiTtsConnectionProperties connectionProperties = context
78+
.getBean(GoogleGenAiTtsConnectionProperties.class);
79+
assertThat(connectionProperties.getBaseUrl()).isEqualTo("https://generativelanguage.googleapis.com");
80+
});
81+
}
82+
83+
@Configuration
84+
@EnableConfigurationProperties({ GoogleGenAiTtsConnectionProperties.class, GoogleGenAiTtsProperties.class })
85+
static class PropertiesTestConfiguration {
86+
87+
}
88+
89+
}

0 commit comments

Comments
 (0)