Skip to content

Commit 25c484c

Browse files
committed
Merge branch 'bugfix/fix_google_translate_device_example' into 'master'
bugfix(examples): Fix the google translate device example. See merge request adf/esp-adf-internal!1425
2 parents 45dc171 + 9473bf1 commit 25c484c

File tree

4 files changed

+42
-41
lines changed

4 files changed

+42
-41
lines changed

examples/cloud_services/google_translate_device/main/google_sr.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,9 @@ google_sr_handle_t google_sr_init(google_sr_config_t *config)
224224

225225
i2s_stream_cfg_t i2s_cfg = I2S_STREAM_CFG_DEFAULT();
226226
i2s_cfg.type = AUDIO_STREAM_READER;
227+
i2s_cfg.std_cfg.slot_cfg.slot_mode = I2S_SLOT_MODE_MONO;
228+
i2s_stream_set_channel_type(&i2s_cfg, I2S_CHANNEL_TYPE_ONLY_RIGHT);
229+
i2s_cfg.std_cfg.clk_cfg.sample_rate_hz = config->record_sample_rates;
227230
sr->i2s_reader = i2s_stream_init(&i2s_cfg);
228231

229232
http_stream_cfg_t http_cfg = {
@@ -238,7 +241,7 @@ google_sr_handle_t google_sr_init(google_sr_config_t *config)
238241
sr->on_begin = config->on_begin;
239242

240243
audio_pipeline_register(sr->pipeline, sr->http_stream_writer, "sr_http");
241-
audio_pipeline_register(sr->pipeline, sr->i2s_reader, "sr_i2s");
244+
audio_pipeline_register(sr->pipeline, sr->i2s_reader, "sr_i2s");
242245
const char *link_tag[2] = {"sr_i2s", "sr_http"};
243246
audio_pipeline_link(sr->pipeline, &link_tag[0], 2);
244247
i2s_stream_set_clk(sr->i2s_reader, config->record_sample_rates, 16, 1);

examples/cloud_services/google_translate_device/main/google_tts.c

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -45,32 +45,30 @@
4545

4646
static const char *TAG = "GOOGLE_TTS";
4747

48-
49-
#define GOOGLE_TTS_ENDPOINT "https://texttospeech.googleapis.com/v1beta1/text:synthesize?key=%s"
50-
#define GOOGLE_TTS_TEMPLATE "{"\
51-
"\"audioConfig\": { \"audioEncoding\" : \"MP3\", \"sampleRateHertz\": %d },"\
52-
"\"voice\": { \"languageCode\" : \"%s\" },"\
53-
"\"input\": { \"text\" : \"%s\" }"\
54-
"}"
55-
#define GOOGLE_TTS_TASK_STACK (8*1024)
48+
#define GOOGLE_TTS_ENDPOINT "https://texttospeech.googleapis.com/v1beta1/text:synthesize?key=%s"
49+
#define GOOGLE_TTS_TEMPLATE "{" \
50+
"\"audioConfig\": { \"audioEncoding\" : \"MP3\", \"sampleRateHertz\": %d }," \
51+
"\"voice\": { \"languageCode\" : \"%s\", \"name\" : \"%s\" }," \
52+
"\"input\": { \"text\" : \"%s\" }" \
53+
"}"
54+
#define GOOGLE_TTS_TASK_STACK (8 * 1024)
5655

5756
typedef struct google_tts {
5857
audio_pipeline_handle_t pipeline;
5958
audio_element_handle_t i2s_writer;
6059
audio_element_handle_t http_stream_reader;
6160
audio_element_handle_t mp3_decoder;
62-
char *api_key;
63-
char *lang_code;
61+
char *api_key;
62+
char *lang_code;
63+
char *voice_name;
6464
int buffer_size;
65-
char *buffer;
66-
char *text;
65+
char *buffer;
66+
char *text;
6767
bool is_begin;
6868
int tts_total_read;
6969
int sample_rate;
7070
int remain_len;
7171
} google_tts_t;
72-
73-
7472
static esp_err_t _http_stream_reader_event_handle(http_stream_event_msg_t *msg)
7573
{
7674
esp_http_client_handle_t http = (esp_http_client_handle_t)msg->http_client;
@@ -83,7 +81,7 @@ static esp_err_t _http_stream_reader_event_handle(http_stream_event_msg_t *msg)
8381
ESP_LOGI(TAG, "[ + ] HTTP client HTTP_STREAM_PRE_REQUEST, length=%d", msg->buffer_len);
8482
tts->tts_total_read = 0;
8583
tts->is_begin = true;
86-
int payload_len = snprintf(tts->buffer, tts->buffer_size, GOOGLE_TTS_TEMPLATE, tts->sample_rate, tts->lang_code, tts->text);
84+
int payload_len = snprintf(tts->buffer, tts->buffer_size, GOOGLE_TTS_TEMPLATE, tts->sample_rate, tts->lang_code, tts->voice_name, tts->text);
8785
esp_http_client_set_post_field(http, tts->buffer, payload_len);
8886
esp_http_client_set_method(http, HTTP_METHOD_POST);
8987
esp_http_client_set_header(http, "Content-Type", "application/json");
@@ -121,7 +119,6 @@ static esp_err_t _http_stream_reader_event_handle(http_stream_event_msg_t *msg)
121119
// *data_ptr = 0;
122120
data_ptr++;
123121
process_data_len--;
124-
125122
}
126123
unsigned int mp3_len = 0;
127124
int keep_next_time = 0;
@@ -141,11 +138,9 @@ static esp_err_t _http_stream_reader_event_handle(http_stream_event_msg_t *msg)
141138

142139
if (msg->event_id == HTTP_STREAM_POST_REQUEST) {
143140
ESP_LOGI(TAG, "[ + ] HTTP client HTTP_STREAM_POST_REQUEST, write end chunked marker");
144-
145141
}
146142

147143
if (msg->event_id == HTTP_STREAM_FINISH_REQUEST) {
148-
149144
}
150145

151146
return ESP_OK;
@@ -234,8 +229,7 @@ bool google_tts_check_event_finish(google_tts_handle_t tts, audio_event_iface_ms
234229
return false;
235230
}
236231

237-
238-
esp_err_t google_tts_start(google_tts_handle_t tts, const char *text, const char *lang_code)
232+
esp_err_t google_tts_start(google_tts_handle_t tts, const char *text, const char *lang_code, const char *voice_name)
239233
{
240234
free(tts->lang_code);
241235
free(tts->text);
@@ -244,6 +238,12 @@ esp_err_t google_tts_start(google_tts_handle_t tts, const char *text, const char
244238
ESP_LOGE(TAG, "Error no mem");
245239
return ESP_ERR_NO_MEM;
246240
}
241+
free(tts->voice_name);
242+
tts->voice_name = strdup(voice_name);
243+
if (tts->voice_name == NULL) {
244+
ESP_LOGE(TAG, "Error no mem");
245+
return ESP_ERR_NO_MEM;
246+
}
247247
tts->text = strdup(text);
248248
if (tts->text == NULL) {
249249
free(tts->lang_code);

examples/cloud_services/google_translate_device/main/google_tts.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,17 +53,17 @@ typedef struct {
5353
google_tts_handle_t google_tts_init(google_tts_config_t *config);
5454

5555
/**
56-
* @brief Start sending text to Google Cloud Text-to-Speech and play audio received
56+
* @brief Start sending text to Google Cloud Text-to-Speech and play audio received
5757
*
5858
* @param[in] tts The Text-to-Speech context
5959
* @param[in] text The text
6060
* @param[in] lang_code The language code
6161
*
6262
* @return
63-
* - ESP_OK
64-
* - ESP_FAIL
63+
* - ESP_OK
64+
* - ESP_FAIL
6565
*/
66-
esp_err_t google_tts_start(google_tts_handle_t tts, const char *text, const char *lang_code);
66+
esp_err_t google_tts_start(google_tts_handle_t tts, const char *text, const char *lang_code, const char *voice_name);
6767

6868
/**
6969
* @brief Stop playing audio from Google Cloud Text-to-Speech

examples/cloud_services/google_translate_device/main/translate_device_example.c

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,13 @@
2020
#include "audio_common.h"
2121
#include "board.h"
2222
#include "esp_peripherals.h"
23+
#include "periph_adc_button.h"
2324
#include "periph_button.h"
2425
#include "periph_wifi.h"
2526
#include "periph_led.h"
2627
#include "google_tts.h"
2728
#include "google_sr.h"
2829
#include "google_translate.h"
29-
#include "board.h"
3030

3131
#include "audio_idf_version.h"
3232

@@ -38,10 +38,11 @@
3838

3939
static const char *TAG = "GOOGLE_TRANSLATION_EXAMPLE";
4040

41-
#define GOOGLE_SR_LANG "cmn-Hans-CN" // https://cloud.google.com/speech-to-text/docs/languages
42-
#define GOOGLE_TRANSLATE_LANG_FROM "zh-CN" //https://cloud.google.com/translate/docs/languages
43-
#define GOOGLE_TRANSLATE_LANG_TO "en" //https://cloud.google.com/translate/docs/languages
44-
#define GOOGLE_TTS_LANG "en-US-Wavenet-D" //https://cloud.google.com/text-to-speech/docs/voices
41+
#define GOOGLE_SR_LANG "cmn-Hans-CN" // https://cloud.google.com/speech-to-text/docs/languages
42+
#define GOOGLE_TRANSLATE_LANG_FROM "zh-CN" // https://cloud.google.com/translate/docs/languages
43+
#define GOOGLE_TRANSLATE_LANG_TO "en" // https://cloud.google.com/translate/docs/languages
44+
#define GOOGLE_TTS_LANG_CODE "en-US" // https://cloud.google.com/text-to-speech/docs/voices
45+
#define GOOGLE_TTS_VOICE_NAME "en-US-Wavenet-D" // https://cloud.google.com/text-to-speech/docs/voices
4546

4647
#define EXAMPLE_RECORD_PLAYBACK_SAMPLE_RATE (16000)
4748

@@ -81,10 +82,7 @@ void translate_task(void *pv)
8182
esp_periph_handle_t wifi_handle = periph_wifi_init(&wifi_cfg);
8283

8384
// Initialize Button peripheral
84-
periph_button_cfg_t btn_cfg = {
85-
.gpio_mask = (1ULL << get_input_mode_id()) | (1ULL << get_input_rec_id()),
86-
};
87-
esp_periph_handle_t button_handle = periph_button_init(&btn_cfg);
85+
audio_board_key_init(set);
8886

8987
periph_led_cfg_t led_cfg = {
9088
.led_speed_mode = LEDC_LOW_SPEED_MODE,
@@ -94,9 +92,7 @@ void translate_task(void *pv)
9492
};
9593
led_handle = periph_led_init(&led_cfg);
9694

97-
9895
// Start wifi & button peripheral
99-
esp_periph_start(set, button_handle);
10096
esp_periph_start(set, wifi_handle);
10197
esp_periph_start(set, led_handle);
10298

@@ -112,12 +108,14 @@ void translate_task(void *pv)
112108
.record_sample_rates = EXAMPLE_RECORD_PLAYBACK_SAMPLE_RATE,
113109
.encoding = ENCODING_LINEAR16,
114110
.on_begin = google_sr_begin,
111+
.buffer_size = 6144,
115112
};
116113
google_sr_handle_t sr = google_sr_init(&sr_config);
117114

118115
google_tts_config_t tts_config = {
119116
.api_key = CONFIG_GOOGLE_API_KEY,
120117
.playback_sample_rate = EXAMPLE_RECORD_PLAYBACK_SAMPLE_RATE,
118+
.buffer_size = 6144,
121119
};
122120
google_tts_handle_t tts = google_tts_init(&tts_config);
123121

@@ -149,7 +147,7 @@ void translate_task(void *pv)
149147
continue;
150148
}
151149

152-
if (msg.source_type != PERIPH_ID_BUTTON) {
150+
if (msg.source_type != PERIPH_ID_BUTTON && msg.source_type != PERIPH_ID_ADC_BTN) {
153151
continue;
154152
}
155153

@@ -162,11 +160,12 @@ void translate_task(void *pv)
162160
continue;
163161
}
164162

165-
if (msg.cmd == PERIPH_BUTTON_PRESSED) {
163+
if (msg.cmd == PERIPH_BUTTON_PRESSED || msg.cmd == PERIPH_ADC_BUTTON_PRESSED) {
166164
google_tts_stop(tts);
167165
ESP_LOGI(TAG, "[ * ] Resuming pipeline");
168166
google_sr_start(sr);
169-
} else if (msg.cmd == PERIPH_BUTTON_RELEASE || msg.cmd == PERIPH_BUTTON_LONG_RELEASE) {
167+
} else if (msg.cmd == PERIPH_BUTTON_RELEASE || msg.cmd == PERIPH_BUTTON_LONG_RELEASE \
168+
|| msg.cmd == PERIPH_ADC_BUTTON_RELEASE || msg.cmd == PERIPH_ADC_BUTTON_LONG_RELEASE) {
170169
ESP_LOGI(TAG, "[ * ] Stop pipeline");
171170

172171
periph_led_stop(led_handle, get_green_led_gpio());
@@ -181,9 +180,8 @@ void translate_task(void *pv)
181180
continue;
182181
}
183182
ESP_LOGI(TAG, "Translated text = %s", translated_text);
184-
google_tts_start(tts, translated_text, GOOGLE_TTS_LANG);
183+
google_tts_start(tts, translated_text, GOOGLE_TTS_LANG_CODE, GOOGLE_TTS_VOICE_NAME);
185184
}
186-
187185
}
188186
ESP_LOGI(TAG, "[ 6 ] Stop audio_pipeline");
189187
google_sr_destroy(sr);

0 commit comments

Comments
 (0)