diff --git a/build/modules.conf.in b/build/modules.conf.in index 8453e290b7..0210972cc3 100644 --- a/build/modules.conf.in +++ b/build/modules.conf.in @@ -60,6 +60,9 @@ applications/mod_voicemail #asr_tts/mod_flite #asr_tts/mod_pocketsphinx #asr_tts/mod_tts_commandline +#asr_tts/mod_google_tts +#asr_tts/mod_openai_tts +#asr_tts/mod_piper_tts codecs/mod_amr #codecs/mod_amrwb codecs/mod_b64 diff --git a/configure.ac b/configure.ac index 6991398ae7..a7640174f4 100644 --- a/configure.ac +++ b/configure.ac @@ -2155,6 +2155,9 @@ AC_CONFIG_FILES([Makefile src/mod/asr_tts/mod_flite/Makefile src/mod/asr_tts/mod_pocketsphinx/Makefile src/mod/asr_tts/mod_tts_commandline/Makefile + src/mod/asr_tts/mod_google_tts/Makefile + src/mod/asr_tts/mod_openai_tts/Makefile + src/mod/asr_tts/mod_piper_tts/Makefile src/mod/codecs/mod_amr/Makefile src/mod/codecs/mod_amrwb/Makefile src/mod/codecs/mod_b64/Makefile diff --git a/src/mod/asr_tts/mod_google_tts/Makefile.am b/src/mod/asr_tts/mod_google_tts/Makefile.am new file mode 100644 index 0000000000..a0acc9a7dc --- /dev/null +++ b/src/mod/asr_tts/mod_google_tts/Makefile.am @@ -0,0 +1,11 @@ + +include $(top_srcdir)/build/modmake.rulesam +MODNAME=mod_google_tts + +mod_LTLIBRARIES = mod_google_tts.la +mod_google_tts_la_SOURCES = mod_google_tts.c utils.c +mod_google_tts_la_CFLAGS = $(AM_CFLAGS) -I. 
+mod_google_tts_la_LIBADD = $(switch_builddir)/libfreeswitch.la +mod_google_tts_la_LDFLAGS = -avoid-version -module -no-undefined -shared + +$(am_mod_google_tts_la_OBJECTS): mod_google_tts.h diff --git a/src/mod/asr_tts/mod_google_tts/conf/autoload_configs/google_tts.conf.xml b/src/mod/asr_tts/mod_google_tts/conf/autoload_configs/google_tts.conf.xml new file mode 100644 index 0000000000..bc85f56ca0 --- /dev/null +++ b/src/mod/asr_tts/mod_google_tts/conf/autoload_configs/google_tts.conf.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/mod/asr_tts/mod_google_tts/conf/dialplan/example.xml b/src/mod/asr_tts/mod_google_tts/conf/dialplan/example.xml new file mode 100644 index 0000000000..0d571e43a0 --- /dev/null +++ b/src/mod/asr_tts/mod_google_tts/conf/dialplan/example.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/src/mod/asr_tts/mod_google_tts/mod_google_tts.c b/src/mod/asr_tts/mod_google_tts/mod_google_tts.c new file mode 100644 index 0000000000..01793278da --- /dev/null +++ b/src/mod/asr_tts/mod_google_tts/mod_google_tts.c @@ -0,0 +1,509 @@ +/* + * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. 
+ * + * Module Contributor(s): + * Konstantin Alexandrin + * + * + * Provides the ability to use Google TTS service in the Freeswitch + * https://cloud.google.com/text-to-speech/docs/reference/rest + * + * Development repository: + * https://github.com/akscf/mod_google_tts + * + */ +#include "mod_google_tts.h" + +static struct { + char *file_ext; + char *cache_path; + char *tmp_path; + char *opt_gender; + char *opt_encoding; + char *user_agent; + char *api_url; + char *api_key; + char *proxy; + char *proxy_credentials; + char *api_url_ep; + uint32_t file_size_max; + uint32_t request_timeout; // seconds + uint32_t connect_timeout; // seconds + uint8_t fl_voice_name_as_lang; + uint8_t fl_log_http_error; + uint8_t fl_cache_enabled; +} globals; + +SWITCH_MODULE_LOAD_FUNCTION(mod_google_tts_load); +SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_google_tts_shutdown); +SWITCH_MODULE_DEFINITION(mod_google_tts, mod_google_tts_load, mod_google_tts_shutdown, NULL); + + +static size_t curl_io_write_callback(char *buffer, size_t size, size_t nitems, void *user_data) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)user_data; + size_t len = (size * nitems); + + if(len > 0 && tts_ctx->curl_recv_buffer) { + switch_buffer_write(tts_ctx->curl_recv_buffer, buffer, len); + } + + return len; +} + +static size_t curl_io_read_callback(char *buffer, size_t size, size_t nitems, void *user_data) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)user_data; + size_t nmax = (size * nitems); + size_t ncur = (tts_ctx->curl_send_buffer_len > nmax) ? nmax : tts_ctx->curl_send_buffer_len; + + memmove(buffer, tts_ctx->curl_send_buffer_ref, ncur); + tts_ctx->curl_send_buffer_ref += ncur; + tts_ctx->curl_send_buffer_len -= ncur; + + return ncur; +} + +static switch_status_t curl_perform(tts_ctx_t *tts_ctx, char *text) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + CURL *curl_handle = NULL; + switch_curl_slist_t *headers = NULL; + switch_CURLcode curl_ret = 0; + long http_resp = 0; + const char *xgender = (tts_ctx->gender ? 
tts_ctx->gender : globals.opt_gender); + const char *ygender = (!globals.fl_voice_name_as_lang && tts_ctx->voice_name) ? tts_ctx->voice_name : NULL; + char *pdata = NULL; + char *qtext = NULL; + + if(text) { + qtext = escape_squotes(text); + } + + pdata = switch_mprintf( + "{'input':{'text':'%s'},'voice':{'ssmlGender':'%s', 'languageCode':'%s'},'audioConfig':{'audioEncoding':'%s', 'sampleRateHertz':'%d'}}\n\n", + qtext ? qtext : "", + ygender ? ygender : xgender, + tts_ctx->lang_code, + globals.opt_encoding, + tts_ctx->samplerate + ); + +#ifdef GTTS_DEBUG + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "CURL: URL=[%s], PDATA=[%s]\n", globals.api_url_ep, pdata); +#endif + + tts_ctx->curl_send_buffer_len = strlen(pdata); + tts_ctx->curl_send_buffer_ref = pdata; + + curl_handle = switch_curl_easy_init(); + + headers = switch_curl_slist_append(headers, "Content-Type: application/json; charset=utf-8"); + headers = switch_curl_slist_append(headers, "Expect:"); + + switch_curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, headers); + switch_curl_easy_setopt(curl_handle, CURLOPT_POST, 1); + switch_curl_easy_setopt(curl_handle, CURLOPT_NOSIGNAL, 1); + + switch_curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDSIZE, tts_ctx->curl_send_buffer_len); + switch_curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDS, (void *)pdata); + switch_curl_easy_setopt(curl_handle, CURLOPT_READFUNCTION, curl_io_read_callback); + switch_curl_easy_setopt(curl_handle, CURLOPT_READDATA, (void *)tts_ctx); + + switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, curl_io_write_callback); + switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)tts_ctx); + + if(globals.connect_timeout > 0) { + switch_curl_easy_setopt(curl_handle, CURLOPT_CONNECTTIMEOUT, globals.connect_timeout); + } + if(globals.request_timeout > 0) { + switch_curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, globals.request_timeout); + } + if(globals.user_agent) { + switch_curl_easy_setopt(curl_handle, 
CURLOPT_USERAGENT, globals.user_agent); + } + if(strncasecmp(globals.api_url_ep, "https", 5) == 0) { + switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYPEER, 0); + switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYHOST, 0); + } + if(globals.proxy) { + if(globals.proxy_credentials != NULL) { + switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYAUTH, CURLAUTH_ANY); + switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYUSERPWD, globals.proxy_credentials); + } + if(strncasecmp(globals.proxy, "https", 5) == 0) { + switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY_SSL_VERIFYPEER, 0); + } + switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY, globals.proxy); + } + + switch_curl_easy_setopt(curl_handle, CURLOPT_URL, globals.api_url_ep); + + curl_ret = switch_curl_easy_perform(curl_handle); + if(!curl_ret) { + switch_curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE, &http_resp); + if(!http_resp) { switch_curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CONNECTCODE, &http_resp); } + } else { + http_resp = curl_ret; + } + + if(http_resp != 200) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "http-error=[%ld] (%s)\n", http_resp, globals.api_url); + status = SWITCH_STATUS_FALSE; + } + + if(tts_ctx->curl_recv_buffer) { + if(switch_buffer_inuse(tts_ctx->curl_recv_buffer) > 0) { + switch_buffer_write(tts_ctx->curl_recv_buffer, "\0", 1); + } + } + + if(curl_handle) { switch_curl_easy_cleanup(curl_handle); } + if(headers) { switch_curl_slist_free_all(headers); } + + switch_safe_free(pdata); + switch_safe_free(qtext); + return status; +} + +static switch_status_t extract_audio(tts_ctx_t *tts_ctx, char *buf_in, uint32_t buf_len) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + switch_memory_pool_t *pool = tts_ctx->pool; + switch_file_t *fd = NULL; + char *buf_out = NULL, *ptr = NULL; + size_t len = buf_len, dec_len = 0; + uint32_t ofs1 = 0, ofs2 = 0; + + if((ptr = strnstr(buf_in, "\"audioContent\"", len)) == NULL) { + switch_log_printf(SWITCH_CHANNEL_LOG, 
SWITCH_LOG_WARNING, "Malformed media content\n"); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + for(ofs1 = ((ptr - buf_in) + 14); ofs1 < len; ofs1++) { + if(buf_in[ofs1] == '"') { ofs1++; break; } + } + if(ofs1 >= len) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Malformed media content\n"); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + for(ofs2 = len; ofs2 > ofs1; ofs2--) { + if(buf_in[ofs2] == '"') { buf_in[ofs2]='\0'; ofs2--; break; } + } + if(ofs2 <= ofs1) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Malformed media content\n"); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + ptr = (void *)(buf_in + ofs1); + len = (ofs2 - ofs1); + dec_len = BASE64_DEC_SZ(len); + + if(dec_len < 4 ) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Malformed media content\n"); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + + if((buf_out = switch_core_alloc(pool, dec_len)) == NULL) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_core_alloc() failed\n"); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + + len = switch_b64_decode(ptr, buf_out, dec_len); + if(len != dec_len) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "switch_b64_decode: (len != dec_len)\n"); + dec_len = len; + } + + status = switch_file_open(&fd, tts_ctx->dst_file, + (SWITCH_FOPEN_WRITE | SWITCH_FOPEN_CREATE | SWITCH_FOPEN_TRUNCATE | SWITCH_FOPEN_BINARY), + (SWITCH_FPROT_UREAD | SWITCH_FPROT_UWRITE), pool); + if(status != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Unable to create output file (%s)\n", tts_ctx->dst_file); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + + status = switch_file_write(fd, buf_out, &len); + if(status != SWITCH_STATUS_SUCCESS || len != dec_len) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Unable to write into file (%s)\n", tts_ctx->dst_file); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + +out: + 
if(fd) { + switch_file_close(fd); + } + return status; +} + +// --------------------------------------------------------------------------------------------------------------------------------------------- +// speech api +// --------------------------------------------------------------------------------------------------------------------------------------------- +static switch_status_t speech_open(switch_speech_handle_t *sh, const char *voice, int samplerate, int channels, switch_speech_flag_t *flags) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + char name_uuid[SWITCH_UUID_FORMATTED_LENGTH + 1] = { 0 }; + tts_ctx_t *tts_ctx = NULL; + + tts_ctx = switch_core_alloc(sh->memory_pool, sizeof(tts_ctx_t)); + tts_ctx->pool = sh->memory_pool; + tts_ctx->fhnd = switch_core_alloc(tts_ctx->pool, sizeof(switch_file_handle_t)); + tts_ctx->voice_name = switch_core_strdup(tts_ctx->pool, voice); + tts_ctx->lang_code = (globals.fl_voice_name_as_lang && voice) ? switch_core_strdup(sh->memory_pool, lang2bcp47(voice)) : "en-gb"; + tts_ctx->channels = channels; + tts_ctx->samplerate = samplerate; + tts_ctx->dst_file = NULL; + + sh->private_info = tts_ctx; + + if((status = switch_buffer_create_dynamic(&tts_ctx->curl_recv_buffer, 1024, 8192, globals.file_size_max)) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_buffer_create_dynamic()\n"); + goto out; + } + + if(!globals.fl_cache_enabled) { + switch_uuid_str((char *)name_uuid, sizeof(name_uuid)); + tts_ctx->dst_file = switch_core_sprintf(sh->memory_pool, "%s%sgoogle-%s.%s", + globals.tmp_path, + SWITCH_PATH_SEPARATOR, + name_uuid, + globals.file_ext + ); + } + +out: + return status; +} + +static switch_status_t speech_close(switch_speech_handle_t *sh, switch_speech_flag_t *flags) { + tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info; + assert(tts_ctx != NULL); + + if(switch_test_flag(tts_ctx->fhnd, SWITCH_FILE_OPEN)) { + switch_core_file_close(tts_ctx->fhnd); + } + + 
if(tts_ctx->curl_recv_buffer) { + switch_buffer_destroy(&tts_ctx->curl_recv_buffer); + } + + if(tts_ctx->dst_file && !globals.fl_cache_enabled) { + unlink(tts_ctx->dst_file); + } + + return SWITCH_STATUS_SUCCESS; +} + +static switch_status_t speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; + switch_status_t status = SWITCH_STATUS_SUCCESS; + char digest[SWITCH_MD5_DIGEST_STRING_SIZE + 1] = { 0 }; + const void *ptr = NULL; + uint32_t recv_len = 0; + + assert(tts_ctx != NULL); + + if(!tts_ctx->dst_file) { + switch_md5_string(digest, (void *) text, strlen(text)); + tts_ctx->dst_file = switch_core_sprintf(sh->memory_pool, "%s%s%s.%s", + globals.cache_path, + SWITCH_PATH_SEPARATOR, + digest, + globals.file_ext + ); + } + + if(switch_file_exists(tts_ctx->dst_file, tts_ctx->pool) == SWITCH_STATUS_SUCCESS) { + if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->dst_file, tts_ctx->channels, tts_ctx->samplerate, + (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", tts_ctx->dst_file); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + } else { + switch_buffer_zero(tts_ctx->curl_recv_buffer); + status = curl_perform(tts_ctx , text); + recv_len = switch_buffer_peek_zerocopy(tts_ctx->curl_recv_buffer, &ptr); + if(status == SWITCH_STATUS_SUCCESS) { + if((status = extract_audio(tts_ctx, (char *)ptr, recv_len)) == SWITCH_STATUS_SUCCESS) { + if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->dst_file, tts_ctx->channels, tts_ctx->samplerate, + (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", tts_ctx->dst_file); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + } else { + switch_log_printf(SWITCH_CHANNEL_LOG, 
SWITCH_LOG_ERROR, "Unable to extract media\n"); + status = SWITCH_STATUS_FALSE; + } + } else { + if(globals.fl_log_http_error && recv_len > 0) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Services response: %s\n", (char *)ptr); + } + } + } +out: + return status; +} + +static switch_status_t speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *data_len, switch_speech_flag_t *flags) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; + size_t len = (*data_len / sizeof(int16_t)); + + assert(tts_ctx != NULL); + + if(tts_ctx->fhnd->file_interface == NULL) { + return SWITCH_STATUS_FALSE; + } + + if(switch_core_file_read(tts_ctx->fhnd, data, &len) != SWITCH_STATUS_SUCCESS) { + switch_core_file_close(tts_ctx->fhnd); + return SWITCH_STATUS_FALSE; + } + + *data_len = (len * sizeof(int16_t)); + if(!data_len) { + switch_core_file_close(tts_ctx->fhnd); + return SWITCH_STATUS_BREAK; + } + + return SWITCH_STATUS_SUCCESS; +} + +static void speech_flush_tts(switch_speech_handle_t *sh) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; + + assert(tts_ctx != NULL); + + if(tts_ctx->fhnd != NULL && tts_ctx->fhnd->file_interface != NULL) { + switch_core_file_close(tts_ctx->fhnd); + } +} + +static void speech_text_param_tts(switch_speech_handle_t *sh, char *param, const char *val) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; + + assert(tts_ctx != NULL); + + if(strcasecmp(param, "lang") == 0) { + if(val) tts_ctx->lang_code = switch_core_strdup(sh->memory_pool, lang2bcp47(val)); + } else if(strcasecmp(param, "gender") == 0) { + if(val) tts_ctx->gender = switch_core_strdup(sh->memory_pool, fmt_gender(val)); + } +} + +static void speech_numeric_param_tts(switch_speech_handle_t *sh, char *param, int val) { +} + +static void speech_float_param_tts(switch_speech_handle_t *sh, char *param, double val) { +} + +// 
--------------------------------------------------------------------------------------------------------------------------------------------- +// main +// --------------------------------------------------------------------------------------------------------------------------------------------- +SWITCH_MODULE_LOAD_FUNCTION(mod_google_tts_load) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + switch_xml_t cfg, xml, settings, param; + switch_speech_interface_t *speech_interface; + + memset(&globals, 0, sizeof(globals)); + + if((xml = switch_xml_open_cfg(MOD_CONFIG_NAME, &cfg, NULL)) == NULL) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open configuration: %s\n", MOD_CONFIG_NAME); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + + if((settings = switch_xml_child(cfg, "settings"))) { + for (param = switch_xml_child(settings, "param"); param; param = param->next) { + char *var = (char *)switch_xml_attr_soft(param, "name"); + char *val = (char *)switch_xml_attr_soft(param, "value"); + + if(!strcasecmp(var, "api-url")) { + if(val) globals.api_url = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "api-key")) { + if(val) globals.api_key = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "cache-path")) { + if(val) globals.cache_path = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "gender")) { + if(val) globals.opt_gender = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "encoding")) { + if(val) globals.opt_encoding = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "user-agent")) { + if(val) globals.user_agent = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "request-timeout")) { + if(val) globals.request_timeout = atoi(val); + } else if(!strcasecmp(var, "connect-timeout")) { + if(val) globals.connect_timeout = atoi(val); + } else if(!strcasecmp(var, "voice-name-as-language")) { + if(val) globals.fl_voice_name_as_lang = switch_true(val); + } else if(!strcasecmp(var, 
"log-http-errors")) { + if(val) globals.fl_log_http_error = switch_true(val); + } else if(!strcasecmp(var, "cache-enable")) { + if(val) globals.fl_cache_enabled = switch_true(val); + } else if(!strcasecmp(var, "file-size-max")) { + if(val) globals.file_size_max = atoi(val); + } else if(!strcasecmp(var, "proxy")) { + if(val) globals.proxy = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "proxy-credentials")) { + if(val) globals.proxy_credentials = switch_core_strdup(pool, val); + } + } + } + + if(!globals.api_url) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing required parameter: api-url\n"); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + if(!globals.api_key) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing required parameter: api-key\n"); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + + globals.tmp_path = SWITCH_GLOBAL_dirs.temp_dir; + globals.api_url_ep = switch_string_replace(globals.api_url, "${api-key}", globals.api_key); + globals.cache_path = (globals.cache_path == NULL ? "/tmp/google-tts-cache" : globals.cache_path); + globals.opt_gender = fmt_gender(globals.opt_gender == NULL ? "female" : globals.opt_gender); + globals.opt_encoding = fmt_encode(globals.opt_encoding == NULL ? "mp3" : globals.opt_encoding); + globals.file_size_max = globals.file_size_max > 0 ? 
globals.file_size_max : FILE_SIZE_MAX; + globals.file_ext = fmt_enct2fext(globals.opt_encoding); + + if(!globals.api_url_ep) { + globals.api_url_ep = strdup(globals.api_key); + } + + if(switch_directory_exists(globals.cache_path, NULL) != SWITCH_STATUS_SUCCESS) { + switch_dir_make(globals.cache_path, SWITCH_FPROT_OS_DEFAULT, NULL); + } + + *module_interface = switch_loadable_module_create_module_interface(pool, modname); + speech_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_SPEECH_INTERFACE); + speech_interface->interface_name = "google"; + + speech_interface->speech_open = speech_open; + speech_interface->speech_close = speech_close; + speech_interface->speech_feed_tts = speech_feed_tts; + speech_interface->speech_read_tts = speech_read_tts; + speech_interface->speech_flush_tts = speech_flush_tts; + + speech_interface->speech_text_param_tts = speech_text_param_tts; + speech_interface->speech_numeric_param_tts = speech_numeric_param_tts; + speech_interface->speech_float_param_tts = speech_float_param_tts; + + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "GoogleTTS (%s)\n", MOD_VERSION); +out: + if(xml) { + switch_xml_free(xml); + } + return status; +} + +SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_google_tts_shutdown) { + + switch_safe_free(globals.api_url_ep); + + return SWITCH_STATUS_SUCCESS; +} diff --git a/src/mod/asr_tts/mod_google_tts/mod_google_tts.h b/src/mod/asr_tts/mod_google_tts/mod_google_tts.h new file mode 100644 index 0000000000..18090f9fee --- /dev/null +++ b/src/mod/asr_tts/mod_google_tts/mod_google_tts.h @@ -0,0 +1,58 @@ +/* + * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Module Contributor(s): + * Konstantin Alexandrin + * + * + */ +#ifndef MOD_GOOGLE_TTS_H +#define MOD_GOOGLE_TTS_H + +#include +#include + +#define MOD_VERSION "1.0_gcp_api_v1" +#define MOD_CONFIG_NAME "google_tts.conf" +#define FILE_SIZE_MAX (2*1024*1024) +#define BASE64_DEC_SZ(n) ((n*3)/4) +//#define GTTS_DEBUG + +typedef struct { + switch_memory_pool_t *pool; + switch_file_handle_t *fhnd; + switch_buffer_t *curl_recv_buffer; + char *curl_send_buffer_ref; + char *lang_code; + char *gender; + char *voice_name; + char *dst_file; + uint32_t samplerate; + uint32_t channels; + size_t curl_send_buffer_len; +} tts_ctx_t; + + +/* utils.c */ +char *lang2bcp47(const char *lng); +char *fmt_enct2fext(const char *fmt); +char *fmt_gender(const char *gender); +char *fmt_encode(const char *fmt); + +char *strnstr(const char *s, const char *find, size_t slen); +char *escape_squotes(const char *string); + +#endif diff --git a/src/mod/asr_tts/mod_google_tts/utils.c b/src/mod/asr_tts/mod_google_tts/utils.c new file mode 100644 index 0000000000..b83043199c --- /dev/null +++ b/src/mod/asr_tts/mod_google_tts/utils.c @@ -0,0 +1,112 @@ +/* + * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Module Contributor(s): + * Konstantin Alexandrin + * + * + */ +#include "mod_google_tts.h" + +char *lang2bcp47(const char *lng) { + if(strcasecmp(lng, "en") == 0) { return "en-gb"; } + if(strcasecmp(lng, "de") == 0) { return "de-de"; } + if(strcasecmp(lng, "es") == 0) { return "es-es"; } + if(strcasecmp(lng, "it") == 0) { return "it-it"; } + if(strcasecmp(lng, "ru") == 0) { return "ru-ru"; } + return (char *)lng; +} + +char *fmt_gender(const char *gender) { + if(strcasecmp(gender, "male") == 0) { return "MALE"; } + if(strcasecmp(gender, "female") == 0) { return "FEMALE"; } + return (char *)gender; +} + +char *fmt_encode(const char *fmt) { + if(strcasecmp(fmt, "mp3") == 0) { return "MP3"; } + if(strcasecmp(fmt, "wav") == 0) { return "LINEAR16"; } + if(strcasecmp(fmt, "ulaw") == 0) { return "MULAW"; } + if(strcasecmp(fmt, "alaw") == 0) { return "ALAW"; } + return (char *)fmt; +} + +char *fmt_enct2fext(const char *fmt) { + if(strcasecmp(fmt, "mp3") == 0) { return "mp3"; } + if(strcasecmp(fmt, "linear16") == 0) { return "wav"; } + if(strcasecmp(fmt, "mulaw") == 0) { return "ulaw"; } + if(strcasecmp(fmt, "alaw") == 0) { return "alaw"; } + return (char *)fmt; +} + +char *escape_squotes(const char *string) { + size_t string_len = strlen(string); + size_t i; + size_t n = 0; + size_t dest_len = 0; + char *dest; + + dest_len = strlen(string) + 1; + for (i = 0; i < string_len; i++) { + switch (string[i]) { + case '\'': dest_len += 1; break; + } + } + + dest = (char *) malloc(sizeof(char) * dest_len); + switch_assert(dest); + + for (i = 0; i < string_len; i++) { + switch (string[i]) { + case '\'': + dest[n++] = '\\'; + dest[n++] = '\''; + break; + default: 
+ dest[n++] = string[i]; + } + } + dest[n++] = '\0'; + + switch_assert(n == dest_len); + return dest; +} + +/*- + * Copyright (c) 2001 Mike Barcroft + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + */ +char *strnstr(const char *s, const char *find, size_t slen) { + char c, sc; + size_t len; + + if ((c = *find++) != '\0') { + len = strlen(find); + do { + do { + if (slen-- < 1 || (sc = *s++) == '\0') + return (NULL); + } while (sc != c); + if (len > slen) + return (NULL); + } while (strncmp(s, find, len) != 0); + s--; + } + + return ((char *)s); +} + diff --git a/src/mod/asr_tts/mod_openai_tts/Makefile.am b/src/mod/asr_tts/mod_openai_tts/Makefile.am new file mode 100644 index 0000000000..c15cee1758 --- /dev/null +++ b/src/mod/asr_tts/mod_openai_tts/Makefile.am @@ -0,0 +1,11 @@ + +include $(top_srcdir)/build/modmake.rulesam +MODNAME=mod_openai_tts + +mod_LTLIBRARIES = mod_openai_tts.la +mod_openai_tts_la_SOURCES = mod_openai_tts.c utils.c +mod_openai_tts_la_CFLAGS = $(AM_CFLAGS) -I. 
+mod_openai_tts_la_LIBADD = $(switch_builddir)/libfreeswitch.la +mod_openai_tts_la_LDFLAGS = -avoid-version -module -no-undefined -shared + +$(am_mod_openai_tts_la_OBJECTS): mod_openai_tts.h diff --git a/src/mod/asr_tts/mod_openai_tts/conf/autoload_configs/openai_tts.conf.xml b/src/mod/asr_tts/mod_openai_tts/conf/autoload_configs/openai_tts.conf.xml new file mode 100644 index 0000000000..5ebf5c5d58 --- /dev/null +++ b/src/mod/asr_tts/mod_openai_tts/conf/autoload_configs/openai_tts.conf.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/mod/asr_tts/mod_openai_tts/conf/dialplan/example.xml b/src/mod/asr_tts/mod_openai_tts/conf/dialplan/example.xml new file mode 100644 index 0000000000..3829423d2c --- /dev/null +++ b/src/mod/asr_tts/mod_openai_tts/conf/dialplan/example.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/src/mod/asr_tts/mod_openai_tts/mod_openai_tts.c b/src/mod/asr_tts/mod_openai_tts/mod_openai_tts.c new file mode 100644 index 0000000000..acedadfca1 --- /dev/null +++ b/src/mod/asr_tts/mod_openai_tts/mod_openai_tts.c @@ -0,0 +1,494 @@ +/* + * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. 
+ *
+ * Module Contributor(s):
+ *  Konstantin Alexandrin
+ *
+ *
+ * Provides the ability to use the OpenAI TTS service in FreeSWITCH
+ * https://platform.openai.com/docs/guides/text-to-speech
+ *
+ * Development repository:
+ * https://github.com/akscf/mod_openai_tts
+ *
+ */
+#include "mod_openai_tts.h"
+
+/* Module-wide settings; zeroed and then filled by mod_openai_tts_load() */
+static struct {
+    switch_mutex_t *mutex;              // taken around lookups in 'models'
+    switch_hash_t *models;              // language -> tts_model_info_t
+    char *cache_path;                   // directory of cached audio files (cache-path)
+    char *tmp_path;                     // directory of one-shot audio files (cache disabled)
+    char *opt_encoding;                 // used as the audio file extension (encoding)
+    char *user_agent;
+    char *api_url;
+    char *api_key;
+    char *proxy;
+    char *proxy_credentials;
+    uint32_t file_size_max;             // upper bound of the response buffer
+    uint32_t request_timeout;           // seconds
+    uint32_t connect_timeout;           // seconds
+    uint8_t fl_voice_name_as_language;
+    uint8_t fl_log_http_error;
+    uint8_t fl_cache_enabled;
+} globals;
+
+SWITCH_MODULE_LOAD_FUNCTION(mod_openai_tts_load);
+SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_openai_tts_shutdown);
+SWITCH_MODULE_DEFINITION(mod_openai_tts, mod_openai_tts_load, mod_openai_tts_shutdown, NULL);
+
+/* Returns the model registered for 'lang', or NULL when lang is NULL or unknown */
+static tts_model_info_t *tts_model_lookup(const char *lang) {
+    tts_model_info_t *model = NULL;
+
+    if(!lang) { return NULL; }
+
+    switch_mutex_lock(globals.mutex);
+    model = switch_core_hash_find(globals.models, lang);
+    switch_mutex_unlock(globals.mutex);
+
+    return model;
+}
+
+/*
+ * libcurl write callback: appends one chunk of the response body to tts_ctx->curl_recv_buffer.
+ * Returning anything but the chunk size makes libcurl abort the transfer, which is what we want
+ * when the chunk could not be stored: a silently truncated body would be saved as audio.
+ */
+static size_t curl_io_write_callback(char *buffer, size_t size, size_t nitems, void *user_data) {
+    tts_ctx_t *tts_ctx = (tts_ctx_t *)user_data;
+    size_t len = (size * nitems);
+
+    if(len > 0 && tts_ctx->curl_recv_buffer) {
+        /* switch_buffer_write() yields 0 when the data does not fit (buffer is capped at file-size-max) */
+        if(switch_buffer_write(tts_ctx->curl_recv_buffer, buffer, len) == 0) {
+            return 0;
+        }
+    }
+
+    return len;
+}
+
+/* libcurl read callback: hands out the not yet consumed part of the request body */
+static size_t curl_io_read_callback(char *buffer, size_t size, size_t nitems, void *user_data) {
+    tts_ctx_t *tts_ctx = (tts_ctx_t *)user_data;
+    size_t nmax = (size * nitems);
+    size_t ncur = (tts_ctx->curl_send_buffer_len > nmax) ? nmax : tts_ctx->curl_send_buffer_len;
+
+    memmove(buffer, tts_ctx->curl_send_buffer_ref, ncur);
+    tts_ctx->curl_send_buffer_ref += ncur;
+    tts_ctx->curl_send_buffer_len -= ncur;
+
+    return ncur;
+}
+
+/*
+ * Posts {"model","voice","input"} to globals.api_url and collects the response body in
+ * tts_ctx->curl_recv_buffer (the caller empties the buffer beforehand).
+ *
+ * The caller must make sure that a voice and a model are available, i.e. alt_voice/alt_model
+ * or model_info is set.
+ *
+ * Returns SWITCH_STATUS_SUCCESS only for HTTP 200. On failure the collected body is
+ * NUL-terminated (when that still fits) so that it can be logged by the caller.
+ */
+static switch_status_t curl_perform(tts_ctx_t *tts_ctx, char *text) {
+    switch_status_t status = SWITCH_STATUS_SUCCESS;
+    CURL *curl_handle = NULL;
+    switch_curl_slist_t *headers = NULL;
+    switch_CURLcode curl_ret = 0;
+    long http_resp = 0;
+    const char *voice_local = (tts_ctx->alt_voice ? tts_ctx->alt_voice : tts_ctx->model_info->voice);
+    const char *model_local = (tts_ctx->alt_model ? tts_ctx->alt_model : tts_ctx->model_info->model);
+    char *pdata = NULL;
+    char *qtext = NULL;
+
+    if(text) {
+        qtext = escape_dquotes(text);
+    }
+    pdata = switch_mprintf("{\"model\":\"%s\",\"voice\":\"%s\",\"input\":\"%s\"}\n",
+                            model_local,
+                            voice_local,
+                            qtext ? qtext : ""
+    );
+    if(!pdata) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_mprintf() fail\n");
+        switch_goto_status(SWITCH_STATUS_GENERR, out);
+    }
+
+#ifdef OAITTS_DEBUG
+    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "CURL: URL=[%s], PDATA=[%s]\n", globals.api_url, pdata);
+#endif
+
+    tts_ctx->curl_send_buffer_len = strlen(pdata);
+    tts_ctx->curl_send_buffer_ref = pdata;
+
+    curl_handle = switch_curl_easy_init();
+    if(!curl_handle) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_curl_easy_init() fail\n");
+        switch_goto_status(SWITCH_STATUS_GENERR, out);
+    }
+
+    headers = switch_curl_slist_append(headers, "Content-Type: application/json; charset=utf-8");
+    headers = switch_curl_slist_append(headers, "Expect:");
+
+    /* numeric options are read by libcurl as 'long', hence the explicit casts */
+    switch_curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, headers);
+    switch_curl_easy_setopt(curl_handle, CURLOPT_POST, 1L);
+    switch_curl_easy_setopt(curl_handle, CURLOPT_NOSIGNAL, 1L);
+
+    switch_curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDSIZE, (long) tts_ctx->curl_send_buffer_len);
+    switch_curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDS, (void *) pdata);
+    switch_curl_easy_setopt(curl_handle, CURLOPT_READFUNCTION, curl_io_read_callback);
+    switch_curl_easy_setopt(curl_handle, CURLOPT_READDATA, (void *) tts_ctx);
+
+    switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, curl_io_write_callback);
+    switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *) tts_ctx);
+
+    if(globals.connect_timeout > 0) {
+        switch_curl_easy_setopt(curl_handle, CURLOPT_CONNECTTIMEOUT, (long) globals.connect_timeout);
+    }
+    if(globals.request_timeout > 0) {
+        switch_curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, (long) globals.request_timeout);
+    }
+    if(globals.user_agent) {
+        switch_curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, globals.user_agent);
+    }
+
+    /* NOTE(review): certificate and host verification are switched off although the request
+     * carries the API key; consider making this configurable and enabled by default. */
+    if(strncasecmp(globals.api_url, "https", 5) == 0) {
+        switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYPEER, 0L);
+        switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYHOST, 0L);
+    }
+    if(globals.proxy) {
+        if(globals.proxy_credentials != NULL) {
+            switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYAUTH, (long) CURLAUTH_ANY);
+            switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYUSERPWD, globals.proxy_credentials);
+        }
+        if(strncasecmp(globals.proxy, "https", 5) == 0) {
+            switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY_SSL_VERIFYPEER, 0L);
+        }
+        switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY, globals.proxy);
+    }
+
+    switch_curl_easy_setopt(curl_handle, CURLOPT_XOAUTH2_BEARER, globals.api_key);
+    switch_curl_easy_setopt(curl_handle, CURLOPT_HTTPAUTH, (long) CURLAUTH_BEARER);
+
+    switch_curl_easy_setopt(curl_handle, CURLOPT_URL, globals.api_url);
+
+    curl_ret = switch_curl_easy_perform(curl_handle);
+    if(curl_ret) {
+        /* transport failure: this is a libcurl code, not an HTTP status */
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "curl-error=[%d] (%s)\n", (int) curl_ret, globals.api_url);
+        status = SWITCH_STATUS_FALSE;
+    } else {
+        switch_curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE, &http_resp);
+        if(!http_resp) { switch_curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CONNECTCODE, &http_resp); }
+
+        if(http_resp != 200) {
+            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "http-error=[%ld] (%s)\n", http_resp, globals.api_url);
+            status = SWITCH_STATUS_FALSE;
+        }
+    }
+
+    /* terminate the body only when it is going to be logged; a successful body is audio
+     * and must reach the file byte for byte */
+    if(status != SWITCH_STATUS_SUCCESS && tts_ctx->curl_recv_buffer) {
+        if(switch_buffer_inuse(tts_ctx->curl_recv_buffer) > 0) {
+            switch_buffer_write(tts_ctx->curl_recv_buffer, "\0", 1);
+        }
+    }
+
+out:
+    if(curl_handle) { switch_curl_easy_cleanup(curl_handle); }
+    if(headers) { switch_curl_slist_free_all(headers); }
+
+    switch_safe_free(pdata);
+    switch_safe_free(qtext);
+    return status;
+}
+
+
+// ---------------------------------------------------------------------------------------------------------------------------------------------
+// speech api
+// ---------------------------------------------------------------------------------------------------------------------------------------------
+/*
+ * Allocates the per-handle context from sh->memory_pool and stores it in sh->private_info.
+ * With voice-name-as-language enabled, 'voice' selects the configured model.
+ * When the cache is disabled, a unique file name in the temp directory is reserved for the handle.
+ */
+static switch_status_t speech_open(switch_speech_handle_t *sh, const char *voice, int samplerate, int channels, switch_speech_flag_t *flags) {
+    switch_status_t status = SWITCH_STATUS_SUCCESS;
+    char name_uuid[SWITCH_UUID_FORMATTED_LENGTH + 1] = { 0 };
+    tts_ctx_t *tts_ctx = NULL;
+
+    tts_ctx = switch_core_alloc(sh->memory_pool, sizeof(tts_ctx_t));
+    tts_ctx->pool = sh->memory_pool;
+    tts_ctx->fhnd = switch_core_alloc(tts_ctx->pool, sizeof(switch_file_handle_t));
+    tts_ctx->language = (globals.fl_voice_name_as_language && voice) ? switch_core_strdup(sh->memory_pool, voice) : NULL;
+    tts_ctx->channels = channels;
+    tts_ctx->samplerate = samplerate;
+    tts_ctx->dst_file = NULL;
+
+    sh->private_info = tts_ctx;
+
+    if(tts_ctx->language) {
+        tts_ctx->model_info = tts_model_lookup(tts_ctx->language);
+    }
+
+    if((status = switch_buffer_create_dynamic(&tts_ctx->curl_recv_buffer, 1024, 8192, globals.file_size_max)) != SWITCH_STATUS_SUCCESS) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_buffer_create_dynamic() fail\n");
+        goto out;
+    }
+
+    if(!globals.fl_cache_enabled) {
+        switch_uuid_str((char *)name_uuid, sizeof(name_uuid));
+        tts_ctx->dst_file = switch_core_sprintf(sh->memory_pool, "%s%sopenai-%s.%s",
+                                                globals.tmp_path,
+                                                SWITCH_PATH_SEPARATOR,
+                                                name_uuid,
+                                                enc2ext(globals.opt_encoding)
+        );
+    }
+
+out:
+    return status;
+}
+
+/* Closes the audio file, releases the response buffer and removes the one-shot file (cache disabled) */
+static switch_status_t speech_close(switch_speech_handle_t *sh, switch_speech_flag_t *flags) {
+    tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info;
+    assert(tts_ctx != NULL);
+
+    if(switch_test_flag(tts_ctx->fhnd, SWITCH_FILE_OPEN)) {
+        switch_core_file_close(tts_ctx->fhnd);
+    }
+
+    if(tts_ctx->curl_recv_buffer) {
+        switch_buffer_destroy(&tts_ctx->curl_recv_buffer);
+    }
+
+    if(tts_ctx->dst_file && !globals.fl_cache_enabled) {
+        unlink(tts_ctx->dst_file);
+    }
+
+    return SWITCH_STATUS_SUCCESS;
+}
+
+/*
+ * Makes the audio for 'text' available through tts_ctx->fhnd.
+ *
+ * Cache enabled : the file name is derived from the MD5 of the text, recomputed on every call;
+ *                 an existing file is played without contacting the service.
+ * Cache disabled: the per-handle temp file is always (re)written, so a handle that is fed
+ *                 several times never replays the previous utterance.
+ *
+ * NOTE(review): the cache key covers the text only, so the same text requested with another
+ * voice/model is answered from the first cached file.
+ */
+static switch_status_t speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags) {
+    tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info;
+    switch_status_t status = SWITCH_STATUS_SUCCESS;
+    char digest[SWITCH_MD5_DIGEST_STRING_SIZE + 1] = { 0 };
+    const void *ptr = NULL;
+    uint32_t recv_len = 0;
+    uint8_t fl_cached = 0;
+
+    assert(tts_ctx != NULL);
+
+    if(!text) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "text is null\n");
+        switch_goto_status(SWITCH_STATUS_FALSE, out);
+    }
+
+    /* the file of the previous utterance may still be open */
+    if(switch_test_flag(tts_ctx->fhnd, SWITCH_FILE_OPEN)) {
+        switch_core_file_close(tts_ctx->fhnd);
+    }
+
+    if(globals.fl_cache_enabled) {
+        switch_md5_string(digest, (void *)text, strlen(text));
+        tts_ctx->dst_file = switch_core_sprintf(sh->memory_pool, "%s%s%s.%s",
+                                                globals.cache_path,
+                                                SWITCH_PATH_SEPARATOR,
+                                                digest,
+                                                enc2ext(globals.opt_encoding)
+        );
+        fl_cached = (switch_file_exists(tts_ctx->dst_file, tts_ctx->pool) == SWITCH_STATUS_SUCCESS);
+    }
+
+    if(!tts_ctx->dst_file) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "destination file not determined\n");
+        switch_goto_status(SWITCH_STATUS_FALSE, out);
+    }
+
+    if(!fl_cached) {
+        if(tts_ctx->alt_voice == NULL && tts_ctx->model_info == NULL) {
+            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "voice not determined\n");
+            switch_goto_status(SWITCH_STATUS_FALSE, out);
+        }
+        if(tts_ctx->alt_model == NULL && tts_ctx->model_info == NULL) {
+            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "model not determined\n");
+            switch_goto_status(SWITCH_STATUS_FALSE, out);
+        }
+
+        switch_buffer_zero(tts_ctx->curl_recv_buffer);
+        status = curl_perform(tts_ctx , text);
+        recv_len = (uint32_t) switch_buffer_peek_zerocopy(tts_ctx->curl_recv_buffer, &ptr);
+
+        if(status != SWITCH_STATUS_SUCCESS) {
+            if(globals.fl_log_http_error && recv_len > 0) {
+                /* bounded print: the terminator may be missing when the buffer was full */
+                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Services response: %.*s\n", (int) recv_len, (const char *)ptr);
+            }
+            goto out;
+        }
+        if(recv_len == 0) {
+            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Empty response (%s)\n", globals.api_url);
+            switch_goto_status(SWITCH_STATUS_FALSE, out);
+        }
+        if((status = write_file(tts_ctx->dst_file, (switch_byte_t *)ptr, recv_len)) != SWITCH_STATUS_SUCCESS) {
+            goto out;
+        }
+    }
+
+    if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->dst_file, tts_ctx->channels, tts_ctx->samplerate,
+                                       (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) {
+
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", tts_ctx->dst_file);
+        status = SWITCH_STATUS_FALSE;
+    }
+
+out:
+    return status;
+}
+
+/*
+ * Reads up to *data_len bytes of 16-bit samples from the open audio file into 'data'
+ * and stores the number of bytes delivered in *data_len.
+ * Returns SWITCH_STATUS_BREAK once nothing more could be read, SWITCH_STATUS_FALSE on error.
+ */
+static switch_status_t speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *data_len, switch_speech_flag_t *flags) {
+    tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info;
+    size_t len = (*data_len / sizeof(int16_t));
+
+    assert(tts_ctx != NULL);
+
+    if(!switch_test_flag(tts_ctx->fhnd, SWITCH_FILE_OPEN)) {
+        return SWITCH_STATUS_FALSE;
+    }
+
+    if(switch_core_file_read(tts_ctx->fhnd, data, &len) != SWITCH_STATUS_SUCCESS) {
+        switch_core_file_close(tts_ctx->fhnd);
+        return SWITCH_STATUS_FALSE;
+    }
+
+    *data_len = (len * sizeof(int16_t));
+    if(*data_len == 0) {    /* the amount read, not the (never NULL) pointer */
+        switch_core_file_close(tts_ctx->fhnd);
+        return SWITCH_STATUS_BREAK;
+    }
+
+    return SWITCH_STATUS_SUCCESS;
+}
+
+/* Stops the current utterance by closing the audio file, if one is open */
+static void speech_flush_tts(switch_speech_handle_t *sh) {
+    tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info;
+
+    assert(tts_ctx != NULL);
+
+    if(tts_ctx->fhnd != NULL && switch_test_flag(tts_ctx->fhnd, SWITCH_FILE_OPEN)) {
+        switch_core_file_close(tts_ctx->fhnd);
+    }
+}
+
+/* Per-handle overrides: "voice" and "model" take precedence over the configured model entry */
+static void speech_text_param_tts(switch_speech_handle_t *sh, char *param, const char *val) {
+    tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info;
+
+    assert(tts_ctx != NULL);
+
+    if(!param || !val) {
+        return;
+    }
+
+    if(strcasecmp(param, "voice") == 0) {
+        tts_ctx->alt_voice = switch_core_strdup(sh->memory_pool, val);
+    } else if(strcasecmp(param, "model") == 0) {
+        tts_ctx->alt_model = switch_core_strdup(sh->memory_pool, val);
+    }
+}
+
+/* No numeric parameters are supported */
+static void speech_numeric_param_tts(switch_speech_handle_t *sh, char *param, int val) {
+}
+
+/* No float parameters are supported */
+static void speech_float_param_tts(switch_speech_handle_t *sh, char *param, double val) {
+}
+
+// ---------------------------------------------------------------------------------------------------------------------------------------------
+// main
+// ---------------------------------------------------------------------------------------------------------------------------------------------
+/*
+ * Reads MOD_CONFIG_NAME (<settings> and <models>), applies defaults, makes sure the cache
+ * directory exists and registers the "openai" speech interface.
+ * Fails when the configuration cannot be opened or api-url / api-key are missing or empty.
+ */
+SWITCH_MODULE_LOAD_FUNCTION(mod_openai_tts_load) {
+    switch_status_t status = SWITCH_STATUS_SUCCESS;
+    switch_xml_t cfg, xml, settings, param, xmodels, xmodel;
+    switch_speech_interface_t *speech_interface;
+
+    memset(&globals, 0, sizeof(globals));
+    switch_mutex_init(&globals.mutex, SWITCH_MUTEX_NESTED, pool);
+    switch_core_hash_init(&globals.models);
+
+    if((xml = switch_xml_open_cfg(MOD_CONFIG_NAME, &cfg, NULL)) == NULL) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open configuration: %s\n", MOD_CONFIG_NAME);
+        switch_goto_status(SWITCH_STATUS_GENERR, out);
+    }
+
+    if((settings = switch_xml_child(cfg, "settings"))) {
+        for (param = switch_xml_child(settings, "param"); param; param = param->next) {
+            char *var = (char *) switch_xml_attr_soft(param, "name");
+            char *val = (char *) switch_xml_attr_soft(param, "value");
+
+            if(!strcasecmp(var, "api-url")) {
+                if(val) globals.api_url = switch_core_strdup(pool, val);
+            } else if(!strcasecmp(var, "api-key")) {
+                if(val) globals.api_key = switch_core_strdup(pool, val);
+            } else if(!strcasecmp(var, "cache-path")) {
+                if(val) globals.cache_path = switch_core_strdup(pool, val);
+            } else if(!strcasecmp(var, "encoding")) {
+                if(val) globals.opt_encoding = switch_core_strdup(pool, val);
+            } else if(!strcasecmp(var, "user-agent")) {
+                if(val) globals.user_agent = switch_core_strdup(pool, val);
+            } else if(!strcasecmp(var, "request-timeout")) {
+                if(val) globals.request_timeout = atoi(val);
+            } else if(!strcasecmp(var, "connect-timeout")) {
+                if(val) globals.connect_timeout = atoi(val);
+            } else if(!strcasecmp(var, "voice-name-as-language")) {
+                if(val) globals.fl_voice_name_as_language = switch_true(val);
+            } else if(!strcasecmp(var, "log-http-errors")) {
+                if(val) globals.fl_log_http_error = switch_true(val);
+            } else if(!strcasecmp(var, "cache-enable")) {
+                if(val) globals.fl_cache_enabled = switch_true(val);
+            } else if(!strcasecmp(var, "file-size-max")) {
+                if(val) globals.file_size_max = atoi(val);
+            } else if(!strcasecmp(var, "proxy")) {
+                if(val) globals.proxy = switch_core_strdup(pool, val);
+            } else if(!strcasecmp(var, "proxy-credentials")) {
+                if(val) globals.proxy_credentials = switch_core_strdup(pool, val);
+            }
+        }
+    }
+
+    if((xmodels = switch_xml_child(cfg, "models"))) {
+        for(xmodel = switch_xml_child(xmodels, "model"); xmodel; xmodel = xmodel->next) {
+            char *lang = (char *) switch_xml_attr_soft(xmodel, "language");
+            char *voice = (char *) switch_xml_attr_soft(xmodel, "voice");
+            char *model = (char *) switch_xml_attr_soft(xmodel, "model");
+
+            tts_model_info_t *model_info = NULL;
+
+            /* switch_xml_attr_soft() gives "" for an absent attribute, so test for emptiness */
+            if(zstr(lang) || zstr(voice) || zstr(model)) { continue; }
+
+            if(switch_core_hash_find(globals.models, lang)) {
+                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Language '%s' already registered\n", lang);
+                continue;
+            }
+
+            if((model_info = switch_core_alloc(pool, sizeof(tts_model_info_t))) == NULL) {
+                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_core_alloc()\n");
+                switch_goto_status(SWITCH_STATUS_GENERR, out);
+            }
+            model_info->lang = switch_core_strdup(pool, lang);
+            model_info->voice = switch_core_strdup(pool, voice);
+            model_info->model = switch_core_strdup(pool, model);
+
+            switch_core_hash_insert(globals.models, model_info->lang, model_info);
+        }
+    }
+
+
+    if(zstr(globals.api_url)) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing required parameter: api-url\n");
+        switch_goto_status(SWITCH_STATUS_GENERR, out);
+    }
+    if(zstr(globals.api_key)) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing required parameter: api-key\n");
+        switch_goto_status(SWITCH_STATUS_GENERR, out);
+    }
+
+    globals.tmp_path = SWITCH_GLOBAL_dirs.temp_dir;
+    globals.cache_path = (globals.cache_path == NULL ? "/tmp/openai-tts-cache" : globals.cache_path);
+    globals.opt_encoding = (globals.opt_encoding == NULL ? "mp3" : globals.opt_encoding);
+    globals.file_size_max = globals.file_size_max > 0 ? globals.file_size_max : FILE_SIZE_MAX;
+
+    if(switch_directory_exists(globals.cache_path, pool) != SWITCH_STATUS_SUCCESS) {
+        switch_dir_make(globals.cache_path, SWITCH_FPROT_OS_DEFAULT, pool);
+    }
+
+    *module_interface = switch_loadable_module_create_module_interface(pool, modname);
+    speech_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_SPEECH_INTERFACE);
+    speech_interface->interface_name = "openai";
+
+    speech_interface->speech_open = speech_open;
+    speech_interface->speech_close = speech_close;
+    speech_interface->speech_feed_tts = speech_feed_tts;
+    speech_interface->speech_read_tts = speech_read_tts;
+    speech_interface->speech_flush_tts = speech_flush_tts;
+
+    speech_interface->speech_text_param_tts = speech_text_param_tts;
+    speech_interface->speech_numeric_param_tts = speech_numeric_param_tts;
+    speech_interface->speech_float_param_tts = speech_float_param_tts;
+
+    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "OpenAI-TTS (%s)\n", MOD_VERSION);
+out:
+    if(xml) {
+        switch_xml_free(xml);
+    }
+    if(status != SWITCH_STATUS_SUCCESS) {
+        if(globals.models) {
+            switch_core_hash_destroy(&globals.models);
+        }
+    }
+    return status;
+}
+
+/* Releases the model table created by mod_openai_tts_load() */
+SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_openai_tts_shutdown) {
+
+    if(globals.models) {
+        switch_core_hash_destroy(&globals.models);
+    }
+
+    return SWITCH_STATUS_SUCCESS;
+}
diff --git a/src/mod/asr_tts/mod_openai_tts/mod_openai_tts.h b/src/mod/asr_tts/mod_openai_tts/mod_openai_tts.h
new file mode 100644
index 0000000000..1f9d1c53be
--- /dev/null
+++ b/src/mod/asr_tts/mod_openai_tts/mod_openai_tts.h
@@ -0,0 +1,59 @@
+/*
+ * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
+ * Copyright (C) 2005-2014, Anthony Minessale II
+ *
+ * Version: MPL 1.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Module Contributor(s):
+ *  Konstantin Alexandrin
+ *
+ * Shared declarations of mod_openai_tts (module source and utils.c)
+ */
+#ifndef MOD_OPENAI_TTS_H
+#define MOD_OPENAI_TTS_H
+
+#include <switch.h>
+#include <switch_curl.h>
+
+#define MOD_VERSION         "1.0_apiv1"
+#define MOD_CONFIG_NAME     "openai_tts.conf"
+#define FILE_SIZE_MAX       (2*1024*1024)   /* default cap of the response buffer when file-size-max is not configured */
+// #define OAITTS_DEBUG
+
+typedef struct {                            /* one <model> entry of the configuration */
+    char *lang;                             /* lookup key (language attribute) */
+    char *voice;                            /* sent as "voice" in the request */
+    char *model;                            /* sent as "model" in the request */
+} tts_model_info_t;
+
+typedef struct {                            /* per speech-handle state, allocated from the handle pool */
+    switch_memory_pool_t *pool;
+    switch_file_handle_t *fhnd;             /* audio file being played back */
+    switch_buffer_t *curl_recv_buffer;      /* response body of the HTTP request */
+    tts_model_info_t *model_info;           /* configured model for 'language'; may be NULL */
+    char *curl_send_buffer_ref;             /* not yet sent part of the request body */
+    char *language;
+    char *alt_voice;                        /* "voice" text parameter; overrides model_info->voice */
+    char *alt_model;                        /* "model" text parameter; overrides model_info->model */
+    char *dst_file;                         /* path of the audio file */
+    uint32_t samplerate;
+    uint32_t channels;
+    size_t curl_send_buffer_len;            /* bytes left in curl_send_buffer_ref */
+} tts_ctx_t;
+
+char *enc2ext(const char *fmt);             /* file extension for an encoding name; the result must not be freed */
+char *escape_dquotes(const char *string);   /* malloc'ed copy with double quotes backslash-escaped; the caller frees it */
+
+switch_status_t write_file(char *file_name, switch_byte_t *buf, uint32_t buf_len); /* creates/truncates file_name and writes buf */
+
+#endif
diff --git a/src/mod/asr_tts/mod_openai_tts/utils.c b/src/mod/asr_tts/mod_openai_tts/utils.c
new file mode 100644
index 0000000000..df16d332c9
--- /dev/null
+++ b/src/mod/asr_tts/mod_openai_tts/utils.c
@@ -0,0 +1,85 @@
+/*
+ * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
+ * Copyright (C) 2005-2014, Anthony Minessale II
+ *
+ * Version: MPL 1.1
+ *
+ * The contents of this file are subject to the Mozilla Public License
Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Module Contributor(s):
+ *  Konstantin Alexandrin
+ *
+ *
+ */
+#include "mod_openai_tts.h"
+
+/*
+ * Maps an encoding name to the file extension used for audio files.
+ * "mp3" (any case) and NULL give "mp3"; every other name is returned as is.
+ * The result is either a string literal or 'fmt' itself and must not be freed or modified.
+ */
+char *enc2ext(const char *fmt) {
+    if(!fmt || strcasecmp(fmt, "mp3") == 0) { return "mp3"; }
+    return (char *)fmt;
+}
+
+/*
+ * Returns a malloc'ed copy of 'string' that can be placed between the double quotes of a
+ * JSON string: '"' and '\' get a leading backslash and control characters (< 0x20) become
+ * \u00XX. Escaping the quote alone is not enough - a backslash or a line break in the text
+ * would otherwise produce an invalid (or differently structured) request body.
+ * Bytes >= 0x20 are copied unchanged, so UTF-8 text passes through.
+ *
+ * Returns NULL for a NULL input. The caller frees the result.
+ */
+char *escape_dquotes(const char *string) {
+    static const char hex[] = "0123456789abcdef";
+    size_t string_len;
+    size_t i;
+    size_t n = 0;
+    size_t dest_len = 1; /* terminator */
+    char *dest;
+
+    if(!string) {
+        return NULL;
+    }
+    string_len = strlen(string);
+
+    for (i = 0; i < string_len; i++) {
+        unsigned char c = (unsigned char) string[i];
+
+        if(c == '\"' || c == '\\') {
+            dest_len += 2;
+        } else if(c < 0x20) {
+            dest_len += 6;
+        } else {
+            dest_len += 1;
+        }
+    }
+
+    dest = malloc(dest_len);
+    switch_assert(dest);
+
+    for (i = 0; i < string_len; i++) {
+        unsigned char c = (unsigned char) string[i];
+
+        if(c == '\"' || c == '\\') {
+            dest[n++] = '\\';
+            dest[n++] = (char) c;
+        } else if(c < 0x20) {
+            dest[n++] = '\\';
+            dest[n++] = 'u';
+            dest[n++] = '0';
+            dest[n++] = '0';
+            dest[n++] = hex[c >> 4];
+            dest[n++] = hex[c & 0x0f];
+        } else {
+            dest[n++] = (char) c;
+        }
+    }
+    dest[n++] = '\0';
+
+    switch_assert(n == dest_len);
+    return dest;
+}
+
+/*
+ * Creates (or truncates) 'file_name' and writes buf_len bytes of 'buf' to it.
+ * Short writes are continued until everything is stored. When the data cannot be written
+ * completely the file is removed again, so that a partial file is never mistaken for a
+ * valid (cached) result later on.
+ */
+switch_status_t write_file(char *file_name, switch_byte_t *buf, uint32_t buf_len) {
+    switch_status_t status = SWITCH_STATUS_SUCCESS;
+    switch_memory_pool_t *pool = NULL;
+    switch_file_t *fd = NULL;
+    uint32_t written = 0;
+
+    if(!file_name || (!buf && buf_len > 0)) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "write_file(): invalid arguments\n");
+        return SWITCH_STATUS_FALSE;
+    }
+
+    if(switch_core_new_memory_pool(&pool) != SWITCH_STATUS_SUCCESS) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_core_new_memory_pool() fail\n");
+        switch_goto_status(SWITCH_STATUS_GENERR, out);
+    }
+    if((status = switch_file_open(&fd, file_name, (SWITCH_FOPEN_WRITE | SWITCH_FOPEN_TRUNCATE | SWITCH_FOPEN_CREATE), SWITCH_FPROT_OS_DEFAULT, pool)) != SWITCH_STATUS_SUCCESS) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Open fail: %s\n", file_name);
+        goto out;
+    }
+
+    while(written < buf_len) {
+        switch_size_t len = (buf_len - written);
+
+        status = switch_file_write(fd, buf + written, &len);
+        if(status != SWITCH_STATUS_SUCCESS || len == 0) {
+            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Write fail (%s)\n", file_name);
+            if(status == SWITCH_STATUS_SUCCESS) { status = SWITCH_STATUS_FALSE; }
+            break;
+        }
+        written += (uint32_t) len;
+    }
+
+    switch_file_close(fd);
+
+    if(status != SWITCH_STATUS_SUCCESS) {
+        switch_file_remove(file_name, pool);
+    }
+out:
+    if(pool) {
+        switch_core_destroy_memory_pool(&pool);
+    }
+    return status;
+}
diff --git a/src/mod/asr_tts/mod_piper_tts/Makefile.am b/src/mod/asr_tts/mod_piper_tts/Makefile.am
new file mode 100644
index 0000000000..013d7df268
--- /dev/null
+++ b/src/mod/asr_tts/mod_piper_tts/Makefile.am
@@ -0,0 +1,11 @@
+
+include $(top_srcdir)/build/modmake.rulesam
+MODNAME=mod_piper_tts
+
+mod_LTLIBRARIES = mod_piper_tts.la
+mod_piper_tts_la_SOURCES  = mod_piper_tts.c
+mod_piper_tts_la_CFLAGS   = $(AM_CFLAGS) -I.
+mod_piper_tts_la_LIBADD   = $(switch_builddir)/libfreeswitch.la
+mod_piper_tts_la_LDFLAGS  = -avoid-version -module -no-undefined -shared
+
+$(am_mod_piper_tts_la_OBJECTS): mod_piper_tts.h
diff --git a/src/mod/asr_tts/mod_piper_tts/conf/autoload_configs/piper_tts.conf.xml b/src/mod/asr_tts/mod_piper_tts/conf/autoload_configs/piper_tts.conf.xml
new file mode 100644
index 0000000000..76d52c24f8
--- /dev/null
+++ b/src/mod/asr_tts/mod_piper_tts/conf/autoload_configs/piper_tts.conf.xml
@@ -0,0 +1,22 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/mod/asr_tts/mod_piper_tts/conf/dialplan/example.xml b/src/mod/asr_tts/mod_piper_tts/conf/dialplan/example.xml
new file mode 100644
index 0000000000..e4e09e41f6
--- /dev/null
+++ b/src/mod/asr_tts/mod_piper_tts/conf/dialplan/example.xml
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
diff --git a/src/mod/asr_tts/mod_piper_tts/mod_piper_tts.c b/src/mod/asr_tts/mod_piper_tts/mod_piper_tts.c
new file mode 100644
index 0000000000..0fd1056084
--- /dev/null
+++ b/src/mod/asr_tts/mod_piper_tts/mod_piper_tts.c
@@ -0,0 +1,339 @@
+/*
+ * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
+ * Copyright (C) 2005-2014, Anthony Minessale II
+ *
+ * Version: MPL 1.1
+ *
+ * The contents of this file are subject to the Mozilla Public License
Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Module Contributor(s):
+ *  Konstantin Alexandrin
+ *
+ *
+ * Provides the ability to use Piper TTS in FreeSWITCH
+ * https://github.com/rhasspy/piper
+ *
+ *
+ * Development repository:
+ * https://github.com/akscf/mod_piper_tts
+ *
+ */
+#include "mod_piper_tts.h"
+
+/* Module-wide settings; zeroed and then filled by mod_piper_tts_load() */
+static piper_globals_t globals;
+
+SWITCH_MODULE_LOAD_FUNCTION(mod_piper_tts_load);
+SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_piper_tts_shutdown);
+SWITCH_MODULE_DEFINITION(mod_piper_tts, mod_piper_tts_load, mod_piper_tts_shutdown, NULL);
+
+
+/* Returns the model registered for 'lang', or NULL when lang is NULL or unknown */
+static piper_model_info_t *piper_lookup_model(const char *lang) {
+    piper_model_info_t *model = NULL;
+
+    if(!lang) {
+        return NULL;
+    }
+
+    switch_mutex_lock(globals.mutex);
+    model = switch_core_hash_find(globals.models, lang);
+    switch_mutex_unlock(globals.mutex);
+
+    return model;
+}
+
+/*
+ * Allocates the per-handle context from sh->memory_pool and stores it in sh->private_info.
+ * The language is 'voice' when voice-name-as-language is enabled, otherwise "en"; opening
+ * fails when no model is registered for it.
+ * When the cache is disabled, a unique file name in the temp directory is reserved for the handle.
+ */
+static switch_status_t speech_open(switch_speech_handle_t *sh, const char *voice, int samplerate, int channels, switch_speech_flag_t *flags) {
+    char name_uuid[SWITCH_UUID_FORMATTED_LENGTH + 1] = { 0 };
+    switch_status_t status = SWITCH_STATUS_SUCCESS;
+    tts_ctx_t *tts_ctx = NULL;
+
+    tts_ctx = switch_core_alloc(sh->memory_pool, sizeof(tts_ctx_t));
+    tts_ctx->pool = sh->memory_pool;
+    tts_ctx->fhnd = switch_core_alloc(tts_ctx->pool, sizeof(switch_file_handle_t));
+    tts_ctx->voice = (voice ? switch_core_strdup(tts_ctx->pool, voice) : NULL);
+    tts_ctx->language = (globals.fl_voice_as_language && voice ? switch_core_strdup(sh->memory_pool, voice) : "en");
+    tts_ctx->channels = channels;
+    tts_ctx->samplerate = samplerate;
+
+    sh->private_info = tts_ctx;
+
+    if(tts_ctx->language) {
+        tts_ctx->model_info = piper_lookup_model(tts_ctx->language);
+        if(!tts_ctx->model_info) {
+            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Language '%s' not registered!\n", tts_ctx->language);
+            switch_goto_status(SWITCH_STATUS_FALSE, out);
+        }
+    }
+
+    if(!globals.fl_cache_enabled) {
+        switch_uuid_str((char *)name_uuid, sizeof(name_uuid));
+        tts_ctx->dst_fname = switch_core_sprintf(sh->memory_pool, "%s%spiper-%s.%s",
+                                                 globals.tmp_path,
+                                                 SWITCH_PATH_SEPARATOR,
+                                                 name_uuid,
+                                                 PIPER_FILE_ENCODING
+        );
+    }
+
+out:
+    return status;
+}
+
+/* Closes the audio file and removes the one-shot file (cache disabled) */
+static switch_status_t speech_close(switch_speech_handle_t *sh, switch_speech_flag_t *flags) {
+    tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info;
+    assert(tts_ctx != NULL);
+
+    if(switch_test_flag(tts_ctx->fhnd, SWITCH_FILE_OPEN)) {
+        switch_core_file_close(tts_ctx->fhnd);
+    }
+
+    if(tts_ctx->dst_fname && !globals.fl_cache_enabled) {
+        unlink(tts_ctx->dst_fname);
+    }
+
+    return SWITCH_STATUS_SUCCESS;
+}
+
+/*
+ * Makes the audio for 'text' available through tts_ctx->fhnd.
+ *
+ * Cache enabled : the file name is derived from the MD5 of the text, recomputed on every call;
+ *                 an existing file is played without running piper.
+ * Cache disabled: the per-handle temp file is always regenerated, so a handle that is fed
+ *                 several times never replays the previous utterance.
+ *
+ * NOTE(review): the cache key covers the text only, so the same text requested for another
+ * language/model is answered from the first cached file.
+ */
+static switch_status_t speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags) {
+    tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info;
+    char digest[SWITCH_MD5_DIGEST_STRING_SIZE + 1] = { 0 };
+    switch_status_t status = SWITCH_STATUS_SUCCESS;
+    uint8_t fl_cached = 0;
+
+    assert(tts_ctx != NULL);
+
+    if(!text) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "text is null\n");
+        switch_goto_status(SWITCH_STATUS_FALSE, out);
+    }
+
+    /* the file of the previous utterance may still be open */
+    if(switch_test_flag(tts_ctx->fhnd, SWITCH_FILE_OPEN)) {
+        switch_core_file_close(tts_ctx->fhnd);
+    }
+
+    if(globals.fl_cache_enabled) {
+        switch_md5_string(digest, (void *)text, strlen(text));
+        tts_ctx->dst_fname = switch_core_sprintf(sh->memory_pool, "%s%s%s.%s",
+                                                 globals.cache_path,
+                                                 SWITCH_PATH_SEPARATOR,
+                                                 digest,
+                                                 PIPER_FILE_ENCODING
+        );
+        fl_cached = (switch_file_exists(tts_ctx->dst_fname, tts_ctx->pool) == SWITCH_STATUS_SUCCESS);
+    }
+
+    if(!tts_ctx->dst_fname) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "destination file not determined\n");
+        switch_goto_status(SWITCH_STATUS_FALSE, out);
+    }
+
+    if(!fl_cached) {
+        char *cmd = NULL;
+        char *textq = NULL;
+        char *modelq = NULL;
+        char *fileq = NULL;
+
+        if(!tts_ctx->model_info) {
+            if(tts_ctx->language) {
+                tts_ctx->model_info = piper_lookup_model(tts_ctx->language);
+            }
+            if(!tts_ctx->model_info) {
+                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to lookup the model for lang: %s\n", tts_ctx->language);
+                switch_goto_status(SWITCH_STATUS_FALSE, out);
+            }
+        }
+
+        /* every value that becomes a shell word is quoted the same way;
+         * piper-opts is deliberately left as is, it may hold several options.
+         * NOTE(review): some shells' echo interprets backslash sequences in the text. */
+        textq = switch_util_quote_shell_arg(text);
+        modelq = switch_util_quote_shell_arg(tts_ctx->model_info->model);
+        fileq = switch_util_quote_shell_arg(tts_ctx->dst_fname);
+        cmd = switch_mprintf("echo %s | %s %s --model %s --output_file %s",
+                             textq, globals.piper_bin,
+                             globals.piper_opts ? globals.piper_opts : "",
+                             modelq,
+                             fileq
+        );
+
+#ifdef PIPER_DEBUG
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "PIPER-CMD: [%s]\n", cmd ? cmd : "");
+#endif
+
+        if(!cmd) {
+            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_mprintf() fail\n");
+            status = SWITCH_STATUS_FALSE;
+        } else if(switch_system(cmd, SWITCH_TRUE)) {
+            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to perform cmd: %s\n", cmd);
+            status = SWITCH_STATUS_FALSE;
+        }
+
+        switch_safe_free(textq);
+        switch_safe_free(modelq);
+        switch_safe_free(fileq);
+        switch_safe_free(cmd);
+
+        if(status != SWITCH_STATUS_SUCCESS) {
+            goto out;
+        }
+        if(switch_file_exists(tts_ctx->dst_fname, tts_ctx->pool) != SWITCH_STATUS_SUCCESS) {
+            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "File not found: %s\n", tts_ctx->dst_fname);
+            switch_goto_status(SWITCH_STATUS_FALSE, out);
+        }
+    }
+
+    if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->dst_fname, tts_ctx->channels, tts_ctx->samplerate,
+                                       (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) {
+
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", tts_ctx->dst_fname);
+        switch_goto_status(SWITCH_STATUS_FALSE, out);
+    }
+
+out:
+    return status;
+}
+
+/*
+ * Reads up to *data_len bytes of 16-bit samples from the open audio file into 'data'
+ * and stores the number of bytes delivered in *data_len.
+ * Returns SWITCH_STATUS_BREAK once nothing more could be read, SWITCH_STATUS_FALSE on error.
+ */
+static switch_status_t speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *data_len, switch_speech_flag_t *flags) {
+    tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info;
+    size_t len = (*data_len / sizeof(int16_t));
+
+    assert(tts_ctx != NULL);
+
+    if(!switch_test_flag(tts_ctx->fhnd, SWITCH_FILE_OPEN)) {
+        return SWITCH_STATUS_FALSE;
+    }
+
+    if(switch_core_file_read(tts_ctx->fhnd, data, &len) != SWITCH_STATUS_SUCCESS) {
+        switch_core_file_close(tts_ctx->fhnd);
+        return SWITCH_STATUS_FALSE;
+    }
+
+    *data_len = (len * sizeof(int16_t));
+    if(*data_len == 0) {    /* the amount read, not the (never NULL) pointer */
+        switch_core_file_close(tts_ctx->fhnd);
+        return SWITCH_STATUS_BREAK;
+    }
+
+    return SWITCH_STATUS_SUCCESS;
+}
+
+/* Stops the current utterance by closing the audio file, if one is open */
+static void speech_flush_tts(switch_speech_handle_t *sh) {
+    tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info;
+
+    assert(tts_ctx != NULL);
+
+    if(tts_ctx->fhnd != NULL && switch_test_flag(tts_ctx->fhnd, SWITCH_FILE_OPEN)) {
+        switch_core_file_close(tts_ctx->fhnd);
+    }
+}
+
+/*
+ * Per-handle text parameters: "lang" and "voice".
+ * Changing the language drops the model picked earlier, so that the next feed resolves
+ * the model for the new language instead of silently keeping the old one.
+ */
+static void speech_text_param_tts(switch_speech_handle_t *sh, char *param, const char *val) {
+    tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info;
+
+    assert(tts_ctx != NULL);
+
+    if(!param || !val) {
+        return;
+    }
+
+    if(strcasecmp(param, "lang") == 0) {
+        tts_ctx->language = switch_core_strdup(sh->memory_pool, val);
+        tts_ctx->model_info = NULL;
+    } else if(strcasecmp(param, "voice") == 0) {
+        tts_ctx->voice = switch_core_strdup(sh->memory_pool, val);
+    }
+}
+
+/* No numeric parameters are supported */
+static void speech_numeric_param_tts(switch_speech_handle_t *sh, char *param, int val) {
+}
+
+/* No float parameters are supported */
+static void speech_float_param_tts(switch_speech_handle_t *sh, char *param, double val) {
+}
+
+// ---------------------------------------------------------------------------------------------------------------------------------------------
+// main
+// ---------------------------------------------------------------------------------------------------------------------------------------------
+/*
+ * Reads MOD_CONFIG_NAME (<settings> and <models>), applies defaults, makes sure the cache
+ * directory exists and registers the "piper" speech interface.
+ * Fails when the configuration cannot be opened or piper-bin is missing or empty.
+ */
+SWITCH_MODULE_LOAD_FUNCTION(mod_piper_tts_load) {
+    switch_status_t status = SWITCH_STATUS_SUCCESS;
+    switch_xml_t cfg, xml, settings, param, xmodels, xmodel;
+    switch_speech_interface_t *speech_interface;
+
+    memset(&globals, 0, sizeof(globals));
+    switch_mutex_init(&globals.mutex, SWITCH_MUTEX_NESTED, pool);
+    switch_core_hash_init(&globals.models);
+
+    if((xml = switch_xml_open_cfg(MOD_CONFIG_NAME, &cfg, NULL)) == NULL) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open configuration: %s\n", MOD_CONFIG_NAME);
+        switch_goto_status(SWITCH_STATUS_GENERR, out);
+    }
+
+    if((settings = switch_xml_child(cfg, "settings"))) {
+        for(param = switch_xml_child(settings, "param"); param; param = param->next) {
+            char *var = (char *) switch_xml_attr_soft(param, "name");
+            char *val = (char *) switch_xml_attr_soft(param, "value");
+
+            if(!strcasecmp(var, "cache-path")) {
+                if(val) globals.cache_path = switch_core_strdup(pool, val);
+            } else if(!strcasecmp(var, "piper-bin")) {
+                if(val) globals.piper_bin = switch_core_strdup(pool, val);
+            } else if(!strcasecmp(var, "piper-opts")) {
+                if(val) globals.piper_opts = switch_core_strdup(pool, val);
+            } else if(!strcasecmp(var, "voice-name-as-language")) {
+                if(val) globals.fl_voice_as_language = switch_true(val);
+            } else if(!strcasecmp(var, "cache-enable")) {
+                if(val) globals.fl_cache_enabled = switch_true(val);
+            }
+        }
+    }
+
+    if((xmodels = switch_xml_child(cfg, "models"))) {
+        for(xmodel = switch_xml_child(xmodels, "model"); xmodel; xmodel = xmodel->next) {
+            char *lang = (char *) switch_xml_attr_soft(xmodel, "language");
+            char *model = (char *) switch_xml_attr_soft(xmodel, "model");
+            piper_model_info_t *model_info = NULL;
+
+            /* switch_xml_attr_soft() gives "" for an absent attribute, so test for emptiness */
+            if(zstr(lang) || zstr(model)) { continue; }
+
+            if(switch_core_hash_find(globals.models, lang)) {
+                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Language '%s' already registered\n", lang);
+                continue;
+            }
+
+            if((model_info = switch_core_alloc(pool, sizeof(piper_model_info_t))) == NULL) {
+                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_core_alloc()\n");
+                switch_goto_status(SWITCH_STATUS_GENERR, out);
+            }
+            model_info->lang = switch_core_strdup(pool, lang);
+            model_info->model = switch_core_strdup(pool, model);
+
+            switch_core_hash_insert(globals.models, model_info->lang, model_info);
+        }
+    }
+
+    if(zstr(globals.piper_bin)) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "piper-bin - not determined!\n");
+        switch_goto_status(SWITCH_STATUS_FALSE, out);
+    }
+
+    globals.tmp_path = SWITCH_GLOBAL_dirs.temp_dir;
+    globals.cache_path = (globals.cache_path == NULL ? "/tmp/piper-tts-cache" : globals.cache_path);
+
+    if(switch_directory_exists(globals.cache_path, pool) != SWITCH_STATUS_SUCCESS) {
+        switch_dir_make(globals.cache_path, SWITCH_FPROT_OS_DEFAULT, pool);
+    }
+
+    *module_interface = switch_loadable_module_create_module_interface(pool, modname);
+    speech_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_SPEECH_INTERFACE);
+    speech_interface->interface_name = "piper";
+
+    speech_interface->speech_open = speech_open;
+    speech_interface->speech_close = speech_close;
+    speech_interface->speech_feed_tts = speech_feed_tts;
+    speech_interface->speech_read_tts = speech_read_tts;
+    speech_interface->speech_flush_tts = speech_flush_tts;
+
+    speech_interface->speech_text_param_tts = speech_text_param_tts;
+    speech_interface->speech_float_param_tts = speech_float_param_tts;
+    speech_interface->speech_numeric_param_tts = speech_numeric_param_tts;
+
+    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "PiperTTS (%s)\n", MOD_VERSION);
+out:
+    if(xml) {
+        switch_xml_free(xml);
+    }
+    if(status != SWITCH_STATUS_SUCCESS) {
+        if(globals.models) {
+            switch_core_hash_destroy(&globals.models);
+        }
+    }
+    return status;
+}
+
+/* Releases the model table created by mod_piper_tts_load() */
+SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_piper_tts_shutdown) {
+
+    if(globals.models) {
+        switch_core_hash_destroy(&globals.models);
+    }
+
+    return SWITCH_STATUS_SUCCESS;
+}
diff --git a/src/mod/asr_tts/mod_piper_tts/mod_piper_tts.h b/src/mod/asr_tts/mod_piper_tts/mod_piper_tts.h
new file mode 100644
index 0000000000..ea6c1b9de1
--- /dev/null
+++ b/src/mod/asr_tts/mod_piper_tts/mod_piper_tts.h
@@ -0,0 +1,60 @@
+/*
+ * FreeSWITCH Modular Media
Switching Software Library / Soft-Switch Application
+ * Copyright (C) 2005-2014, Anthony Minessale II
+ *
+ * Version: MPL 1.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Module Contributor(s):
+ *  Konstantin Alexandrin
+ *
+ * Shared declarations of mod_piper_tts
+ */
+#ifndef MOD_PIPER_TTS_H
+#define MOD_PIPER_TTS_H
+
+#include <switch.h>
+
+#define MOD_VERSION             "1.0"
+#define MOD_CONFIG_NAME         "piper_tts.conf"
+#define PIPER_FILE_ENCODING     "wav"       /* extension of the generated audio files */
+// #define PIPER_DEBUG
+
+typedef struct {                            /* module-wide settings */
+    switch_mutex_t *mutex;                  /* taken around lookups in 'models' */
+    switch_hash_t *models;                  /* language -> piper_model_info_t */
+    const char *tmp_path;                   /* directory of one-shot audio files (cache disabled) */
+    const char *cache_path;                 /* directory of cached audio files */
+    const char *piper_bin;                  /* piper executable (piper-bin) */
+    const char *piper_opts;                 /* extra command line options, may be NULL (piper-opts) */
+    uint8_t fl_cache_enabled;
+    uint8_t fl_voice_as_language;
+} piper_globals_t;
+
+typedef struct {                            /* one <model> entry of the configuration */
+    char *lang;                             /* lookup key (language attribute) */
+    char *model;                            /* passed to piper as --model */
+} piper_model_info_t;
+
+typedef struct {                            /* per speech-handle state, allocated from the handle pool */
+    piper_model_info_t *model_info;         /* model resolved for 'language' */
+    switch_memory_pool_t *pool;
+    switch_file_handle_t *fhnd;             /* audio file being played back */
+    char *language;
+    char *voice;                            /* stored only; not read by the module source in this patch */
+    char *dst_fname;                        /* path of the audio file */
+    uint32_t samplerate;
+    uint32_t channels;
+} tts_ctx_t;
+
+
+#endif