diff --git a/src/include/switch_core.h b/src/include/switch_core.h index 10cfde05fb..86281717e9 100644 --- a/src/include/switch_core.h +++ b/src/include/switch_core.h @@ -760,11 +760,18 @@ SWITCH_DECLARE(switch_status) switch_core_file_close(switch_file_handle *fh); \brief Open a speech handle \param sh a speech handle to use \param module_name the speech module to use + \param voice_name the desired voice name + \param rate the sampling rate \param flags asr/tts flags \param pool the pool to use (NULL for new pool) \return SWITCH_STATUS_SUCCESS if the handle is opened */ -SWITCH_DECLARE(switch_status) switch_core_speech_open(switch_speech_handle *sh, char *module_name, unsigned int flags, switch_memory_pool *pool); +SWITCH_DECLARE(switch_status) switch_core_speech_open(switch_speech_handle *sh, + char *module_name, + char *voice_name, + int rate, + unsigned int flags, + switch_memory_pool *pool); /*! \brief Feed data to the ASR module diff --git a/src/include/switch_ivr.h b/src/include/switch_ivr.h index 4465be8cf8..559a6c391d 100644 --- a/src/include/switch_ivr.h +++ b/src/include/switch_ivr.h @@ -119,6 +119,16 @@ SWITCH_DECLARE(switch_status) switch_ivr_record_file(switch_core_session *sessio unsigned int buflen); +SWITCH_DECLARE(switch_status) switch_ivr_speak_text(switch_core_session *session, + char *tts_name, + char *voice_name, + char *timer_name, + int rate, + switch_dtmf_callback_function dtmf_callback, + char *text, + void *buf, + unsigned int buflen); + /** @} */ #ifdef __cplusplus diff --git a/src/include/switch_module_interfaces.h b/src/include/switch_module_interfaces.h index 76a6a84db8..373ac2bfcd 100644 --- a/src/include/switch_module_interfaces.h +++ b/src/include/switch_module_interfaces.h @@ -286,6 +286,8 @@ struct switch_speech_interface { const char *interface_name; /*! function to open the speech interface */ switch_status (*speech_open)(switch_speech_handle *sh, + char *voice_name, + int rate, unsigned int flags); /*! 
function to close the speech interface */ switch_status (*speech_close)(switch_speech_handle *, unsigned int *flags); diff --git a/src/include/switch_types.h b/src/include/switch_types.h index 8808b4b822..524bb0baf6 100644 --- a/src/include/switch_types.h +++ b/src/include/switch_types.h @@ -252,6 +252,7 @@ SWITCH_SPEECH_FLAG_ASR = (1 << 1) - Interface can/should convert audio to tex SWITCH_SPEECH_FLAG_HASTEXT = (1 << 2) - Interface is has text to read. SWITCH_SPEECH_FLAG_PEEK = (1 << 3) - Read data but do not erase it. SWITCH_SPEECH_FLAG_FREE_POOL = (1 << 4) - Free interface's pool on destruction. +SWITCH_SPEECH_FLAG_BLOCKING = (1 << 5) - Indicate that a blocking call is desired */ typedef enum { @@ -260,6 +261,7 @@ typedef enum { SWITCH_SPEECH_FLAG_HASTEXT = (1 << 2), SWITCH_SPEECH_FLAG_PEEK = (1 << 3), SWITCH_SPEECH_FLAG_FREE_POOL = (1 << 4), + SWITCH_SPEECH_FLAG_BLOCKING = (1 << 5), } switch_speech_flag; diff --git a/src/mod/applications/mod_ivrtest/mod_ivrtest.c b/src/mod/applications/mod_ivrtest/mod_ivrtest.c index e9dbd5c532..e44cc167d0 100644 --- a/src/mod/applications/mod_ivrtest/mod_ivrtest.c +++ b/src/mod/applications/mod_ivrtest/mod_ivrtest.c @@ -81,19 +81,66 @@ static void dirtest_function(switch_core_session *session, char *data) } +static switch_status show_dtmf(switch_core_session *session, char *dtmf, void *buf, unsigned int buflen) +{ + switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Digits %s\n", dtmf); + /* hand the digits back to the caller through buf */ + switch_copy_string((char *)buf, dtmf, buflen); + return SWITCH_STATUS_SUCCESS; + +} +/* "tts" application: data is "engine:voice:text" or "engine:text"; answers the channel and speaks the text */ +static void tts_function(switch_core_session *session, char *data) +{ + switch_channel *channel; + switch_codec *codec; + char *mydata, *text = NULL, *voice_name = NULL, *tts_name = NULL; + char buf[10] = ""; + char *argv[3] = {0}; /* zeroed so a missing third field reads as NULL below */ + int argc; + + if(!(mydata = switch_core_session_strdup(session, (char *) data))) { + return; + } + + if ((argc = switch_separate_string(mydata, ':', argv, sizeof(argv) / sizeof(argv[0]))) > 1) { + tts_name = argv[0]; + voice_name = 
argv[1]; + text = argv[2]; + } + /* only two fields given: the second one is the text, not a voice */ + if (voice_name && !text) { + text = argv[1]; + voice_name = NULL; + } + + channel = switch_core_session_get_channel(session); + assert(channel != NULL); + + switch_channel_answer(channel); + + codec = switch_core_session_get_read_codec(session); + + switch_ivr_speak_text(session, tts_name, voice_name, NULL, codec->implementation->samples_per_second, show_dtmf, text, buf, sizeof(buf)); + switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Done\n"); +} + static void ivrtest_function(switch_core_session *session, char *data) { switch_channel *channel; switch_status status = SWITCH_STATUS_SUCCESS; + switch_codec *codec; char buf[10] = ""; char term; - + char say[128] = ""; + channel = switch_core_session_get_channel(session); assert(channel != NULL); switch_channel_answer(channel); - - + + codec = switch_core_session_get_read_codec(session); + while (switch_channel_get_state(channel) == CS_EXECUTE) { memset(buf, 0, sizeof(buf)); switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Enter up to 10 digits, press # to terminate, * to hangup\n"); @@ -118,8 +165,9 @@ static void ivrtest_function(switch_core_session *session, char *data) if (term && term == '*') { break; } - - switch_console_printf(SWITCH_CHANNEL_CONSOLE, "You Dialed [%s]\n", buf); + snprintf(say, sizeof(say), "You Dialed [%s]\n", buf); + switch_console_printf(SWITCH_CHANNEL_CONSOLE, "%s", say); /* say holds caller-dialed input, so it must never be the format string */ + switch_ivr_speak_text(session, "cepstral", "david", NULL, codec->implementation->samples_per_second, NULL, say, NULL, 0); } } @@ -146,9 +194,18 @@ static const switch_state_handler_table state_handlers = { /*.on_transmit */ NULL }; +static const switch_application_interface tts_application_interface = { + /*.interface_name */ "tts", + /*.application_function */ tts_function, + NULL, NULL, NULL, + /*.next*/ NULL +}; + static const switch_application_interface dirtest_application_interface = { /*.interface_name */ "dirtest", - /*.application_function */ dirtest_function + /*.application_function */ 
dirtest_function, + NULL, NULL, NULL, + /*.next*/ &tts_application_interface }; static const switch_application_interface ivrtest_application_interface = { diff --git a/src/mod/applications/mod_skel/mod_skel.c b/src/mod/applications/mod_skel/mod_skel.c index 2867a9135f..0bd9aa6b03 100644 --- a/src/mod/applications/mod_skel/mod_skel.c +++ b/src/mod/applications/mod_skel/mod_skel.c @@ -39,7 +39,11 @@ static switch_loadable_module_interface skel_module_interface = { /*.timer_interface */ NULL, /*.dialplan_interface */ NULL, /*.codec_interface */ NULL, - /*.application_interface */ NULL + /*.application_interface */ NULL, + /*.api_interface */ NULL, + /*.file_interface */ NULL, + /*.speech_interface */ NULL, + /*.directory_interface */ NULL }; switch_status switch_module_load(const switch_loadable_module_interface **interface, char *filename) diff --git a/src/mod/asr_tts/mod_cepstral/mod_cepstral.c b/src/mod/asr_tts/mod_cepstral/mod_cepstral.c new file mode 100644 index 0000000000..54ff6a4228 --- /dev/null +++ b/src/mod/asr_tts/mod_cepstral/mod_cepstral.c @@ -0,0 +1,283 @@ +/* + * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * Copyright (C) 2005/2006, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. 
+ * + * The Original Code is FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * + * The Initial Developer of the Original Code is + * Anthony Minessale II + * Portions created by the Initial Developer are Copyright (C) + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * + * Anthony Minessale II + * + * + * mod_cepstral.c -- Cepstral Interface + * + */ +#include <swift.h> +#include <switch.h> + +static const char modname[] = "mod_cepstral"; + +static swift_engine *engine; + +/* Per-handle TTS state, stored in sh->private_info by cepstral_speech_open() */ +typedef struct { + swift_background_t tts_stream; + swift_port *port; + swift_params *params; + swift_voice *voice; + switch_mutex_t *audio_lock; + switch_buffer *audio_buffer; + int done; + int done_gen; +} cepstral_t; + + +/* This callback caches the audio in the buffer */ +static swift_result_t write_audio(swift_event *event, swift_event_t type, void *udata) +{ + cepstral_t *cepstral; + swift_result_t rv = SWIFT_SUCCESS; /* matches the declared return type of this callback */ + void *buf = NULL; + int len = 0; + int wrote; + + cepstral = udata; + assert(cepstral != NULL); + + /* Only proceed when we have success */ + if (!SWIFT_FAILED((rv = swift_event_get_audio(event, &buf, &len)))) { + switch_mutex_lock(cepstral->audio_lock); + if ((wrote=switch_buffer_write(cepstral->audio_buffer, buf, len)) <= 0) { + rv = SWIFT_UNKNOWN_ERROR; + } + switch_mutex_unlock(cepstral->audio_lock); + } else { + cepstral->done = 1; + } + + return rv; +} +/* Open a TTS handle: create the audio buffer and lock, open a Swift port at the requested rate and select a voice (the first one found when voice_name is NULL or rejected) */ +static switch_status cepstral_speech_open(switch_speech_handle *sh, char *voice_name, int rate, unsigned int flags) +{ + if (flags & SWITCH_SPEECH_FLAG_ASR) { + return SWITCH_STATUS_FALSE; + } + if (flags & SWITCH_SPEECH_FLAG_TTS) { + cepstral_t *cepstral = switch_core_alloc(sh->memory_pool, sizeof(*cepstral)); + char srate[25]; + + if (!cepstral) { + return SWITCH_STATUS_MEMERR; + } + + if (switch_buffer_create(sh->memory_pool, &cepstral->audio_buffer, SWITCH_RECCOMMENDED_BUFFER_SIZE) != SWITCH_STATUS_SUCCESS) { + switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Write Buffer 
Failed!\n"); + return SWITCH_STATUS_MEMERR; + } + + + switch_mutex_init(&cepstral->audio_lock, SWITCH_MUTEX_NESTED, sh->memory_pool); + + + cepstral->params = swift_params_new(NULL); + swift_params_set_string(cepstral->params, "audio/encoding", "pcm16"); + snprintf(srate, sizeof(srate), "%d", rate); + swift_params_set_string(cepstral->params, "audio/sampling-rate", srate); + + /* swift_port_open() hands back a pointer, so failure is a NULL check rather than SWIFT_FAILED() */ + /* Open a Swift Port through which to make TTS calls */ + if ((cepstral->port = swift_port_open(engine, cepstral->params)) == NULL) { + switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Failed to open Swift Port."); + goto all_done; + } + + + if (voice_name && SWIFT_FAILED(swift_port_set_voice_by_name(cepstral->port, voice_name))) { + switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Invalid voice %s!\n", voice_name); + voice_name = NULL; + } + + if (!voice_name) { + /* Find the first voice on the system */ + if ((cepstral->voice = swift_port_find_first_voice(cepstral->port, NULL, NULL)) == NULL) { + switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Failed to find any voices!\n"); + goto all_done; + } + + /* Set the voice found by find_first_voice() as the port's current voice */ + if ( SWIFT_FAILED(swift_port_set_voice(cepstral->port, cepstral->voice)) ) { + switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Failed to set voice.\n"); + goto all_done; + } + } + + swift_port_set_callback(cepstral->port, &write_audio, SWIFT_EVENT_AUDIO, cepstral); + + sh->private_info = cepstral; + return SWITCH_STATUS_SUCCESS; + } + + all_done: + return SWITCH_STATUS_FALSE; +} +/* Close the handle's Swift port; the module-wide engine is left open */ +static switch_status cepstral_speech_close(switch_speech_handle *sh, unsigned int *flags) +{ + cepstral_t *cepstral; + + assert(sh != NULL); + cepstral = sh->private_info; + assert(cepstral != NULL); + + /* Close the Swift Port and Engine */ + if (NULL != cepstral->port) swift_port_close(cepstral->port); + //if (NULL != cepstral->engine) swift_engine_close(cepstral->engine); + + cepstral->port = NULL; + //cepstral->engine = NULL; + + return 
SWITCH_STATUS_SUCCESS; +} +/* Hand text to the Swift port for synthesis; tts_stream receives the id that cepstral_speech_read_tts() polls with swift_port_status() */ +static switch_status cepstral_speech_feed_tts(switch_speech_handle *sh, char *text, unsigned int *flags) +{ + cepstral_t *cepstral; + + assert(sh != NULL); + cepstral = sh->private_info; + assert(cepstral != NULL); + + /* report success once Swift has accepted the text; previously this always returned SWITCH_STATUS_FALSE */ + if (SWIFT_FAILED(swift_port_speak_text(cepstral->port, text, 0, NULL, &cepstral->tts_stream, NULL))) { return SWITCH_STATUS_FALSE; } + //swift_port_speak_text(cepstral->port, text, 0, NULL, NULL, NULL); + + + return SWITCH_STATUS_SUCCESS; +} +/* Read up to *datalen bytes of synthesized audio; with SWITCH_SPEECH_FLAG_BLOCKING it waits for a full frame and zero-pads the final short one, otherwise it returns *datalen = 0 when not enough is buffered */ +static switch_status cepstral_speech_read_tts(switch_speech_handle *sh, + void *data, + unsigned int *datalen, + unsigned int *rate, + unsigned int *flags) +{ + cepstral_t *cepstral; + int desired = *datalen; + switch_status status = SWITCH_STATUS_FALSE; + int used, padding = 0; + + assert(sh != NULL); + cepstral = sh->private_info; + assert(cepstral != NULL); + + while(!cepstral->done) { + if (!cepstral->done_gen) { + int check = (SWIFT_STATUS_RUNNING == swift_port_status(cepstral->port, cepstral->tts_stream)); + if (!check) { + cepstral->done_gen = 1; + } + } + + used = switch_buffer_inuse(cepstral->audio_buffer); + + + + if (!used && cepstral->done_gen) { + break; + } + + + /* wait for the right amount of data (unless there is no blocking flag) */ + if (used < desired) { + if (cepstral->done_gen) { + padding = desired - used; + desired = used; + } + if (!(*flags & SWITCH_SPEECH_FLAG_BLOCKING)) { + *datalen = 0; + status = SWITCH_STATUS_SUCCESS; + break; + } + switch_yield(1000); + continue; + } + + /* There is enough, read it and return */ + switch_mutex_lock(cepstral->audio_lock); + *datalen = switch_buffer_read(cepstral->audio_buffer, data, desired); + if (padding) { + int x = 0; + unsigned char *p = data; + + for(x = 0; x < padding; x++) { + *(p + *datalen) = 0; /* pad after the audio just read instead of overwriting it from offset 0 */ + (*datalen)++; + } + } + + switch_mutex_unlock(cepstral->audio_lock); + status = SWITCH_STATUS_SUCCESS; + + break; + } + + return status; +} + +const switch_speech_interface cepstral_speech_interface = { + /*.interface_name*/ "cepstral", + /*.speech_open*/ 
cepstral_speech_open, + /*.speech_close*/ cepstral_speech_close, + /*.speech_feed_asr*/ NULL, + /*.speech_interpret_asr*/ NULL, + /*.speech_feed_tts*/ cepstral_speech_feed_tts, + /*.speech_read_tts*/ cepstral_speech_read_tts + +}; + +static switch_loadable_module_interface cepstral_module_interface = { + /*.module_name */ modname, + /*.endpoint_interface */ NULL, + /*.timer_interface */ NULL, + /*.dialplan_interface */ NULL, + /*.codec_interface */ NULL, + /*.application_interface */ NULL, + /*.api_interface */ NULL, + /*.file_interface */ NULL, + /*.speech_interface */ &cepstral_speech_interface, + /*.directory_interface */ NULL +}; + +switch_status switch_module_load(const switch_loadable_module_interface **interface, char *filename) +{ + /* swift_engine_open() hands back a pointer, so failure is a NULL check rather than SWIFT_FAILED() */ + /* Open the Swift TTS Engine */ + if ( (engine = swift_engine_open(NULL)) == NULL ) { + switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Failed to open Swift Engine."); + return SWITCH_STATUS_GENERR; + } + + /* connect my internal structure to the blank pointer passed to me */ + *interface = &cepstral_module_interface; + + /* indicate that the module should continue to be loaded */ + return SWITCH_STATUS_SUCCESS; +} diff --git a/src/switch_core.c b/src/switch_core.c index 83ce2059c8..45ab88f44e 100644 --- a/src/switch_core.c +++ b/src/switch_core.c @@ -475,6 +475,8 @@ SWITCH_DECLARE(switch_status) switch_core_directory_close(switch_directory_handl SWITCH_DECLARE(switch_status) switch_core_speech_open(switch_speech_handle *sh, char *module_name, + char *voice_name, + int rate, unsigned int flags, switch_memory_pool *pool) { @@ -495,7 +497,7 @@ SWITCH_DECLARE(switch_status) switch_core_speech_open(switch_speech_handle *sh, switch_set_flag(sh, SWITCH_SPEECH_FLAG_FREE_POOL); } - return sh->speech_interface->speech_open(sh, flags); + return sh->speech_interface->speech_open(sh, voice_name, rate, flags); } SWITCH_DECLARE(switch_status) switch_core_speech_feed_asr(switch_speech_handle *sh, void *data, unsigned int *len, int rate, 
unsigned int *flags) diff --git a/src/switch_ivr.c b/src/switch_ivr.c index e4ab972c33..a06592ccd2 100644 --- a/src/switch_ivr.c +++ b/src/switch_ivr.c @@ -381,3 +381,212 @@ SWITCH_DECLARE(switch_status) switch_ivr_play_file(switch_core_session *session, +/* Speak text on the session through the named TTS module, writing 20ms L16 frames until the audio ends, the channel leaves CS_EXECUTE, or DTMF handling stops playback */ +SWITCH_DECLARE(switch_status) switch_ivr_speak_text(switch_core_session *session, + char *tts_name, + char *voice_name, + char *timer_name, + int rate, + switch_dtmf_callback_function dtmf_callback, + char *text, + void *buf, + unsigned int buflen) +{ + switch_channel *channel; + short abuf[960]; + char dtmf[128]; + int interval = 0, samples = 0; + size_t len = 0, ilen = 0; + switch_frame write_frame = {0}; /* zeroed so fields not assigned below start out defined */ + switch_timer timer; + switch_core_thread_session thread_session; + switch_codec codec; + switch_memory_pool *pool = switch_core_session_get_pool(session); + char *codec_name; + int x; + int stream_id; + int done = 0; + int lead_in_out = 10; + + switch_status status = SWITCH_STATUS_SUCCESS; + switch_speech_handle sh; + switch_speech_flag flags = SWITCH_SPEECH_FLAG_TTS; + + + memset(&sh, 0, sizeof(sh)); + + channel = switch_core_session_get_channel(session); + assert(channel != NULL); + + if (switch_core_speech_open(&sh, + tts_name, + voice_name, + rate, + flags, + switch_core_session_get_pool(session)) != SWITCH_STATUS_SUCCESS) { + switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Invalid TTS module!\n"); + return SWITCH_STATUS_FALSE; + } + + switch_channel_answer(channel); + + write_frame.data = abuf; + write_frame.buflen = sizeof(abuf); + + + switch_console_printf(SWITCH_CHANNEL_CONSOLE, "OPEN TTS %s\n", tts_name); + + interval = 20; + samples = (rate / 50); + len = samples * 2; + + codec_name = "L16"; + + if (switch_core_codec_init(&codec, + codec_name, + rate, + interval, + 1, + SWITCH_CODEC_FLAG_ENCODE | SWITCH_CODEC_FLAG_DECODE, + NULL, pool) == SWITCH_STATUS_SUCCESS) { + switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Raw Codec Activated\n"); + write_frame.codec = &codec; + } else { + 
switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Raw Codec Activation Failed %s@%dhz %d channels %dms\n", + codec_name, rate, 1, interval); + flags = 0; + switch_core_speech_close(&sh, &flags); + return SWITCH_STATUS_GENERR; + } + + if (timer_name) { + if (switch_core_timer_init(&timer, timer_name, interval, samples, pool) != SWITCH_STATUS_SUCCESS) { + switch_console_printf(SWITCH_CHANNEL_CONSOLE, "setup timer failed!\n"); + switch_core_codec_destroy(&codec); + flags = 0; + switch_core_speech_close(&sh, &flags); + return SWITCH_STATUS_GENERR; + } + switch_console_printf(SWITCH_CHANNEL_CONSOLE, "setup timer success %d bytes per %d ms!\n", (int) len, interval); /* len is size_t; cast to match %d */ + } + + flags = 0; + switch_core_speech_feed_tts(&sh, text, &flags); + write_frame.rate = rate; + /* send lead_in_out frames of silence before the speech starts */ + memset(write_frame.data, 0, len); + write_frame.datalen = len; + write_frame.samples = len / 2; + + for( x = 0; !done && x < lead_in_out; x++) { + for (stream_id = 0; stream_id < switch_core_session_get_stream_count(session); stream_id++) { + if (switch_core_session_write_frame(session, &write_frame, -1, stream_id) != SWITCH_STATUS_SUCCESS) { + switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Bad Write\n"); + done = 1; + break; + } + } + } + + if (timer_name) { + /* start a thread to absorb incoming audio */ + for (stream_id = 0; stream_id < switch_core_session_get_stream_count(session); stream_id++) { + switch_core_service_session(session, &thread_session, stream_id); + } + } + + ilen = len; + while (switch_channel_get_state(channel) == CS_EXECUTE) { + + if (dtmf_callback || buf) { + + + /* + dtmf handler function you can hook up to be executed when a digit is dialed during playback + if you return anything but SWITCH_STATUS_SUCCESS the playback will stop. 
+ */ + if (switch_channel_has_dtmf(channel)) { + switch_channel_dequeue_dtmf(channel, dtmf, sizeof(dtmf)); + if (dtmf_callback) { + status = dtmf_callback(session, dtmf, buf, buflen); + } else { + switch_copy_string((char *)buf, dtmf, buflen); + status = SWITCH_STATUS_BREAK; + } + } + + if (status != SWITCH_STATUS_SUCCESS) { + done = 1; + break; + } + } + + flags = SWITCH_SPEECH_FLAG_BLOCKING; + status = switch_core_speech_read_tts(&sh, + abuf, + &ilen, + &rate, + &flags); + + if (status != SWITCH_STATUS_SUCCESS) { + for( x = 0; !done && x < lead_in_out; x++) { + for (stream_id = 0; stream_id < switch_core_session_get_stream_count(session); stream_id++) { + if (switch_core_session_write_frame(session, &write_frame, -1, stream_id) != SWITCH_STATUS_SUCCESS) { + switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Bad Write\n"); + done = 1; + break; + } + } + } + done = 1; + } + + if (done || ilen <= 0) { + break; + } + + write_frame.datalen = ilen; + write_frame.samples = (int) ilen / 2; +#ifdef SWAP_LINEAR + switch_swap_linear(write_frame.data, (int) write_frame.datalen); +#endif + + for (stream_id = 0; stream_id < switch_core_session_get_stream_count(session); stream_id++) { + if (switch_core_session_write_frame(session, &write_frame, -1, stream_id) != SWITCH_STATUS_SUCCESS) { + switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Bad Write\n"); + done = 1; + break; + } + + if (done) { + break; + } + } + if (timer_name) { + if ((x = switch_core_timer_next(&timer)) < 0) { + break; + } + } else { /* time off the channel (if you must) */ + switch_frame *read_frame; + if (switch_core_session_read_frame(session, &read_frame, -1, 0) != SWITCH_STATUS_SUCCESS) { + break; + } + } + } + + switch_console_printf(SWITCH_CHANNEL_CONSOLE, "done playing file\n"); + switch_core_codec_destroy(&codec); + flags = 0; + switch_core_speech_close(&sh, &flags); /* was a second codec destroy; the speech handle was never closed on this path */ + + if (timer_name) { + /* End the audio absorbing thread */ + switch_core_thread_session_end(&thread_session); + 
switch_core_timer_destroy(&timer); + } + + return status; +} + + +