add mod_cepstral to test tts backend

git-svn-id: http://svn.freeswitch.org/svn/freeswitch/trunk@681 d0543943-73ff-0310-b7d9-9358b9ac24b2
This commit is contained in:
Anthony Minessale 2006-02-26 20:23:23 +00:00
parent 14072e8725
commit 9369bfba5a
9 changed files with 585 additions and 9 deletions

View File

@ -760,11 +760,18 @@ SWITCH_DECLARE(switch_status) switch_core_file_close(switch_file_handle *fh);
\brief Open a speech handle
\param sh a speech handle to use
\param module_name the speech module to use
\param voice_name the desired voice name
\param rate the sampling rate
\param flags asr/tts flags
\param pool the pool to use (NULL for new pool)
\return SWITCH_STATUS_SUCCESS if the handle is opened
*/
SWITCH_DECLARE(switch_status) switch_core_speech_open(switch_speech_handle *sh, char *module_name, unsigned int flags, switch_memory_pool *pool);
SWITCH_DECLARE(switch_status) switch_core_speech_open(switch_speech_handle *sh,
char *module_name,
char *voice_name,
int rate,
unsigned int flags,
switch_memory_pool *pool);
/*!
\brief Feed data to the ASR module

View File

@ -119,6 +119,16 @@ SWITCH_DECLARE(switch_status) switch_ivr_record_file(switch_core_session *sessio
unsigned int buflen);
SWITCH_DECLARE(switch_status) switch_ivr_speak_text(switch_core_session *session,
char *tts_name,
char *voice_name,
char *timer_name,
int rate,
switch_dtmf_callback_function dtmf_callback,
char *text,
void *buf,
unsigned int buflen);
/** @} */
#ifdef __cplusplus

View File

@ -286,6 +286,8 @@ struct switch_speech_interface {
const char *interface_name;
/*! function to open the speech interface */
switch_status (*speech_open)(switch_speech_handle *sh,
char *voice_name,
int rate,
unsigned int flags);
/*! function to close the speech interface */
switch_status (*speech_close)(switch_speech_handle *, unsigned int *flags);

View File

@ -252,6 +252,7 @@ SWITCH_SPEECH_FLAG_ASR = (1 << 1) - Interface can/should convert audio to tex
SWITCH_SPEECH_FLAG_HASTEXT = (1 << 2) - Interface has text to read.
SWITCH_SPEECH_FLAG_PEEK = (1 << 3) - Read data but do not erase it.
SWITCH_SPEECH_FLAG_FREE_POOL = (1 << 4) - Free interface's pool on destruction.
SWITCH_SPEECH_FLAG_BLOCKING = (1 << 5) - Indicate that a blocking call is desired
</pre>
*/
typedef enum {
@ -260,6 +261,7 @@ typedef enum {
SWITCH_SPEECH_FLAG_HASTEXT = (1 << 2),
SWITCH_SPEECH_FLAG_PEEK = (1 << 3),
SWITCH_SPEECH_FLAG_FREE_POOL = (1 << 4),
SWITCH_SPEECH_FLAG_BLOCKING = (1 << 5),
} switch_speech_flag;

View File

@ -81,18 +81,65 @@ static void dirtest_function(switch_core_session *session, char *data)
}
static void ivrtest_function(switch_core_session *session, char *data)
/*
 * DTMF callback used by the tts test application.
 *
 * Prints the received digits on the console and copies them into the
 * caller-supplied buffer (at most buflen bytes, as bounded by
 * switch_copy_string).  Always reports SWITCH_STATUS_SUCCESS, so
 * switch_ivr_speak_text keeps playing after a digit arrives.
 */
static switch_status show_dtmf(switch_core_session *session, char *dtmf, void *buf, unsigned int buflen)
{
	char *digit_store = (char *) buf;

	switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Digits %s\n", dtmf);
	switch_copy_string(digit_store, dtmf, buflen);

	return SWITCH_STATUS_SUCCESS;
}
/*
 * "tts" test application: speak a piece of text on the channel.
 *
 * data is a colon separated argument string in one of two forms:
 *   <tts_name>:<text>
 *   <tts_name>:<voice_name>:<text>
 *
 * The text is rendered at the sample rate of the session's read codec.
 * Digits dialed during playback are reported through show_dtmf().
 */
static void tts_function(switch_core_session *session, char *data)
{
	switch_channel *channel;
	switch_codec *codec;
	char *mydata, *text = NULL, *voice_name = NULL, *tts_name = NULL;
	char buf[10] = "";
	/* Pre-cleared so that no slot is ever read uninitialised. */
	char *argv[3] = { NULL, NULL, NULL };
	int argc;

	if (!data || !(mydata = switch_core_session_strdup(session, data))) {
		return;
	}

	/* Decide by the number of fields actually parsed rather than by peeking at argv[2]. */
	argc = switch_separate_string(mydata, ':', argv, sizeof(argv) / sizeof(argv[0]));
	if (argc >= 3) {
		tts_name = argv[0];
		voice_name = argv[1];
		text = argv[2];
	} else if (argc == 2) {
		tts_name = argv[0];
		text = argv[1];
	}

	if (!tts_name || !text) {
		switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Usage: tts <tts_name>:[<voice_name>:]<text>\n");
		return;
	}

	channel = switch_core_session_get_channel(session);
	assert(channel != NULL);

	switch_channel_answer(channel);

	codec = switch_core_session_get_read_codec(session);

	switch_ivr_speak_text(session,
						  tts_name,
						  voice_name,
						  NULL,
						  codec->implementation->samples_per_second,
						  show_dtmf,
						  text,
						  buf,
						  sizeof(buf));

	switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Done\n");
}
static void ivrtest_function(switch_core_session *session, char *data)
{
switch_channel *channel;
switch_status status = SWITCH_STATUS_SUCCESS;
switch_codec *codec;
char buf[10] = "";
char term;
char say[128] = "";
channel = switch_core_session_get_channel(session);
assert(channel != NULL);
switch_channel_answer(channel);
codec = switch_core_session_get_read_codec(session);
while (switch_channel_get_state(channel) == CS_EXECUTE) {
memset(buf, 0, sizeof(buf));
@ -118,8 +165,9 @@ static void ivrtest_function(switch_core_session *session, char *data)
if (term && term == '*') {
break;
}
switch_console_printf(SWITCH_CHANNEL_CONSOLE, "You Dialed [%s]\n", buf);
snprintf(say, sizeof(say), "You Dialed [%s]\n", buf);
switch_console_printf(SWITCH_CHANNEL_CONSOLE, say);
switch_ivr_speak_text(session, "cepstral", "david", NULL, codec->implementation->samples_per_second, NULL, say, NULL, 0);
}
}
@ -146,9 +194,18 @@ static const switch_state_handler_table state_handlers = {
/*.on_transmit */ NULL
};
/*
 * Application table entry that exposes tts_function under the name "tts".
 * The three NULL members are unlabelled here; .next is NULL, so this entry
 * ends the list that dirtest_application_interface links into.
 */
static const switch_application_interface tts_application_interface = {
/*.interface_name */ "tts",
/*.application_function */ tts_function,
NULL, NULL, NULL,
/*.next*/ NULL
};
static const switch_application_interface dirtest_application_interface = {
/*.interface_name */ "dirtest",
/*.application_function */ dirtest_function
/*.application_function */ dirtest_function,
NULL, NULL, NULL,
/*.next*/ &tts_application_interface
};
static const switch_application_interface ivrtest_application_interface = {

View File

@ -39,7 +39,11 @@ static switch_loadable_module_interface skel_module_interface = {
/*.timer_interface */ NULL,
/*.dialplan_interface */ NULL,
/*.codec_interface */ NULL,
/*.application_interface */ NULL
/*.application_interface */ NULL,
/*.api_interface */ NULL,
/*.file_interface */ NULL,
/*.speech_interface */ NULL,
/*.directory_interface */ NULL
};
switch_status switch_module_load(const switch_loadable_module_interface **interface, char *filename)

View File

@ -0,0 +1,283 @@
/*
* FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
* Copyright (C) 2005/2006, Anthony Minessale II <anthmct@yahoo.com>
*
* Version: MPL 1.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
*
* The Initial Developer of the Original Code is
* Anthony Minessale II <anthmct@yahoo.com>
* Portions created by the Initial Developer are Copyright (C)
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Anthony Minessale II <anthmct@yahoo.com>
*
*
* mod_cepstral.c -- Cepstral Interface
*
*/
#include <swift.h>
#include <switch.h>
/* Module name published through cepstral_module_interface. */
static const char modname[] = "mod_cepstral";
/* Swift engine handle, opened once in switch_module_load() and used by every port opened in cepstral_speech_open(). */
static swift_engine *engine;
/* Per speech-handle state, stored in switch_speech_handle.private_info. */
typedef struct {
/* Background job handle filled in by swift_port_speak_text() and polled with swift_port_status(). */
swift_background_t tts_stream;
/* Swift port opened in cepstral_speech_open() and closed in cepstral_speech_close(). */
swift_port *port;
/* Port parameters (audio encoding and sampling rate) set up in cepstral_speech_open(). */
swift_params *params;
/* Voice returned by swift_port_find_first_voice() when no named voice was selected. */
swift_voice *voice;
/* Taken around writes to and reads from audio_buffer. */
switch_mutex_t *audio_lock;
/* Audio produced by the write_audio() callback, drained by cepstral_speech_read_tts(). */
switch_buffer *audio_buffer;
/* Set by write_audio() when swift_event_get_audio() fails; makes the read loop stop. */
int done;
/* Set by cepstral_speech_read_tts() once swift_port_status() no longer reports SWIFT_STATUS_RUNNING. */
int done_gen;
} cepstral_t;
/*
 * Swift audio-event callback: caches the audio carried by the event in the
 * handle's audio buffer.
 *
 * event  the Swift event to pull audio from
 * type   event type (unused)
 * udata  the cepstral_t registered with swift_port_set_callback()
 *
 * Returns the result of swift_event_get_audio(), or SWIFT_UNKNOWN_ERROR when
 * nothing could be written to the buffer.  When the audio cannot be fetched
 * the handle is flagged as done so the reader stops waiting.
 */
static swift_result_t write_audio(swift_event *event, swift_event_t type, void *udata)
{
	cepstral_t *cepstral = udata;
	/* A result code, not an event type: declared as swift_result_t to match what is stored and returned. */
	swift_result_t rv;
	void *buf = NULL;
	int len = 0;

	assert(cepstral != NULL);

	rv = swift_event_get_audio(event, &buf, &len);
	if (SWIFT_FAILED(rv)) {
		cepstral->done = 1;
		return rv;
	}

	/* Only proceed when we have success */
	switch_mutex_lock(cepstral->audio_lock);
	if (switch_buffer_write(cepstral->audio_buffer, buf, len) <= 0) {
		rv = SWIFT_UNKNOWN_ERROR;
	}
	switch_mutex_unlock(cepstral->audio_lock);

	return rv;
}
/*
 * Open a Cepstral text-to-speech handle.
 *
 * sh          speech handle; its memory_pool supplies all allocations and
 *             its private_info receives the new cepstral_t on success
 * voice_name  voice to select, or NULL to use the first voice found; an
 *             unknown name is reported and the first voice is used instead
 * rate        sampling rate requested from the engine (16 bit PCM)
 * flags       SWITCH_SPEECH_FLAG_* bits; ASR is refused, TTS is required
 *
 * Returns SWITCH_STATUS_SUCCESS when the port is ready, SWITCH_STATUS_MEMERR
 * when the state or buffer cannot be allocated, SWITCH_STATUS_FALSE otherwise.
 */
static switch_status cepstral_speech_open(switch_speech_handle *sh, char *voice_name, int rate, unsigned int flags)
{
	cepstral_t *cepstral;
	char srate[25];

	if (flags & SWITCH_SPEECH_FLAG_ASR) {
		return SWITCH_STATUS_FALSE;
	}

	if (!(flags & SWITCH_SPEECH_FLAG_TTS)) {
		return SWITCH_STATUS_FALSE;
	}

	cepstral = switch_core_alloc(sh->memory_pool, sizeof(*cepstral));
	if (!cepstral) {
		return SWITCH_STATUS_MEMERR;
	}

	if (switch_buffer_create(sh->memory_pool, &cepstral->audio_buffer, SWITCH_RECCOMMENDED_BUFFER_SIZE) != SWITCH_STATUS_SUCCESS) {
		switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Write Buffer Failed!\n");
		return SWITCH_STATUS_MEMERR;
	}

	switch_mutex_init(&cepstral->audio_lock, SWITCH_MUTEX_NESTED, sh->memory_pool);

	cepstral->params = swift_params_new(NULL);
	swift_params_set_string(cepstral->params, "audio/encoding", "pcm16");
	snprintf(srate, sizeof(srate), "%d", rate);
	swift_params_set_string(cepstral->params, "audio/sampling-rate", srate);

	/*
	 * Open a Swift Port through which to make TTS calls.
	 * The call hands back a pointer, so failure is a NULL port; SWIFT_FAILED()
	 * is meant for swift_result_t codes and does not catch it.
	 */
	cepstral->port = swift_port_open(engine, cepstral->params);
	if (cepstral->port == NULL) {
		switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Failed to open Swift Port.");
		goto fail;
	}

	/* NOTE(review): confirm the return type of swift_port_set_voice_by_name(); if it is a pointer this check has the same problem as above. */
	if (voice_name && SWIFT_FAILED(swift_port_set_voice_by_name(cepstral->port, voice_name))) {
		switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Invalid voice %s!\n", voice_name);
		voice_name = NULL;
	}

	if (!voice_name) {
		/* Find the first voice on the system */
		if ((cepstral->voice = swift_port_find_first_voice(cepstral->port, NULL, NULL)) == NULL) {
			switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Failed to find any voices!\n");
			goto fail;
		}

		/* Set the voice found by find_first_voice() as the port's current voice */
		if (SWIFT_FAILED(swift_port_set_voice(cepstral->port, cepstral->voice))) {
			switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Failed to set voice.\n");
			goto fail;
		}
	}

	swift_port_set_callback(cepstral->port, &write_audio, SWIFT_EVENT_AUDIO, cepstral);

	sh->private_info = cepstral;
	return SWITCH_STATUS_SUCCESS;

 fail:
	/* private_info is never set on this path, so nothing else would close the port later. */
	if (cepstral->port != NULL) {
		swift_port_close(cepstral->port);
		cepstral->port = NULL;
	}
	return SWITCH_STATUS_FALSE;
}
/*
 * Close a Cepstral speech handle: closes the handle's Swift port, if one is
 * open, and clears the pointer.  The module-wide engine is left open.
 * flags is not used.  Always returns SWITCH_STATUS_SUCCESS.
 */
static switch_status cepstral_speech_close(switch_speech_handle *sh, unsigned int *flags)
{
	cepstral_t *ctx;

	assert(sh != NULL);
	ctx = sh->private_info;
	assert(ctx != NULL);

	if (ctx->port) {
		swift_port_close(ctx->port);
		ctx->port = NULL;
	}

	return SWITCH_STATUS_SUCCESS;
}
/*
 * Hand a piece of text to the Swift port for synthesis.
 *
 * The job is started as a background stream (tts_stream); the audio arrives
 * through the write_audio() callback and is collected with
 * cepstral_speech_read_tts().  flags is not used.
 *
 * Returns SWITCH_STATUS_SUCCESS when the engine accepted the text,
 * SWITCH_STATUS_FALSE when there is no text or the engine refused it.
 */
static switch_status cepstral_speech_feed_tts(switch_speech_handle *sh, char *text, unsigned int *flags)
{
	cepstral_t *cepstral;

	assert(sh != NULL);
	cepstral = sh->private_info;
	assert(cepstral != NULL);

	if (text == NULL || *text == '\0') {
		return SWITCH_STATUS_FALSE;
	}

	/* cepstral_speech_read_tts() latches these; clear them so a handle can speak more than once. */
	cepstral->done = 0;
	cepstral->done_gen = 0;

	if (SWIFT_FAILED(swift_port_speak_text(cepstral->port, text, 0, NULL, &cepstral->tts_stream, NULL))) {
		return SWITCH_STATUS_FALSE;
	}

	return SWITCH_STATUS_SUCCESS;
}
/*
 * Read synthesized audio from the handle's buffer.
 *
 * sh       speech handle opened by cepstral_speech_open()
 * data     destination; must have room for the *datalen bytes requested
 * datalen  in: number of bytes wanted; out: number of bytes stored
 * rate     not used
 * flags    SWITCH_SPEECH_FLAG_BLOCKING makes the call wait until the
 *          requested amount is available; without it the call returns
 *          immediately with *datalen = 0 while the engine is still producing
 *
 * Once the engine has stopped generating, whatever is left in the buffer is
 * returned and zero-filled up to the requested size.
 *
 * Returns SWITCH_STATUS_SUCCESS when *datalen was set, SWITCH_STATUS_FALSE
 * when no audio remains or the audio callback flagged the handle as done.
 */
static switch_status cepstral_speech_read_tts(switch_speech_handle *sh,
											  void *data,
											  unsigned int *datalen,
											  unsigned int *rate,
											  unsigned int *flags)
{
	cepstral_t *cepstral;
	int desired = *datalen;
	switch_status status = SWITCH_STATUS_FALSE;
	int used, padding = 0;

	assert(sh != NULL);
	cepstral = sh->private_info;
	assert(cepstral != NULL);

	while (!cepstral->done) {
		if (!cepstral->done_gen) {
			if (SWIFT_STATUS_RUNNING != swift_port_status(cepstral->port, cepstral->tts_stream)) {
				cepstral->done_gen = 1;
			}
		}

		/* The callback writes under audio_lock, so sample the fill level under it too. */
		switch_mutex_lock(cepstral->audio_lock);
		used = switch_buffer_inuse(cepstral->audio_buffer);
		switch_mutex_unlock(cepstral->audio_lock);

		if (!used && cepstral->done_gen) {
			break;
		}

		if (used < desired) {
			if (cepstral->done_gen) {
				/* Nothing more is coming: deliver the remainder now and pad it. */
				padding = desired - used;
				desired = used;
			} else {
				/* wait for the right amount of data (unless there is no blocking flag) */
				if (!(*flags & SWITCH_SPEECH_FLAG_BLOCKING)) {
					*datalen = 0;
					status = SWITCH_STATUS_SUCCESS;
					break;
				}
				switch_yield(1000);
				continue;
			}
		}

		/* There is enough, read it and return */
		switch_mutex_lock(cepstral->audio_lock);
		*datalen = switch_buffer_read(cepstral->audio_buffer, data, desired);
		switch_mutex_unlock(cepstral->audio_lock);

		if (padding) {
			/* Pad after the audio just read, not over the start of it. */
			unsigned char *tail = (unsigned char *) data + *datalen;
			int x;

			for (x = 0; x < padding; x++) {
				tail[x] = 0;
			}
			*datalen += (unsigned int) padding;
		}

		status = SWITCH_STATUS_SUCCESS;
		break;
	}

	return status;
}
/*
 * Speech interface table registered under the name "cepstral".
 * Only the text-to-speech entry points are provided; both ASR slots are NULL.
 */
const switch_speech_interface cepstral_speech_interface = {
/*.interface_name*/ "cepstral",
/*.speech_open*/ cepstral_speech_open,
/*.speech_close*/ cepstral_speech_close,
/*.speech_feed_asr*/ NULL,
/*.speech_interpret_asr*/ NULL,
/*.speech_feed_tts*/ cepstral_speech_feed_tts,
/*.speech_read_tts*/ cepstral_speech_read_tts
};
/*
 * Module interface table handed back from switch_module_load().
 * The speech interface is the only one this module provides; every other
 * slot is NULL.
 */
static switch_loadable_module_interface cepstral_module_interface = {
/*.module_name */ modname,
/*.endpoint_interface */ NULL,
/*.timer_interface */ NULL,
/*.dialplan_interface */ NULL,
/*.codec_interface */ NULL,
/*.application_interface */ NULL,
/*.api_interface */ NULL,
/*.file_interface */ NULL,
/*.speech_interface */ &cepstral_speech_interface,
/*.directory_interface */ NULL
};
/*
 * Module entry point: opens the shared Swift engine and publishes the
 * module interface table.
 *
 * interface  receives a pointer to cepstral_module_interface on success
 * filename   not used
 *
 * Returns SWITCH_STATUS_SUCCESS when the module should continue to be
 * loaded, SWITCH_STATUS_GENERR when the engine could not be opened.
 */
switch_status switch_module_load(const switch_loadable_module_interface **interface, char *filename)
{
	/*
	 * Open the Swift TTS Engine.  The call hands back a pointer, so failure
	 * is a NULL engine; SWIFT_FAILED() is meant for swift_result_t codes.
	 */
	engine = swift_engine_open(NULL);
	if (engine == NULL) {
		switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Failed to open Swift Engine.");
		return SWITCH_STATUS_GENERR;
	}

	/* connect my internal structure to the blank pointer passed to me */
	*interface = &cepstral_module_interface;

	/* indicate that the module should continue to be loaded */
	return SWITCH_STATUS_SUCCESS;
}

View File

@ -475,6 +475,8 @@ SWITCH_DECLARE(switch_status) switch_core_directory_close(switch_directory_handl
SWITCH_DECLARE(switch_status) switch_core_speech_open(switch_speech_handle *sh,
char *module_name,
char *voice_name,
int rate,
unsigned int flags,
switch_memory_pool *pool)
{
@ -495,7 +497,7 @@ SWITCH_DECLARE(switch_status) switch_core_speech_open(switch_speech_handle *sh,
switch_set_flag(sh, SWITCH_SPEECH_FLAG_FREE_POOL);
}
return sh->speech_interface->speech_open(sh, flags);
return sh->speech_interface->speech_open(sh, voice_name, rate, flags);
}
SWITCH_DECLARE(switch_status) switch_core_speech_feed_asr(switch_speech_handle *sh, void *data, unsigned int *len, int rate, unsigned int *flags)

View File

@ -381,3 +381,212 @@ SWITCH_DECLARE(switch_status) switch_ivr_play_file(switch_core_session *session,
SWITCH_DECLARE(switch_status) switch_ivr_speak_text(switch_core_session *session,
char *tts_name,
char *voice_name,
char *timer_name,
int rate,
switch_dtmf_callback_function dtmf_callback,
char *text,
void *buf,
unsigned int buflen)
{
switch_channel *channel;
short abuf[960];
char dtmf[128];
int interval = 0, samples = 0;
size_t len = 0, ilen = 0;
switch_frame write_frame;
switch_timer timer;
switch_core_thread_session thread_session;
switch_codec codec;
switch_memory_pool *pool = switch_core_session_get_pool(session);
char *codec_name;
int x;
int stream_id;
int done = 0;
int lead_in_out = 10;
switch_status status = SWITCH_STATUS_SUCCESS;
switch_speech_handle sh;
switch_speech_flag flags = SWITCH_SPEECH_FLAG_TTS;
memset(&sh, 0, sizeof(sh));
channel = switch_core_session_get_channel(session);
assert(channel != NULL);
if (switch_core_speech_open(&sh,
tts_name,
voice_name,
rate,
flags,
switch_core_session_get_pool(session)) != SWITCH_STATUS_SUCCESS) {
switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Invalid TTS module!\n");
return SWITCH_STATUS_FALSE;
}
switch_channel_answer(channel);
write_frame.data = abuf;
write_frame.buflen = sizeof(abuf);
switch_console_printf(SWITCH_CHANNEL_CONSOLE, "OPEN TTS %s\n", tts_name);
interval = 20;
samples = (rate / 50);
len = samples * 2;
codec_name = "L16";
if (switch_core_codec_init(&codec,
codec_name,
rate,
interval,
1,
SWITCH_CODEC_FLAG_ENCODE | SWITCH_CODEC_FLAG_DECODE,
NULL, pool) == SWITCH_STATUS_SUCCESS) {
switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Raw Codec Activated\n");
write_frame.codec = &codec;
} else {
switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Raw Codec Activation Failed %s@%dhz %d channels %dms\n",
codec_name, rate, 1, interval);
flags = 0;
switch_core_speech_close(&sh, &flags);
return SWITCH_STATUS_GENERR;
}
if (timer_name) {
if (switch_core_timer_init(&timer, timer_name, interval, samples, pool) != SWITCH_STATUS_SUCCESS) {
switch_console_printf(SWITCH_CHANNEL_CONSOLE, "setup timer failed!\n");
switch_core_codec_destroy(&codec);
flags = 0;
switch_core_speech_close(&sh, &flags);
return SWITCH_STATUS_GENERR;
}
switch_console_printf(SWITCH_CHANNEL_CONSOLE, "setup timer success %d bytes per %d ms!\n", len, interval);
}
flags = 0;
switch_core_speech_feed_tts(&sh, text, &flags);
write_frame.rate = rate;
memset(write_frame.data, 0, len);
write_frame.datalen = len;
write_frame.samples = len / 2;
for( x = 0; !done && x < lead_in_out; x++) {
for (stream_id = 0; stream_id < switch_core_session_get_stream_count(session); stream_id++) {
if (switch_core_session_write_frame(session, &write_frame, -1, stream_id) != SWITCH_STATUS_SUCCESS) {
switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Bad Write\n");
done = 1;
break;
}
}
}
if (timer_name) {
/* start a thread to absorb incoming audio */
for (stream_id = 0; stream_id < switch_core_session_get_stream_count(session); stream_id++) {
switch_core_service_session(session, &thread_session, stream_id);
}
}
ilen = len;
while (switch_channel_get_state(channel) == CS_EXECUTE) {
if (dtmf_callback || buf) {
/*
dtmf handler function you can hook up to be executed when a digit is dialed during playback
if you return anything but SWITCH_STATUS_SUCCESS the playback will stop.
*/
if (switch_channel_has_dtmf(channel)) {
switch_channel_dequeue_dtmf(channel, dtmf, sizeof(dtmf));
if (dtmf_callback) {
status = dtmf_callback(session, dtmf, buf, buflen);
} else {
switch_copy_string((char *)buf, dtmf, buflen);
status = SWITCH_STATUS_BREAK;
}
}
if (status != SWITCH_STATUS_SUCCESS) {
done = 1;
break;
}
}
flags = SWITCH_SPEECH_FLAG_BLOCKING;
status = switch_core_speech_read_tts(&sh,
abuf,
&ilen,
&rate,
&flags);
if (status != SWITCH_STATUS_SUCCESS) {
for( x = 0; !done && x < lead_in_out; x++) {
for (stream_id = 0; stream_id < switch_core_session_get_stream_count(session); stream_id++) {
if (switch_core_session_write_frame(session, &write_frame, -1, stream_id) != SWITCH_STATUS_SUCCESS) {
switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Bad Write\n");
done = 1;
break;
}
}
}
done = 1;
}
if (done || ilen <= 0) {
break;
}
write_frame.datalen = ilen;
write_frame.samples = (int) ilen / 2;
#ifdef SWAP_LINEAR
switch_swap_linear(write_frame.data, (int) write_frame.datalen);
#endif
for (stream_id = 0; stream_id < switch_core_session_get_stream_count(session); stream_id++) {
if (switch_core_session_write_frame(session, &write_frame, -1, stream_id) != SWITCH_STATUS_SUCCESS) {
switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Bad Write\n");
done = 1;
break;
}
if (done) {
break;
}
}
if (timer_name) {
if ((x = switch_core_timer_next(&timer)) < 0) {
break;
}
} else { /* time off the channel (if you must) */
switch_frame *read_frame;
if (switch_core_session_read_frame(session, &read_frame, -1, 0) != SWITCH_STATUS_SUCCESS) {
break;
}
}
}
switch_console_printf(SWITCH_CHANNEL_CONSOLE, "done playing file\n");
switch_core_codec_destroy(&codec);
flags = 0;
switch_core_codec_destroy(&codec);
if (timer_name) {
/* End the audio absorbing thread */
switch_core_thread_session_end(&thread_session);
switch_core_timer_destroy(&timer);
}
return status;
}