mirror of
https://github.com/signalwire/freeswitch.git
synced 2025-07-16 03:42:09 +00:00
support resampling for TTS (UniMRCP 0.8.0 only)
git-svn-id: http://svn.freeswitch.org/svn/freeswitch/trunk@15570 d0543943-73ff-0310-b7d9-9358b9ac24b2
This commit is contained in:
parent
4f8b15fd00
commit
1ba29d5763
@ -308,8 +308,6 @@ struct speech_channel {
|
|||||||
speech_channel_state_t state;
|
speech_channel_state_t state;
|
||||||
/** UniMRCP <--> FreeSWITCH audio buffer */
|
/** UniMRCP <--> FreeSWITCH audio buffer */
|
||||||
audio_queue_t *audio_queue;
|
audio_queue_t *audio_queue;
|
||||||
/** codec */
|
|
||||||
char *codec;
|
|
||||||
/** rate */
|
/** rate */
|
||||||
uint16_t rate;
|
uint16_t rate;
|
||||||
/** silence sample */
|
/** silence sample */
|
||||||
@ -327,7 +325,8 @@ static apt_bool_t speech_on_channel_add(mrcp_application_t *application, mrcp_se
|
|||||||
static apt_bool_t speech_on_channel_remove(mrcp_application_t *application, mrcp_session_t *session, mrcp_channel_t *channel, mrcp_sig_status_code_e status);
|
static apt_bool_t speech_on_channel_remove(mrcp_application_t *application, mrcp_session_t *session, mrcp_channel_t *channel, mrcp_sig_status_code_e status);
|
||||||
|
|
||||||
/* speech_channel funcs */
|
/* speech_channel funcs */
|
||||||
static switch_status_t speech_channel_create(speech_channel_t **schannel, const char *name, speech_channel_type_t type, mod_unimrcp_application_t *app, const char *codec, uint16_t rate, switch_memory_pool_t *pool);
|
static switch_status_t speech_channel_create(speech_channel_t **schannel, const char *name, speech_channel_type_t type, mod_unimrcp_application_t *app, uint16_t rate, switch_memory_pool_t *pool);
|
||||||
|
static mpf_termination_t *speech_channel_create_mpf_termination(speech_channel_t *schannel);
|
||||||
static switch_status_t speech_channel_open(speech_channel_t *schannel, profile_t *profile);
|
static switch_status_t speech_channel_open(speech_channel_t *schannel, profile_t *profile);
|
||||||
static switch_status_t speech_channel_destroy(speech_channel_t *schannel);
|
static switch_status_t speech_channel_destroy(speech_channel_t *schannel);
|
||||||
static switch_status_t speech_channel_stop(speech_channel_t *schannel);
|
static switch_status_t speech_channel_stop(speech_channel_t *schannel);
|
||||||
@ -635,7 +634,7 @@ static switch_status_t audio_queue_write(audio_queue_t *queue, void *data, switc
|
|||||||
if (switch_buffer_write(queue->buffer, data, *data_len) > 0) {
|
if (switch_buffer_write(queue->buffer, data, *data_len) > 0) {
|
||||||
queue->write_bytes = queue->write_bytes + *data_len;
|
queue->write_bytes = queue->write_bytes + *data_len;
|
||||||
#ifdef MOD_UNIMRCP_DEBUG_AUDIO_QUEUE
|
#ifdef MOD_UNIMRCP_DEBUG_AUDIO_QUEUE
|
||||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "(%s) audio queue write total = %d\trequested = %d\n", queue->name, queue->write_bytes, *data_len);
|
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "(%s) audio queue write total = %ld\trequested = %ld\n", queue->name, queue->write_bytes, *data_len);
|
||||||
#endif
|
#endif
|
||||||
if (queue->waiting <= switch_buffer_inuse(queue->buffer)) {
|
if (queue->waiting <= switch_buffer_inuse(queue->buffer)) {
|
||||||
switch_thread_cond_signal(queue->cond);
|
switch_thread_cond_signal(queue->cond);
|
||||||
@ -690,7 +689,7 @@ static switch_status_t audio_queue_read(audio_queue_t *queue, void *data, switch
|
|||||||
*data_len = switch_buffer_read(queue->buffer, data, requested);
|
*data_len = switch_buffer_read(queue->buffer, data, requested);
|
||||||
queue->read_bytes = queue->read_bytes + *data_len;
|
queue->read_bytes = queue->read_bytes + *data_len;
|
||||||
#ifdef MOD_UNIMRCP_DEBUG_AUDIO_QUEUE
|
#ifdef MOD_UNIMRCP_DEBUG_AUDIO_QUEUE
|
||||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "(%s) audio queue read total = %d\tread = %d\trequested = %d\n", queue->name, queue->read_bytes, *data_len, requested);
|
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "(%s) audio queue read total = %ld\tread = %ld\trequested = %ld\n", queue->name, queue->read_bytes, *data_len, requested);
|
||||||
switch_size_t len = *data_len;
|
switch_size_t len = *data_len;
|
||||||
if (queue->file_read) {
|
if (queue->file_read) {
|
||||||
switch_file_write(queue->file_read, data, &len);
|
switch_file_write(queue->file_read, data, &len);
|
||||||
@ -764,12 +763,11 @@ static switch_status_t audio_queue_destroy(audio_queue_t *queue)
|
|||||||
* @param name the name of the channel
|
* @param name the name of the channel
|
||||||
* @param type the type of channel to create
|
* @param type the type of channel to create
|
||||||
* @param app the application
|
* @param app the application
|
||||||
* @param codec the codec to use
|
|
||||||
* @param rate the rate to use
|
* @param rate the rate to use
|
||||||
* @param pool the memory pool to use
|
* @param pool the memory pool to use
|
||||||
* @return SWITCH_STATUS_SUCCESS if successful. SWITCH_STATUS_FALSE if the channel cannot be allocated.
|
* @return SWITCH_STATUS_SUCCESS if successful. SWITCH_STATUS_FALSE if the channel cannot be allocated.
|
||||||
*/
|
*/
|
||||||
static switch_status_t speech_channel_create(speech_channel_t **schannel, const char *name, speech_channel_type_t type, mod_unimrcp_application_t *app, const char *codec, uint16_t rate, switch_memory_pool_t *pool)
|
static switch_status_t speech_channel_create(speech_channel_t **schannel, const char *name, speech_channel_type_t type, mod_unimrcp_application_t *app, uint16_t rate, switch_memory_pool_t *pool)
|
||||||
{
|
{
|
||||||
switch_status_t status = SWITCH_STATUS_SUCCESS;
|
switch_status_t status = SWITCH_STATUS_SUCCESS;
|
||||||
speech_channel_t *schan = NULL;
|
speech_channel_t *schan = NULL;
|
||||||
@ -786,14 +784,7 @@ static switch_status_t speech_channel_create(speech_channel_t **schannel, const
|
|||||||
schan->memory_pool = pool;
|
schan->memory_pool = pool;
|
||||||
schan->params = NULL;
|
schan->params = NULL;
|
||||||
schan->rate = rate;
|
schan->rate = rate;
|
||||||
schan->codec = switch_core_strdup(pool, codec);
|
schan->silence = 0; /* L16 silence sample */
|
||||||
|
|
||||||
if (!strcmp("L16", schan->codec)) {
|
|
||||||
schan->silence = 0;
|
|
||||||
} else {
|
|
||||||
/* 8-bit PCMU, PCMA */
|
|
||||||
schan->silence = 128;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (switch_mutex_init(&schan->mutex, SWITCH_MUTEX_UNNESTED, pool) != SWITCH_STATUS_SUCCESS ||
|
if (switch_mutex_init(&schan->mutex, SWITCH_MUTEX_UNNESTED, pool) != SWITCH_STATUS_SUCCESS ||
|
||||||
switch_thread_cond_create(&schan->cond, pool) != SWITCH_STATUS_SUCCESS ||
|
switch_thread_cond_create(&schan->cond, pool) != SWITCH_STATUS_SUCCESS ||
|
||||||
@ -840,6 +831,63 @@ static switch_status_t speech_channel_destroy(speech_channel_t *schannel)
|
|||||||
return SWITCH_STATUS_SUCCESS;
|
return SWITCH_STATUS_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create the audio termination for the speech channel
|
||||||
|
* @param schannel the speech channel
|
||||||
|
* @return the termination or NULL
|
||||||
|
*/
|
||||||
|
#if UNI_VERSION_AT_LEAST(0,8,0)
|
||||||
|
/* UniMRCP >= 0.8.0 variant: offer LPCM at a set of sample rates and let UniMRCP create the termination */
static mpf_termination_t *speech_channel_create_mpf_termination(speech_channel_t *schannel)
{
	mpf_stream_capabilities_t *capabilities = NULL;
	int sample_rates;

	/* synthesizer channels get sink capabilities; every other channel type gets source capabilities */
	if (schannel->type == SPEECH_CHANNEL_SYNTHESIZER) {
		capabilities = mpf_sink_stream_capabilities_create(schannel->unimrcp_session->pool);
	} else {
		capabilities = mpf_source_stream_capabilities_create(schannel->unimrcp_session->pool);
	}
	if (capabilities == NULL) {
		/* nothing can be offered without a capabilities object; report failure as a NULL termination */
		return NULL;
	}

	/* FreeSWITCH is capable of resampling, so offer the desired rate together with the lower
	 * rates that the desired rate is a multiple of. Any rate not listed here falls back to 8000 only.
	 * UniMRCP should transcode whatever the MRCP server wants to use into LPCM (host-byte ordered L16) for us.
	 */
	switch (schannel->rate) {
	case 16000:
		sample_rates = MPF_SAMPLE_RATE_8000 | MPF_SAMPLE_RATE_16000;
		break;
	case 32000:
		sample_rates = MPF_SAMPLE_RATE_8000 | MPF_SAMPLE_RATE_16000 | MPF_SAMPLE_RATE_32000;
		break;
	case 48000:
		sample_rates = MPF_SAMPLE_RATE_8000 | MPF_SAMPLE_RATE_16000 | MPF_SAMPLE_RATE_48000;
		break;
	default:
		sample_rates = MPF_SAMPLE_RATE_8000;
		break;
	}
	mpf_codec_capabilities_add(&capabilities->codecs, sample_rates, "LPCM");

	/* the speech channel itself is passed as the last argument of the termination create call */
	return mrcp_application_audio_termination_create(schannel->unimrcp_session, &schannel->application->audio_stream_vtable, capabilities, schannel);
}
|
||||||
|
#else
|
||||||
|
/* Variant used when UNI_VERSION_AT_LEAST(0,8,0) is false: request a single LPCM codec at the channel's rate */
static mpf_termination_t *speech_channel_create_mpf_termination(speech_channel_t *schannel)
{
	mpf_termination_t *termination = NULL;
	mpf_codec_descriptor_t *codec = NULL;

	codec = (mpf_codec_descriptor_t *)apr_palloc(schannel->unimrcp_session->pool, sizeof(*codec));
	if (codec == NULL) {
		/* the descriptor is written to immediately below; bail out instead of dereferencing NULL */
		return NULL;
	}

	mpf_codec_descriptor_init(codec);
	codec->channel_count = 1;
	codec->payload_type = 96;
	codec->sampling_rate = schannel->rate;
	apt_string_set(&codec->name, "LPCM"); /* "LPCM" is UniMRCP's name for L16 host byte ordered */
	switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "(%s) requesting codec LPCM/%d/%d\n", schannel->name, codec->payload_type, codec->sampling_rate);

	/* synthesizer channels get a sink termination; every other channel type gets a source termination */
	if (schannel->type == SPEECH_CHANNEL_SYNTHESIZER) {
		termination = mrcp_application_sink_termination_create(schannel->unimrcp_session, &schannel->application->audio_stream_vtable, codec, schannel);
	} else {
		termination = mrcp_application_source_termination_create(schannel->unimrcp_session, &schannel->application->audio_stream_vtable, codec, schannel);
	}

	return termination;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Open the speech channel
|
* Open the speech channel
|
||||||
*
|
*
|
||||||
@ -851,7 +899,6 @@ static switch_status_t speech_channel_open(speech_channel_t *schannel, profile_t
|
|||||||
{
|
{
|
||||||
switch_status_t status = SWITCH_STATUS_SUCCESS;
|
switch_status_t status = SWITCH_STATUS_SUCCESS;
|
||||||
mpf_termination_t *termination = NULL;
|
mpf_termination_t *termination = NULL;
|
||||||
mpf_codec_descriptor_t *codec = NULL;
|
|
||||||
mrcp_resource_type_e resource_type;
|
mrcp_resource_type_e resource_type;
|
||||||
|
|
||||||
switch_mutex_lock(schannel->mutex);
|
switch_mutex_lock(schannel->mutex);
|
||||||
@ -872,31 +919,8 @@ static switch_status_t speech_channel_open(speech_channel_t *schannel, profile_t
|
|||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* create RTP endpoint and link to session channel */
|
/* create audio termination and add to channel */
|
||||||
codec = (mpf_codec_descriptor_t *)apr_palloc(schannel->unimrcp_session->pool, sizeof(mpf_codec_descriptor_t));
|
if ((termination = speech_channel_create_mpf_termination(schannel)) == NULL) {
|
||||||
mpf_codec_descriptor_init(codec);
|
|
||||||
codec->channel_count = 1;
|
|
||||||
codec->payload_type = 96;
|
|
||||||
codec->sampling_rate = schannel->rate;
|
|
||||||
if (!strcmp(schannel->codec, "L16")) {
|
|
||||||
/* "LPCM" is UniMRCP's name for L16 host byte ordered */
|
|
||||||
apt_string_set(&codec->name, "LPCM");
|
|
||||||
} else {
|
|
||||||
apt_string_set(&codec->name, schannel->codec);
|
|
||||||
}
|
|
||||||
/* see RFC 1890 for payload types */
|
|
||||||
if (!strcmp(schannel->codec, "PCMU") && schannel->rate == 8000) {
|
|
||||||
codec->payload_type = 0;
|
|
||||||
} else if (!strcmp(schannel->codec, "PCMA") && schannel->rate == 8000) {
|
|
||||||
codec->payload_type = 8;
|
|
||||||
}
|
|
||||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "(%s) requesting codec %s/%d/%d\n", schannel->name, schannel->codec, codec->payload_type, codec->sampling_rate);
|
|
||||||
if(schannel->type == SPEECH_CHANNEL_SYNTHESIZER) {
|
|
||||||
termination = mrcp_application_sink_termination_create(schannel->unimrcp_session, &schannel->application->audio_stream_vtable, codec, schannel);
|
|
||||||
} else {
|
|
||||||
termination = mrcp_application_source_termination_create(schannel->unimrcp_session, &schannel->application->audio_stream_vtable, codec, schannel);
|
|
||||||
}
|
|
||||||
if(termination == NULL) {
|
|
||||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "(%s) Unable to create termination with %s\n", schannel->name, profile->name);
|
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "(%s) Unable to create termination with %s\n", schannel->name, profile->name);
|
||||||
mrcp_application_session_destroy(schannel->unimrcp_session);
|
mrcp_application_session_destroy(schannel->unimrcp_session);
|
||||||
status = SWITCH_STATUS_FALSE;
|
status = SWITCH_STATUS_FALSE;
|
||||||
@ -1441,8 +1465,7 @@ static switch_status_t synth_speech_open(switch_speech_handle_t *sh, const char
|
|||||||
switch_snprintf(name, sizeof(name) - 1, "TTS-%d", speech_channel_number);
|
switch_snprintf(name, sizeof(name) - 1, "TTS-%d", speech_channel_number);
|
||||||
name[sizeof(name) - 1] = '\0';
|
name[sizeof(name) - 1] = '\0';
|
||||||
|
|
||||||
/* create channel container with L16 codec (what FreeSWITCH needs) */
|
if (speech_channel_create(&schannel, name, SPEECH_CHANNEL_SYNTHESIZER, &globals.synth, (uint16_t)rate, sh->memory_pool) != SWITCH_STATUS_SUCCESS) {
|
||||||
if (speech_channel_create(&schannel, name, SPEECH_CHANNEL_SYNTHESIZER, &globals.synth, "L16", (uint16_t)rate, sh->memory_pool) != SWITCH_STATUS_SUCCESS) {
|
|
||||||
status = SWITCH_STATUS_FALSE;
|
status = SWITCH_STATUS_FALSE;
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
@ -1530,7 +1553,7 @@ static switch_status_t synth_speech_read_tts(switch_speech_handle_t *sh, void *d
|
|||||||
/* pad data, if not enough read */
|
/* pad data, if not enough read */
|
||||||
if (bytes_read < *datalen) {
|
if (bytes_read < *datalen) {
|
||||||
#ifdef MOD_UNIMRCP_DEBUG_AUDIO_QUEUE
|
#ifdef MOD_UNIMRCP_DEBUG_AUDIO_QUEUE
|
||||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "(%s) adding %d bytes of padding\n", schannel->name, *datalen - bytes_read);
|
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "(%s) adding %ld bytes of padding\n", schannel->name, *datalen - bytes_read);
|
||||||
#endif
|
#endif
|
||||||
memset((uint8_t *)data + bytes_read, schannel->silence, *datalen - bytes_read);
|
memset((uint8_t *)data + bytes_read, schannel->silence, *datalen - bytes_read);
|
||||||
}
|
}
|
||||||
@ -1539,6 +1562,11 @@ static switch_status_t synth_speech_read_tts(switch_speech_handle_t *sh, void *d
|
|||||||
status = SWITCH_STATUS_BREAK;
|
status = SWITCH_STATUS_BREAK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "(%s) rate = %d, native_rate = %d, samplerate = %d\n", schannel->name, sh->rate, sh->native_rate, sh->samplerate);
|
||||||
|
|
||||||
|
/* report negotiated sample rate back to FreeSWITCH */
|
||||||
|
sh->native_rate = schannel->rate;
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1645,7 +1673,23 @@ static apt_bool_t speech_on_channel_add(mrcp_application_t *application, mrcp_se
|
|||||||
|
|
||||||
/* check status */
|
/* check status */
|
||||||
if (session && schannel && status == MRCP_SIG_STATUS_CODE_SUCCESS) {
|
if (session && schannel && status == MRCP_SIG_STATUS_CODE_SUCCESS) {
|
||||||
|
#if UNI_VERSION_AT_LEAST(0,8,0)
|
||||||
|
char codec_name[60] = { 0 };
|
||||||
|
const mpf_codec_descriptor_t *descriptor;
|
||||||
|
/* what sample rate did we negotiate? */
|
||||||
|
if (schannel->type == SPEECH_CHANNEL_SYNTHESIZER) {
|
||||||
|
descriptor = mrcp_application_sink_descriptor_get(channel);
|
||||||
|
} else {
|
||||||
|
descriptor = mrcp_application_source_descriptor_get(channel);
|
||||||
|
}
|
||||||
|
schannel->rate = descriptor->sampling_rate;
|
||||||
|
if (descriptor->name.length) {
|
||||||
|
strncpy(codec_name, descriptor->name.buf, sizeof(codec_name));
|
||||||
|
}
|
||||||
|
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "(%s) %s channel is ready, codec = %s, sample rate = %d\n", schannel->name, speech_channel_type_to_string(schannel->type), codec_name, schannel->rate);
|
||||||
|
#else
|
||||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "(%s) %s channel is ready\n", schannel->name, speech_channel_type_to_string(schannel->type));
|
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "(%s) %s channel is ready\n", schannel->name, speech_channel_type_to_string(schannel->type));
|
||||||
|
#endif
|
||||||
speech_channel_set_state(schannel, SPEECH_CHANNEL_READY);
|
speech_channel_set_state(schannel, SPEECH_CHANNEL_READY);
|
||||||
/* notify of channel open */
|
/* notify of channel open */
|
||||||
if (globals.enable_profile_events && switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, MY_EVENT_PROFILE_OPEN) == SWITCH_STATUS_SUCCESS) {
|
if (globals.enable_profile_events && switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, MY_EVENT_PROFILE_OPEN) == SWITCH_STATUS_SUCCESS) {
|
||||||
@ -2575,7 +2619,7 @@ static switch_status_t recog_asr_open(switch_asr_handle_t *ah, const char *codec
|
|||||||
switch_snprintf(name, sizeof(name) - 1, "ASR-%d", speech_channel_number);
|
switch_snprintf(name, sizeof(name) - 1, "ASR-%d", speech_channel_number);
|
||||||
name[sizeof(name) - 1] = '\0';
|
name[sizeof(name) - 1] = '\0';
|
||||||
|
|
||||||
if (speech_channel_create(&schannel, name, SPEECH_CHANNEL_RECOGNIZER, &globals.recog, "L16", (uint16_t)rate, ah->memory_pool) != SWITCH_STATUS_SUCCESS) {
|
if (speech_channel_create(&schannel, name, SPEECH_CHANNEL_RECOGNIZER, &globals.recog, (uint16_t)rate, ah->memory_pool) != SWITCH_STATUS_SUCCESS) {
|
||||||
status = SWITCH_STATUS_FALSE;
|
status = SWITCH_STATUS_FALSE;
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user