/* freeswitch/libs/unimrcp/platforms/umc/src/recogsession.cpp (457 lines, 13 KiB, C++) */

/*
* Copyright 2008-2014 Arsen Chaloyan
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* $Id: recogsession.cpp 2136 2014-07-04 06:33:36Z achaloyan@gmail.com $
*/
#include "recogsession.h"
#include "recogscenario.h"
#include "mrcp_message.h"
#include "mrcp_generic_header.h"
#include "mrcp_recog_header.h"
#include "mrcp_recog_resource.h"
#include "apt_nlsml_doc.h"
#include "apt_log.h"
/** State the recognizer session keeps for its single MRCP channel */
struct RecogChannel
{
	/** MRCP control channel this state is attached to */
	mrcp_channel_t* m_pMrcpChannel;
	/** RECOGNIZE request which is currently IN-PROGRESS (NULL if none) */
	mrcp_message_t* m_pRecogRequest;
	/** True while audio is being streamed */
	bool            m_Streaming;
	/** Audio input file the stream is read from (NULL if none is open) */
	FILE*           m_pAudioIn;
	/** Estimated time left to complete; only used when there is no audio input file */
	apr_size_t      m_TimeToComplete;

	/** Start with no channel, no pending request, streaming off and no audio source */
	RecogChannel()
		: m_pMrcpChannel(NULL)
		, m_pRecogRequest(NULL)
		, m_Streaming(false)
		, m_pAudioIn(NULL)
		, m_TimeToComplete(0)
	{
	}
};
/**
 * Construct a recognizer session for the given scenario.
 * The channel is not created here; m_pRecogChannel stays NULL until Start().
 */
RecogSession::RecogSession(const RecogScenario* pScenario)
	: UmcSession(pScenario)
	, m_pRecogChannel(NULL)
	, m_ContentId("request1@form-level")
{
}
/**
 * The destructor itself releases nothing: within this file m_pRecogChannel is
 * deleted in OnSessionTerminate() (or in Start() when adding the channel fails).
 */
RecogSession::~RecogSession()
{
}
bool RecogSession::Start()
{
const RecogScenario* pScenario = GetScenario();
if(!pScenario->IsDefineGrammarEnabled() && !pScenario->IsRecognizeEnabled())
return false;
/* create channel and associate all the required data */
m_pRecogChannel = CreateRecogChannel();
if(!m_pRecogChannel)
return false;
/* add channel to session (send asynchronous request) */
if(!AddMrcpChannel(m_pRecogChannel->m_pMrcpChannel))
{
delete m_pRecogChannel;
m_pRecogChannel = NULL;
return false;
}
return true;
}
bool RecogSession::Stop()
{
if(!UmcSession::Stop())
return false;
if(!m_pRecogChannel)
return false;
mrcp_message_t* pStopMessage = CreateMrcpMessage(m_pRecogChannel->m_pMrcpChannel,RECOGNIZER_STOP);
if(!pStopMessage)
return false;
if(m_pRecogChannel->m_pRecogRequest)
{
mrcp_generic_header_t* pGenericHeader;
/* get/allocate generic header */
pGenericHeader = (mrcp_generic_header_t*) mrcp_generic_header_prepare(pStopMessage);
if(pGenericHeader)
{
pGenericHeader->active_request_id_list.count = 1;
pGenericHeader->active_request_id_list.ids[0] =
m_pRecogChannel->m_pRecogRequest->start_line.request_id;
mrcp_generic_header_property_add(pStopMessage,GENERIC_HEADER_ACTIVE_REQUEST_ID_LIST);
}
m_pRecogChannel->m_pRecogRequest = NULL;
}
return SendMrcpRequest(m_pRecogChannel->m_pMrcpChannel,pStopMessage);
}
/**
 * Session termination handler: close the audio input file (if one is open),
 * free the channel state, then hand over to the base class.
 * @param status signalling status, passed through to UmcSession
 * @return whatever UmcSession::OnSessionTerminate() returns
 */
bool RecogSession::OnSessionTerminate(mrcp_sig_status_code_e status)
{
	if(m_pRecogChannel != NULL)
	{
		if(m_pRecogChannel->m_pAudioIn != NULL)
		{
			/* clear the member first, then close the handle */
			FILE* pOpenFile = m_pRecogChannel->m_pAudioIn;
			m_pRecogChannel->m_pAudioIn = NULL;
			fclose(pOpenFile);
		}

		delete m_pRecogChannel;
		m_pRecogChannel = NULL;
	}

	return UmcSession::OnSessionTerminate(status);
}
/**
 * Audio stream read callback: fills pFrame with the next chunk of audio.
 * While streaming is on, a full frame is read from the audio input file; when
 * no file is open, zero-filled frames are produced until m_TimeToComplete is
 * used up. A short read or an exhausted time budget switches streaming off and
 * leaves the frame type untouched.
 * @return always TRUE
 */
static apt_bool_t ReadStream(mpf_audio_stream_t* pStream, mpf_frame_t* pFrame)
{
	RecogChannel* pChannel = (RecogChannel*) pStream->obj;
	if(!pChannel || !pChannel->m_Streaming)
		return TRUE;

	if(pChannel->m_pAudioIn)
	{
		const apr_size_t bytesRead = fread(pFrame->codec_frame.buffer,1,pFrame->codec_frame.size,pChannel->m_pAudioIn);
		if(bytesRead == pFrame->codec_frame.size)
		{
			/* got a complete frame */
			pFrame->type |= MEDIA_FRAME_TYPE_AUDIO;
		}
		else
		{
			/* nothing more to read from the file */
			pChannel->m_Streaming = false;
		}
		return TRUE;
	}

	/* no file available: emit silence for as long as the estimate lasts */
	if(pChannel->m_TimeToComplete < CODEC_FRAME_TIME_BASE)
	{
		pChannel->m_Streaming = false;
		return TRUE;
	}

	pFrame->type |= MEDIA_FRAME_TYPE_AUDIO;
	memset(pFrame->codec_frame.buffer,0,pFrame->codec_frame.size);
	pChannel->m_TimeToComplete -= CODEC_FRAME_TIME_BASE;
	return TRUE;
}
/**
 * Allocate the channel state and create the audio termination and the MRCP
 * recognizer channel which refer to it.
 * @return the new channel state, or NULL if the MRCP channel cannot be created
 *         (the state is deleted in that case)
 */
RecogChannel* RecogSession::CreateRecogChannel()
{
	/* only the fourth slot (ReadStream) is provided, the remaining ones stay NULL */
	static const mpf_audio_stream_vtable_t streamCallbacks =
	{
		NULL, NULL, NULL,
		ReadStream,
		NULL, NULL, NULL, NULL
	};

	apr_pool_t* pSessionPool = GetSessionPool();

	/* state object handed to both the termination and the channel */
	RecogChannel* pNewChannel = new RecogChannel;

	/* source stream capabilities, initialized by the scenario */
	mpf_stream_capabilities_t* pSourceCaps = mpf_source_stream_capabilities_create(pSessionPool);
	GetScenario()->InitCapabilities(pSourceCaps);

	mpf_termination_t* pAudioTermination = CreateAudioTermination(
		&streamCallbacks,  /* virtual methods table of audio stream */
		pSourceCaps,       /* capabilities of audio stream */
		pNewChannel);      /* object to associate */

	mrcp_channel_t* pControlChannel = CreateMrcpChannel(
		MRCP_RECOGNIZER_RESOURCE, /* MRCP resource identifier */
		pAudioTermination,        /* media termination, used to terminate audio stream */
		NULL,                     /* RTP descriptor, used to create RTP termination (NULL by default) */
		pNewChannel);             /* object to associate */

	if(pControlChannel)
	{
		pNewChannel->m_pMrcpChannel = pControlChannel;
		return pNewChannel;
	}

	delete pNewChannel;
	return NULL;
}
/**
 * Channel-add response handler.
 * On failure the session is terminated. On success either a DEFINE-GRAMMAR
 * request is sent (when enabled in the scenario) or recognition is started
 * straight away.
 * @return false if the base class handler fails; otherwise the result of
 *         Terminate() / StartRecognition(), or true on the DEFINE-GRAMMAR path
 */
bool RecogSession::OnChannelAdd(mrcp_channel_t* pMrcpChannel, mrcp_sig_status_code_e status)
{
	if(!UmcSession::OnChannelAdd(pMrcpChannel,status))
		return false;

	/* error case, just terminate the demo */
	if(status != MRCP_SIG_STATUS_CODE_SUCCESS)
		return Terminate();

	if(!GetScenario()->IsDefineGrammarEnabled())
		return StartRecognition(pMrcpChannel);

	mrcp_message_t* pDefineGrammar = CreateDefineGrammarRequest(pMrcpChannel);
	if(pDefineGrammar)
		SendMrcpRequest(pMrcpChannel,pDefineGrammar);
	return true;
}
/**
 * Handle an MRCP response or event received on the recognizer channel.
 *
 * Responses:
 *  - DEFINE-GRAMMAR, COMPLETE: continue with OnDefineGrammar(); any other
 *    request state terminates the session.
 *  - RECOGNIZE, IN-PROGRESS: remember the request returned by GetMrcpMessage()
 *    (it is needed by Stop()) and switch streaming on; any other request state
 *    terminates the session.
 * Events:
 *  - RECOGNITION-COMPLETE: trace the NLSML result, switch streaming off,
 *    forget the stored request and terminate the session.
 *  - START-OF-INPUT: ignored.
 *
 * @param pMrcpChannel channel the message arrived on
 * @param pMrcpMessage received message
 * @return false if the base class handler fails, true otherwise
 */
bool RecogSession::OnMessageReceive(mrcp_channel_t* pMrcpChannel, mrcp_message_t* pMrcpMessage)
{
	if(!UmcSession::OnMessageReceive(pMrcpChannel,pMrcpMessage))
		return false;

	/* looked up once; may be NULL, so every use below is guarded */
	RecogChannel* pRecogChannel = (RecogChannel*) mrcp_application_channel_object_get(pMrcpChannel);

	if(pMrcpMessage->start_line.message_type == MRCP_MESSAGE_TYPE_RESPONSE)
	{
		/* received MRCP response */
		if(pMrcpMessage->start_line.method_id == RECOGNIZER_DEFINE_GRAMMAR)
		{
			/* received the response to DEFINE-GRAMMAR request */
			if(pMrcpMessage->start_line.request_state == MRCP_REQUEST_STATE_COMPLETE)
			{
				OnDefineGrammar(pMrcpChannel);
			}
			else
			{
				/* received unexpected response, terminate the session */
				Terminate();
			}
		}
		else if(pMrcpMessage->start_line.method_id == RECOGNIZER_RECOGNIZE)
		{
			/* received the response to RECOGNIZE request */
			if(pMrcpMessage->start_line.request_state == MRCP_REQUEST_STATE_INPROGRESS)
			{
				if(pRecogChannel)
				{
					/* keep the in-progress request so that Stop() can refer to it */
					pRecogChannel->m_pRecogRequest = GetMrcpMessage();
					/* start to stream the speech to recognize */
					pRecogChannel->m_Streaming = true;
				}
			}
			else
			{
				/* received unexpected response, terminate the session */
				Terminate();
			}
		}
		else
		{
			/* received unexpected response */
		}
	}
	else if(pMrcpMessage->start_line.message_type == MRCP_MESSAGE_TYPE_EVENT)
	{
		if(pMrcpMessage->start_line.method_id == RECOGNIZER_RECOGNITION_COMPLETE)
		{
			ParseNLSMLResult(pMrcpMessage);
			if(pRecogChannel)
			{
				/* recognition is over: stop streaming and forget the request */
				pRecogChannel->m_Streaming = false;
				pRecogChannel->m_pRecogRequest = NULL;
			}
			Terminate();
		}
		else if(pMrcpMessage->start_line.method_id == RECOGNIZER_START_OF_INPUT)
		{
			/* received start-of-input, do whatever you need here */
		}
	}
	return true;
}
/**
 * Called after DEFINE-GRAMMAR has completed: go on to recognition when the
 * scenario enables it, otherwise terminate the session.
 * @return the result of StartRecognition() or Terminate()
 */
bool RecogSession::OnDefineGrammar(mrcp_channel_t* pMrcpChannel)
{
	if(!GetScenario()->IsRecognizeEnabled())
		return Terminate();

	return StartRecognition(pMrcpChannel);
}
/**
 * Send the RECOGNIZE request and prepare the audio source to be streamed.
 *
 * The session is terminated if the media source descriptor or the channel
 * object associated with pMrcpChannel cannot be retrieved. If no audio input
 * file can be opened, an estimated time to complete is set instead (see
 * ReadStream(), which then produces silence).
 *
 * @param pMrcpChannel channel to run the recognition on
 * @return the result of Terminate() on the failure paths, true otherwise
 */
bool RecogSession::StartRecognition(mrcp_channel_t* pMrcpChannel)
{
	const mpf_codec_descriptor_t* pDescriptor = mrcp_application_source_descriptor_get(pMrcpChannel);
	if(!pDescriptor)
	{
		apt_log(APT_LOG_MARK,APT_PRIO_WARNING,"Failed to Get Media Source Descriptor");
		return Terminate();
	}

	RecogChannel* pRecogChannel = (RecogChannel*) mrcp_application_channel_object_get(pMrcpChannel);
	if(!pRecogChannel)
	{
		/* every other user of the channel object guards against NULL; do the same
		   here instead of dereferencing it below */
		apt_log(APT_LOG_MARK,APT_PRIO_WARNING,"Failed to Get Recognizer Channel Object");
		return Terminate();
	}

	/* create and send RECOGNIZE request */
	mrcp_message_t* pMrcpMessage = CreateRecognizeRequest(pMrcpChannel);
	if(pMrcpMessage)
	{
		SendMrcpRequest(pRecogChannel->m_pMrcpChannel,pMrcpMessage);
	}

	pRecogChannel->m_pAudioIn = GetAudioIn(pDescriptor,GetSessionPool());
	if(!pRecogChannel->m_pAudioIn)
	{
		/* no audio input available, set some estimated time to complete instead */
		pRecogChannel->m_TimeToComplete = 5000; // 5 sec
	}
	return true;
}
/**
 * Build a DEFINE-GRAMMAR request for the given channel.
 * The generic header carries the scenario's content type (when it has one) and
 * the session's content-id; the body is the scenario content, if any.
 * @return the new message, or NULL if it cannot be created
 */
mrcp_message_t* RecogSession::CreateDefineGrammarRequest(mrcp_channel_t* pMrcpChannel)
{
	mrcp_message_t* pRequest = CreateMrcpMessage(pMrcpChannel,RECOGNIZER_DEFINE_GRAMMAR);
	if(pRequest == NULL)
		return NULL;

	const RecogScenario* pRecogScenario = GetScenario();

	/* get/allocate generic header */
	mrcp_generic_header_t* pHeader = (mrcp_generic_header_t*) mrcp_generic_header_prepare(pRequest);
	if(pHeader != NULL)
	{
		/* content-type is optional, content-id is always set */
		if(pRecogScenario->GetContentType())
		{
			apt_string_assign(&pHeader->content_type,pRecogScenario->GetContentType(),pRequest->pool);
			mrcp_generic_header_property_add(pRequest,GENERIC_HEADER_CONTENT_TYPE);
		}
		apt_string_assign(&pHeader->content_id,m_ContentId,pRequest->pool);
		mrcp_generic_header_property_add(pRequest,GENERIC_HEADER_CONTENT_ID);
	}

	/* the grammar itself goes into the message body */
	if(pRecogScenario->GetContent())
	{
		apt_string_assign_n(
			&pRequest->body,
			pRecogScenario->GetContent(),
			pRecogScenario->GetContentLength(),
			pRequest->pool);
	}
	return pRequest;
}
/**
 * Build a RECOGNIZE request for the given channel.
 *
 * Generic header / body:
 *  - DEFINE-GRAMMAR enabled in the scenario: content type "text/uri-list" and
 *    a body of "session:<content-id>" referring to the content-id used by this
 *    session.
 *  - otherwise: the scenario's content type, the session's content-id and the
 *    scenario content as the body (inline grammar).
 * Recognizer header: fixed demo values for the timeouts, confidence threshold
 * and related flags; Cancel-If-Queue is only set for MRCP version 2 messages.
 *
 * @param pMrcpChannel channel the request is created for
 * @return the new message, or NULL if it cannot be created
 */
mrcp_message_t* RecogSession::CreateRecognizeRequest(mrcp_channel_t* pMrcpChannel)
{
	mrcp_message_t* pMrcpMessage = CreateMrcpMessage(pMrcpChannel,RECOGNIZER_RECOGNIZE);
	if(!pMrcpMessage)
		return NULL;

	const RecogScenario* pScenario = GetScenario();

	/* get/allocate generic header */
	mrcp_generic_header_t* pGenericHeader = (mrcp_generic_header_t*) mrcp_generic_header_prepare(pMrcpMessage);
	if(pGenericHeader)
	{
		/* set generic header fields */
		if(pScenario->IsDefineGrammarEnabled())
		{
			apt_string_assign(&pGenericHeader->content_type,"text/uri-list",pMrcpMessage->pool);
			/* set message body: a URI built from the session's content-id */
			const char* pContent = apr_pstrcat(pMrcpMessage->pool,"session:",m_ContentId,NULL);
			apt_string_set(&pMrcpMessage->body,pContent);
		}
		else
		{
			apt_string_assign(&pGenericHeader->content_type,pScenario->GetContentType(),pMrcpMessage->pool);
			/* set content-id */
			apt_string_assign(&pGenericHeader->content_id,m_ContentId,pMrcpMessage->pool);
			mrcp_generic_header_property_add(pMrcpMessage,GENERIC_HEADER_CONTENT_ID);
			/* set message body; copy by explicit length, exactly as
			   CreateDefineGrammarRequest() does, so that the body size does not
			   depend on where the first NUL byte happens to be */
			if(pScenario->GetContent())
				apt_string_assign_n(&pMrcpMessage->body,pScenario->GetContent(),pScenario->GetContentLength(),pMrcpMessage->pool);
		}
		mrcp_generic_header_property_add(pMrcpMessage,GENERIC_HEADER_CONTENT_TYPE);
	}

	/* get/allocate recognizer header */
	mrcp_recog_header_t* pRecogHeader = (mrcp_recog_header_t*) mrcp_resource_header_prepare(pMrcpMessage);
	if(pRecogHeader)
	{
		/* set recognizer header fields */
		if(pMrcpMessage->start_line.version == MRCP_VERSION_2)
		{
			pRecogHeader->cancel_if_queue = FALSE;
			mrcp_resource_header_property_add(pMrcpMessage,RECOGNIZER_HEADER_CANCEL_IF_QUEUE);
		}
		pRecogHeader->no_input_timeout = 5000;
		mrcp_resource_header_property_add(pMrcpMessage,RECOGNIZER_HEADER_NO_INPUT_TIMEOUT);
		pRecogHeader->recognition_timeout = 10000;
		mrcp_resource_header_property_add(pMrcpMessage,RECOGNIZER_HEADER_RECOGNITION_TIMEOUT);
		pRecogHeader->start_input_timers = TRUE;
		mrcp_resource_header_property_add(pMrcpMessage,RECOGNIZER_HEADER_START_INPUT_TIMERS);
		pRecogHeader->confidence_threshold = 0.87f;
		mrcp_resource_header_property_add(pMrcpMessage,RECOGNIZER_HEADER_CONFIDENCE_THRESHOLD);
		pRecogHeader->save_waveform = TRUE;
		mrcp_resource_header_property_add(pMrcpMessage,RECOGNIZER_HEADER_SAVE_WAVEFORM);
	}
	return pMrcpMessage;
}
/**
 * Parse the message body as an NLSML result and trace it.
 * @return true if the body was parsed (and traced), false if parsing failed
 */
bool RecogSession::ParseNLSMLResult(mrcp_message_t* pMrcpMessage)
{
	nlsml_result_t* pParsed = nlsml_result_parse(
		pMrcpMessage->body.buf,
		pMrcpMessage->body.length,
		pMrcpMessage->pool);
	if(pParsed)
	{
		nlsml_result_trace(pParsed, pMrcpMessage->pool);
		return true;
	}
	return false;
}
FILE* RecogSession::GetAudioIn(const mpf_codec_descriptor_t* pDescriptor, apr_pool_t* pool) const
{
const char* pFileName = GetScenario()->GetAudioSource();
if(!pFileName)
{
pFileName = apr_psprintf(pool,"one-%dkHz.pcm",pDescriptor->sampling_rate/1000);
}
apt_dir_layout_t* pDirLayout = GetScenario()->GetDirLayout();
const char* pFilePath = apt_datadir_filepath_get(pDirLayout,pFileName,pool);
if(!pFilePath)
return NULL;
FILE* pFile = fopen(pFilePath,"rb");
if(!pFile)
{
apt_log(APT_LOG_MARK,APT_PRIO_INFO,"Cannot Find [%s]",pFilePath);
return NULL;
}
apt_log(APT_LOG_MARK,APT_PRIO_INFO,"Set [%s] as Speech Source",pFilePath);
return pFile;
}