/*
 * Asterisk -- An open source telephony toolkit.
 *
 * Copyright (C) 2006, Digium, Inc.
 *
 * Joshua Colp <jcolp@digium.com>
 *
 * See http://www.asterisk.org for more information about
 * the Asterisk project. Please do not directly contact
 * any of the maintainers of this project for assistance;
 * the project provides a web site, mailing lists and IRC
 * channels for your use.
 *
 * This program is free software, distributed under the terms of
 * the GNU General Public License Version 2. See the LICENSE file
 * at the top of the source tree.
 */

/*! \file
 * \brief Generic Speech Recognition API
 */

#ifndef _ASTERISK_SPEECH_H
#define _ASTERISK_SPEECH_H

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

/*!
 * \brief Speech structure flags
 *
 * Each value occupies its own bit, so several flags can be combined
 * in a single integer with bitwise OR.
 */
enum ast_speech_flags {
	AST_SPEECH_QUIET = (1 << 0),        /*!< Quiet down output... they are talking */
	AST_SPEECH_SPOKE = (1 << 1),        /*!< Speaker spoke! */
	AST_SPEECH_HAVE_RESULTS = (1 << 2), /*!< Results are present */
};

/*!
 * \brief Speech structure states - in order of expected change
 *
 * The enumerators are numbered consecutively from 0 in the order
 * listed below.
 */
enum ast_speech_states {
	AST_SPEECH_STATE_NOT_READY = 0, /*!< Not ready to accept audio */
	AST_SPEECH_STATE_READY,         /*!< Accepting audio */
	AST_SPEECH_STATE_WAIT,          /*!< Wait for results to become available */
	AST_SPEECH_STATE_DONE,          /*!< Processing is all done */
};

/*!
 * \brief Type of results requested from a speech engine
 */
enum ast_speech_results_type {
	AST_SPEECH_RESULTS_TYPE_NORMAL = 0, /*!< Normal results */
	AST_SPEECH_RESULTS_TYPE_NBEST,      /*!< NBest results (each result carries an NBest alternative number) */
};

/* Speech structure */
|
|
|
|
struct ast_speech {
|
|
|
|
/*! Structure lock */
|
|
|
|
ast_mutex_t lock;
|
|
|
|
/*! Set flags */
|
After some study, thought, comparing, etc. I've backed out the previous universal mod to make ast_flags a 64 bit thing. Instead, I added a 64-bit version of ast_flags (ast_flags64), and 64-bit versions of the test-flag, set-flag, etc. macros, and an app_parse_options64 routine, and I use these in app_dial alone, to eliminate the 30-option limit it had grown to meet. There is room now for 32 more options and flags. I was heavily tempted to implement some of the other ideas that were presented, but this solution does not intro any new versions of dial, doesn't have a different API, has a minimal/zero impact on code outside of dial, and doesn't seriously (I hope) affect the code structure of dial. It's the best I can think of right now. My goal was NOT to rewrite dial. I leave that to a future, coordinated effort.
git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@75983 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2007-07-19 23:24:27 +00:00
|
|
|
unsigned int flags;
|
2006-04-10 23:29:50 +00:00
|
|
|
/*! Processing sound (used when engine is processing audio and getting results) */
|
|
|
|
char *processing_sound;
|
|
|
|
/*! Current state of structure */
|
|
|
|
int state;
|
|
|
|
/*! Expected write format */
|
media formats: re-architect handling of media for performance improvements
In the old times media formats were represented using a bit field. This was
fast but had a few limitations.
1. Asterisk was limited in how many formats it could handle.
2. Formats, being a bit field, could not include any attribute information.
A format was strictly its type, e.g., "this is ulaw".
This was changed in Asterisk 10 (see
https://wiki.asterisk.org/wiki/display/AST/Media+Architecture+Proposal for
notes on that work) which led to the creation of the ast_format structure.
This structure allowed Asterisk to handle attributes and bundle information
with a format.
Additionally, ast_format_cap was created to act as a container for multiple
formats that, together, formed the capability of some entity. Another
mechanism was added to allow logic to be registered which performed format
attribute negotiation. Everywhere throughout the codebase Asterisk was
changed to use this strategy.
Unfortunately, in software, there is no free lunch. These new capabilities
came at a cost.
Performance analysis and profiling showed that we spend an inordinate
amount of time comparing, copying, and generally manipulating formats and
their related structures. Basic prototyping has shown that a reasonably
large performance improvement could be made in this area. This patch is the
result of that project, which overhauled the media format architecture
and its usage in Asterisk to improve performance.
Generally, the new philosophy for handling formats is as follows:
* The ast_format structure is reference counted. This removed a large amount
of the memory allocations and copying that was done in prior versions.
* In order to prevent race conditions while keeping things performant, the
ast_format structure is immutable by convention and lock-free. Violate this
tenet at your peril!
* Because formats are reference counted, codecs are also reference counted.
The Asterisk core generally provides built-in codecs and caches the
ast_format structures created to represent them. Generally, to prevent
inordinate amounts of module reference bumping, codecs and formats can be
added at run-time but cannot be removed.
* All compatibility with the bit field representation of codecs/formats has
been moved to a compatibility API. The primary user of this representation
is chan_iax2, which must continue to maintain its bit-field usage of formats
for interoperability concerns.
* When a format is negotiated with attributes, or when a format cannot be
represented by one of the cached formats, a new format object is created or
cloned from an existing format. That format may have the same codec
underlying it, but is a different format than a version of the format with
different attributes or without attributes.
* While formats are reference counted objects, the reference count maintained
on the format should be manipulated with care. Formats are generally cached
and will persist for the lifetime of Asterisk and do not explicitly need
to have their lifetime modified. An exception to this is when the user of a
format does not know where the format came from *and* the user may outlive
the provider of the format. This occurs, for example, when a format is read
from a channel: the channel may have a format with attributes (hence,
non-cached) and the user of the format may last longer than the channel (if
the reference to the channel is released prior to the format's reference).
For more information on this work, see the API design notes:
https://wiki.asterisk.org/wiki/display/AST/Media+Format+Rewrite
Finally, this work was the culmination of a large number of developer's
efforts. Extra thanks goes to Corey Farrell, who took on a large amount of the
work in the Asterisk core, chan_sip, and was an invaluable resource in peer
reviews throughout this project.
There were a substantial number of patches contributed during this work; the
following issues/patch names simply reflect some of the work (and will cause
the release scripts to give attribution to the individuals who work on them).
Reviews:
https://reviewboard.asterisk.org/r/3814
https://reviewboard.asterisk.org/r/3808
https://reviewboard.asterisk.org/r/3805
https://reviewboard.asterisk.org/r/3803
https://reviewboard.asterisk.org/r/3801
https://reviewboard.asterisk.org/r/3798
https://reviewboard.asterisk.org/r/3800
https://reviewboard.asterisk.org/r/3794
https://reviewboard.asterisk.org/r/3793
https://reviewboard.asterisk.org/r/3792
https://reviewboard.asterisk.org/r/3791
https://reviewboard.asterisk.org/r/3790
https://reviewboard.asterisk.org/r/3789
https://reviewboard.asterisk.org/r/3788
https://reviewboard.asterisk.org/r/3787
https://reviewboard.asterisk.org/r/3786
https://reviewboard.asterisk.org/r/3784
https://reviewboard.asterisk.org/r/3783
https://reviewboard.asterisk.org/r/3778
https://reviewboard.asterisk.org/r/3774
https://reviewboard.asterisk.org/r/3775
https://reviewboard.asterisk.org/r/3772
https://reviewboard.asterisk.org/r/3761
https://reviewboard.asterisk.org/r/3754
https://reviewboard.asterisk.org/r/3753
https://reviewboard.asterisk.org/r/3751
https://reviewboard.asterisk.org/r/3750
https://reviewboard.asterisk.org/r/3748
https://reviewboard.asterisk.org/r/3747
https://reviewboard.asterisk.org/r/3746
https://reviewboard.asterisk.org/r/3742
https://reviewboard.asterisk.org/r/3740
https://reviewboard.asterisk.org/r/3739
https://reviewboard.asterisk.org/r/3738
https://reviewboard.asterisk.org/r/3737
https://reviewboard.asterisk.org/r/3736
https://reviewboard.asterisk.org/r/3734
https://reviewboard.asterisk.org/r/3722
https://reviewboard.asterisk.org/r/3713
https://reviewboard.asterisk.org/r/3703
https://reviewboard.asterisk.org/r/3689
https://reviewboard.asterisk.org/r/3687
https://reviewboard.asterisk.org/r/3674
https://reviewboard.asterisk.org/r/3671
https://reviewboard.asterisk.org/r/3667
https://reviewboard.asterisk.org/r/3665
https://reviewboard.asterisk.org/r/3625
https://reviewboard.asterisk.org/r/3602
https://reviewboard.asterisk.org/r/3519
https://reviewboard.asterisk.org/r/3518
https://reviewboard.asterisk.org/r/3516
https://reviewboard.asterisk.org/r/3515
https://reviewboard.asterisk.org/r/3512
https://reviewboard.asterisk.org/r/3506
https://reviewboard.asterisk.org/r/3413
https://reviewboard.asterisk.org/r/3410
https://reviewboard.asterisk.org/r/3387
https://reviewboard.asterisk.org/r/3388
https://reviewboard.asterisk.org/r/3389
https://reviewboard.asterisk.org/r/3390
https://reviewboard.asterisk.org/r/3321
https://reviewboard.asterisk.org/r/3320
https://reviewboard.asterisk.org/r/3319
https://reviewboard.asterisk.org/r/3318
https://reviewboard.asterisk.org/r/3266
https://reviewboard.asterisk.org/r/3265
https://reviewboard.asterisk.org/r/3234
https://reviewboard.asterisk.org/r/3178
ASTERISK-23114 #close
Reported by: mjordan
media_formats_translation_core.diff uploaded by kharwell (License 6464)
rb3506.diff uploaded by mjordan (License 6283)
media_format_app_file.diff uploaded by kharwell (License 6464)
misc-2.diff uploaded by file (License 5000)
chan_mild-3.diff uploaded by file (License 5000)
chan_obscure.diff uploaded by file (License 5000)
jingle.diff uploaded by file (License 5000)
funcs.diff uploaded by file (License 5000)
formats.diff uploaded by file (License 5000)
core.diff uploaded by file (License 5000)
bridges.diff uploaded by file (License 5000)
mf-codecs-2.diff uploaded by file (License 5000)
mf-app_fax.diff uploaded by file (License 5000)
mf-apps-3.diff uploaded by file (License 5000)
media-formats-3.diff uploaded by file (License 5000)
ASTERISK-23715
rb3713.patch uploaded by coreyfarrell (License 5909)
rb3689.patch uploaded by mjordan (License 6283)
ASTERISK-23957
rb3722.patch uploaded by mjordan (License 6283)
mf-attributes-3.diff uploaded by file (License 5000)
ASTERISK-23958
Tested by: jrose
rb3822.patch uploaded by coreyfarrell (License 5909)
rb3800.patch uploaded by jrose (License 6182)
chan_sip.diff uploaded by mjordan (License 6283)
rb3747.patch uploaded by jrose (License 6182)
ASTERISK-23959 #close
Tested by: sgriepentrog, mjordan, coreyfarrell
sip_cleanup.diff uploaded by opticron (License 6273)
chan_sip_caps.diff uploaded by mjordan (License 6283)
rb3751.patch uploaded by coreyfarrell (License 5909)
chan_sip-3.diff uploaded by file (License 5000)
ASTERISK-23960 #close
Tested by: opticron
direct_media.diff uploaded by opticron (License 6273)
pjsip-direct-media.diff uploaded by file (License 5000)
format_cap_remove.diff uploaded by opticron (License 6273)
media_format_fixes.diff uploaded by opticron (License 6273)
chan_pjsip-2.diff uploaded by file (License 5000)
ASTERISK-23966 #close
Tested by: rmudgett
rb3803.patch uploaded by rmudgetti (License 5621)
chan_dahdi.diff uploaded by file (License 5000)
ASTERISK-24064 #close
Tested by: coreyfarrell, mjordan, opticron, file, rmudgett, sgriepentrog, jrose
rb3814.patch uploaded by rmudgett (License 5621)
moh_cleanup.diff uploaded by opticron (License 6273)
bridge_leak.diff uploaded by opticron (License 6273)
translate.diff uploaded by file (License 5000)
rb3795.patch uploaded by rmudgett (License 5621)
tls_fix.diff uploaded by mjordan (License 6283)
fax-mf-fix-2.diff uploaded by file (License 5000)
rtp_transfer_stuff uploaded by mjordan (License 6283)
rb3787.patch uploaded by rmudgett (License 5621)
media-formats-explicit-translate-format-3.diff uploaded by file (License 5000)
format_cache_case_fix.diff uploaded by opticron (License 6273)
rb3774.patch uploaded by rmudgett (License 5621)
rb3775.patch uploaded by rmudgett (License 5621)
rtp_engine_fix.diff uploaded by opticron (License 6273)
rtp_crash_fix.diff uploaded by opticron (License 6273)
rb3753.patch uploaded by mjordan (License 6283)
rb3750.patch uploaded by mjordan (License 6283)
rb3748.patch uploaded by rmudgett (License 5621)
media_format_fixes.diff uploaded by opticron (License 6273)
rb3740.patch uploaded by mjordan (License 6283)
rb3739.patch uploaded by mjordan (License 6283)
rb3734.patch uploaded by mjordan (License 6283)
rb3689.patch uploaded by mjordan (License 6283)
rb3674.patch uploaded by coreyfarrell (License 5909)
rb3671.patch uploaded by coreyfarrell (License 5909)
rb3667.patch uploaded by coreyfarrell (License 5909)
rb3665.patch uploaded by mjordan (License 6283)
rb3625.patch uploaded by coreyfarrell (License 5909)
rb3602.patch uploaded by coreyfarrell (License 5909)
format_compatibility-2.diff uploaded by file (License 5000)
core.diff uploaded by file (License 5000)
git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@419044 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-07-20 22:06:33 +00:00
|
|
|
struct ast_format *format;
|
2006-04-10 23:29:50 +00:00
|
|
|
/*! Data for speech engine */
|
|
|
|
void *data;
|
2006-04-13 00:18:52 +00:00
|
|
|
/*! Cached results */
|
|
|
|
struct ast_speech_result *results;
|
2007-04-06 01:15:50 +00:00
|
|
|
/*! Type of results we want */
|
|
|
|
enum ast_speech_results_type results_type;
|
2006-04-10 23:29:50 +00:00
|
|
|
/*! Pointer to the engine used by this speech structure */
|
|
|
|
struct ast_speech_engine *engine;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Speech recognition engine structure */
|
|
|
|
struct ast_speech_engine {
|
|
|
|
/*! Name of speech engine */
|
|
|
|
char *name;
|
|
|
|
/*! Set up the speech structure within the engine */
|
2011-02-03 16:22:10 +00:00
|
|
|
int (*create)(struct ast_speech *speech, struct ast_format *format);
|
2006-04-10 23:29:50 +00:00
|
|
|
/*! Destroy any data set on the speech structure by the engine */
|
|
|
|
int (*destroy)(struct ast_speech *speech);
|
|
|
|
/*! Load a local grammar on the speech structure */
|
2009-05-21 21:13:09 +00:00
|
|
|
int (*load)(struct ast_speech *speech, const char *grammar_name, const char *grammar);
|
2006-04-10 23:29:50 +00:00
|
|
|
/*! Unload a local grammar */
|
2009-05-21 21:13:09 +00:00
|
|
|
int (*unload)(struct ast_speech *speech, const char *grammar_name);
|
2006-04-10 23:29:50 +00:00
|
|
|
/*! Activate a loaded grammar */
|
2009-05-21 21:13:09 +00:00
|
|
|
int (*activate)(struct ast_speech *speech, const char *grammar_name);
|
2006-04-10 23:29:50 +00:00
|
|
|
/*! Deactivate a loaded grammar */
|
2009-05-21 21:13:09 +00:00
|
|
|
int (*deactivate)(struct ast_speech *speech, const char *grammar_name);
|
2006-04-10 23:29:50 +00:00
|
|
|
/*! Write audio to the speech engine */
|
|
|
|
int (*write)(struct ast_speech *speech, void *data, int len);
|
2007-08-13 14:55:17 +00:00
|
|
|
/*! Signal DTMF was received */
|
2007-08-13 21:59:15 +00:00
|
|
|
int (*dtmf)(struct ast_speech *speech, const char *dtmf);
|
2006-04-10 23:29:50 +00:00
|
|
|
/*! Prepare engine to accept audio */
|
|
|
|
int (*start)(struct ast_speech *speech);
|
2006-07-18 16:22:26 +00:00
|
|
|
/*! Change an engine specific setting */
|
2009-05-21 21:13:09 +00:00
|
|
|
int (*change)(struct ast_speech *speech, const char *name, const char *value);
|
2012-10-01 12:29:04 +00:00
|
|
|
/*! Get an engine specific setting */
|
|
|
|
int (*get_setting)(struct ast_speech *speech, const char *name, char *buf, size_t len);
|
2007-04-06 01:15:50 +00:00
|
|
|
/*! Change the type of results we want back */
|
|
|
|
int (*change_results_type)(struct ast_speech *speech, enum ast_speech_results_type results_type);
|
2006-04-10 23:29:50 +00:00
|
|
|
/*! Try to get results */
|
|
|
|
struct ast_speech_result *(*get)(struct ast_speech *speech);
|
|
|
|
/*! Accepted formats by the engine */
|
2011-02-03 16:22:10 +00:00
|
|
|
struct ast_format_cap *formats;
|
2006-04-10 23:29:50 +00:00
|
|
|
AST_LIST_ENTRY(ast_speech_engine) list;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Result structure */
|
|
|
|
struct ast_speech_result {
|
|
|
|
/*! Recognized text */
|
|
|
|
char *text;
|
|
|
|
/*! Result score */
|
|
|
|
int score;
|
2007-04-06 01:15:50 +00:00
|
|
|
/*! NBest Alternative number if in NBest results type */
|
|
|
|
int nbest_num;
|
2006-04-13 00:18:52 +00:00
|
|
|
/*! Matched grammar */
|
|
|
|
char *grammar;
|
|
|
|
/*! List information */
|
2007-07-11 17:34:30 +00:00
|
|
|
AST_LIST_ENTRY(ast_speech_result) list;
|
2006-04-10 23:29:50 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Public speech API.
 *
 * NOTE(review): the meaning of the int return values is defined by the
 * implementation and is not visible in this header - confirm there
 * before relying on a particular convention.
 */

/*!
 * \brief Activate a grammar on a speech structure
 * \param speech Speech structure to operate on
 * \param grammar_name Name of the grammar to activate
 */
int ast_speech_grammar_activate(struct ast_speech *speech, const char *grammar_name);

/*!
 * \brief Deactivate a grammar on a speech structure
 * \param speech Speech structure to operate on
 * \param grammar_name Name of the grammar to deactivate
 */
int ast_speech_grammar_deactivate(struct ast_speech *speech, const char *grammar_name);

/*!
 * \brief Load a grammar on a speech structure (not globally)
 * \param speech Speech structure to operate on
 * \param grammar_name Name of the grammar being loaded
 * \param grammar The grammar itself (exact form is engine-defined - confirm against the engine)
 */
int ast_speech_grammar_load(struct ast_speech *speech, const char *grammar_name, const char *grammar);

/*!
 * \brief Unload a grammar
 * \param speech Speech structure to operate on
 * \param grammar_name Name of the grammar to unload
 */
int ast_speech_grammar_unload(struct ast_speech *speech, const char *grammar_name);

/*!
 * \brief Get speech recognition results
 * \param speech Speech structure to get results from
 */
struct ast_speech_result *ast_speech_results_get(struct ast_speech *speech);

/*!
 * \brief Free a set of results
 * \param result Results to free
 */
int ast_speech_results_free(struct ast_speech_result *result);

/*!
 * \brief Indicate to the speech engine that audio is now going to start being written
 * \param speech Speech structure to operate on
 */
void ast_speech_start(struct ast_speech *speech);

/*!
 * \brief Create a new speech structure
 * \param engine_name Name of the speech engine to use
 * \param formats Format capabilities supplied by the caller
 *        (presumably matched against the engine's accepted formats - confirm)
 */
struct ast_speech *ast_speech_new(const char *engine_name, const struct ast_format_cap *formats);

/*!
 * \brief Destroy a speech structure
 * \param speech Speech structure to destroy
 */
int ast_speech_destroy(struct ast_speech *speech);

/*!
 * \brief Write audio to the speech engine
 * \param speech Speech structure to operate on
 * \param data Audio data to write
 * \param len Length of \a data (presumably in bytes - confirm against the implementation)
 */
int ast_speech_write(struct ast_speech *speech, void *data, int len);

/*!
 * \brief Signal to the engine that DTMF was received
 * \param speech Speech structure to operate on
 * \param dtmf The DTMF that was received, as a string
 */
int ast_speech_dtmf(struct ast_speech *speech, const char *dtmf);

/*!
 * \brief Change an engine specific attribute
 * \param speech Speech structure to operate on
 * \param name Name of the attribute
 * \param value New value for the attribute
 */
int ast_speech_change(struct ast_speech *speech, const char *name, const char *value);

/*!
 * \brief Get an engine specific attribute
 * \param speech Speech structure to operate on
 * \param name Name of the attribute
 * \param buf Buffer that receives the value
 * \param len Size of \a buf
 */
int ast_speech_get_setting(struct ast_speech *speech, const char *name, char *buf, size_t len);

/*!
 * \brief Change the type of results we want
 * \param speech Speech structure to operate on
 * \param results_type Type of results wanted
 */
int ast_speech_change_results_type(struct ast_speech *speech, enum ast_speech_results_type results_type);

/*!
 * \brief Change state of a speech structure
 * \param speech Speech structure to operate on
 * \param state New state (see enum ast_speech_states)
 */
int ast_speech_change_state(struct ast_speech *speech, int state);

/*!
 * \brief Register a speech recognition engine
 * \param engine Engine to register
 */
int ast_speech_register(struct ast_speech_engine *engine);

/*!
 * \brief Unregister a speech recognition engine
 * \param engine_name Name of the engine to unregister
 */
int ast_speech_unregister(const char *engine_name);

#if defined(__cplusplus) || defined(c_plusplus)
}
#endif

#endif /* _ASTERISK_SPEECH_H */