Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added jsgf/fsg tags extraction #84

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 40 additions & 12 deletions include/pocketsphinx.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,24 @@
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* ====================================================================
Expand All @@ -47,6 +47,7 @@
#include <sphinxbase/logmath.h>
#include <sphinxbase/fe.h>
#include <sphinxbase/feat.h>
#include <sphinxbase/glist.h>

/* PocketSphinx headers (not many of them!) */
#include <pocketsphinx_export.h>
Expand All @@ -61,6 +62,8 @@ extern "C" {
}
#endif

/* NOTE(review): presumably the maximum size of a tag string handled by the
 * tags-extraction API declared in this header -- confirm against the
 * implementation.  The name is unprefixed in a public header; consider a
 * PS_ prefix in a follow-up to avoid clashing with application macros. */
#define MAX_TAG_SIZE 50

/**
* PocketSphinx speech recognizer object.
*/
Expand Down Expand Up @@ -266,7 +269,7 @@ int ps_add_word(ps_decoder_t *ps,
char const *phones,
int update);

/**
/**
* Lookup for the word in the dictionary and return phone transcription
* for it.
*
Expand All @@ -278,7 +281,7 @@ int ps_add_word(ps_decoder_t *ps,
* allocated and must be freed by the user.
*/
POCKETSPHINX_EXPORT
char *ps_lookup_word(ps_decoder_t *ps,
char *ps_lookup_word(ps_decoder_t *ps,
const char *word);

/**
Expand Down Expand Up @@ -408,6 +411,28 @@ int ps_end_utt(ps_decoder_t *ps);
POCKETSPHINX_EXPORT
char const *ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score);

/**
 * Get the best hypothesis string and, through hyptagsP, a glist of
 * word-tag pairs (ps_hyptags_t entries).
 *
 * @param ps Decoder.
 * @param out_best_score Output: path score corresponding to returned string.
 * @param hyptagsP Output: pointer to the glist_t in which to store the
 *                 word-tag pairs.
 *                 NOTE(review): ownership of the stored list is not visible
 *                 from this header; presumably it is released with
 *                 ps_free_tags_struct() -- confirm.
 * @return String containing best hypothesis at this point in
 *         decoding.  NULL if no hypothesis is available.
 */

POCKETSPHINX_EXPORT
char const *ps_get_hyp_with_tags(ps_decoder_t *ps, int32 *out_best_score, glist_t *hyptagsP);

/**
* Get glist containing tags and word referenced.
*
* @param glist containing word-tags pairs.
* @param referenced word of the hypothesis.
* @param referenced tag of the hypothesis.
*/
void ps_get_word_and_tag(glist_t hyptags_list, char *word, char *tag);

/**
* Get posterior probability.
*
Expand All @@ -421,6 +446,9 @@ char const *ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score);
* @param ps Decoder.
* @return Posterior probability of the best hypothesis.
*/

void ps_free_tags_struct(glist_t hyptags_list);

POCKETSPHINX_EXPORT
int32 ps_get_prob(ps_decoder_t *ps);

Expand Down Expand Up @@ -533,7 +561,7 @@ ps_nbest_t *ps_nbest(ps_decoder_t *ps);
* @return Updated N-best iterator, or NULL if no more hypotheses are
* available (iterator is freed in this case).
*/
POCKETSPHINX_EXPORT
POCKETSPHINX_EXPORT
ps_nbest_t *ps_nbest_next(ps_nbest_t *nbest);

/**
Expand Down Expand Up @@ -611,7 +639,7 @@ void ps_set_rawdata_size(ps_decoder_t *ps, int32 size);

/**
* Retrieves the raw data collected during utterance decoding.
*
*
* @param ps Decoder
* @param buffer preallocated buffer to store the data, must be within the limit
* set before
Expand Down
57 changes: 34 additions & 23 deletions include/ps_search.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,24 @@
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY ALPHA CEPHEI INC. ``AS IS'' AND
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THIS SOFTWARE IS PROVIDED BY ALPHA CEPHEI INC. ``AS IS'' AND
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* ====================================================================
Expand All @@ -37,14 +37,14 @@
* provide interactive experience for the user.
*
* There are different possible search modes:
*
*
* <ul>
* <li>keyphrase - efficiently looks for a keyphrase and ignores other speech. Allows configuring the detection threshold.</li>
* <li>grammar - recognizes speech according to a JSGF grammar. Unlike keyphrase search, grammar search doesn't ignore words which are not in the grammar but tries to recognize them.</li>
* <li>ngram/lm - recognizes natural speech with a language model.</li>
* <li>allphone - recognizes phonemes with a phonetic language model.</li>
* </ul>
*
*
* Each search has a name and can be referenced by a name, names are
* application-specific. The function ps_set_search allows to activate
* the search previously added by a name. Only single search can be
Expand All @@ -53,7 +53,7 @@
* To add the search one needs to point to the grammar/language model
* describing the search. The location of the grammar is specific to the
* application.
*
*
* The exact design of the searches depends on your application. For
* example, you might want to listen for activation keyphrase first and once
* keyphrase is recognized switch to ngram search to recognize actual
Expand All @@ -68,6 +68,7 @@
#ifndef __PS_SEARCH_H__
#define __PS_SEARCH_H__

#include <sphinxbase/jsgf.h>
#include <sphinxbase/fsg_model.h>
#include <sphinxbase/ngram_model.h>

Expand Down Expand Up @@ -100,7 +101,7 @@ int ps_set_search(ps_decoder_t *ps, const char *name);
*
* @see ps_set_search
*/
POCKETSPHINX_EXPORT
POCKETSPHINX_EXPORT
const char* ps_get_search(ps_decoder_t *ps);

/**
Expand All @@ -117,7 +118,7 @@ POCKETSPHINX_EXPORT
int ps_unset_search(ps_decoder_t *ps, const char *name);

/**
* Returns iterator over current searches
* Returns iterator over current searches
*
* @see ps_set_search
*/
Expand All @@ -126,7 +127,7 @@ ps_search_iter_t *ps_search_iter(ps_decoder_t *ps);

/**
* Updates search iterator to point to the next position.
*
*
* This function automatically frees the iterator object upon reaching
* the final entry.
* @see ps_set_search
Expand All @@ -152,7 +153,7 @@ void ps_search_iter_free(ps_search_iter_t *itor);

/**
* Updates search iterator to point to the next position.
*
*
* This function automatically frees the iterator object upon reaching
* the final entry.
* @see ps_set_search
Expand All @@ -172,7 +173,7 @@ const char* ps_search_iter_val(ps_search_iter_t *itor);
* not attempt to free it manually. Use ngram_model_retain()
* if you wish to reuse it elsewhere.
*/
POCKETSPHINX_EXPORT
POCKETSPHINX_EXPORT
ngram_model_t *ps_get_lm(ps_decoder_t *ps, const char *name);

/**
Expand All @@ -182,15 +183,15 @@ ngram_model_t *ps_get_lm(ps_decoder_t *ps, const char *name);
* using ps_set_search().
*
* @see ps_set_search.
*/
*/
POCKETSPHINX_EXPORT
int ps_set_lm(ps_decoder_t *ps, const char *name, ngram_model_t *lm);

/**
* Adds new search based on N-gram language model.
*
* Convenient method to load N-gram model and create a search.
*
*
* @see ps_set_lm
*/
POCKETSPHINX_EXPORT
Expand Down Expand Up @@ -219,6 +220,16 @@ fsg_model_t *ps_get_fsg(ps_decoder_t *ps, const char *name);
POCKETSPHINX_EXPORT
int ps_set_fsg(ps_decoder_t *ps, const char *name, fsg_model_t *fsg);

/**
 * Adds new search using JSGF model.
 *
 * Convenience method for callers that already hold a built jsgf_t
 * struct.
 *
 * @param ps Decoder.
 * @param name Name under which the search is added.
 * @param jsgf Already built JSGF grammar.
 *
 * NOTE(review): the return convention is not visible from this header;
 * presumably it matches ps_set_fsg() -- confirm.
 *
 * @see ps_set_fsg
 */
POCKETSPHINX_EXPORT
int ps_set_jsgf(ps_decoder_t *ps, const char *name, jsgf_t *jsgf);

/**
* Adds new search using JSGF model.
*
Expand Down Expand Up @@ -247,7 +258,7 @@ int ps_set_jsgf_string(ps_decoder_t *ps, const char *name, const char *jsgf_stri
*
* @return The current keyphrase to spot
*/
POCKETSPHINX_EXPORT
POCKETSPHINX_EXPORT
const char* ps_get_kws(ps_decoder_t *ps, const char *name);

/**
Expand All @@ -258,7 +269,7 @@ const char* ps_get_kws(ps_decoder_t *ps, const char *name);
*
* @see ps_set_search
*/
POCKETSPHINX_EXPORT
POCKETSPHINX_EXPORT
int ps_set_kws(ps_decoder_t *ps, const char *name, const char *keyfile);

/**
Expand All @@ -269,7 +280,7 @@ int ps_set_kws(ps_decoder_t *ps, const char *name, const char *keyfile);
*
* @see ps_set_search
*/
POCKETSPHINX_EXPORT
POCKETSPHINX_EXPORT
int ps_set_keyphrase(ps_decoder_t *ps, const char *name, const char *keyphrase);

/**
Expand All @@ -279,15 +290,15 @@ int ps_set_keyphrase(ps_decoder_t *ps, const char *name, const char *keyphrase);
* using ps_set_search().
*
* @see ps_set_search.
*/
*/
POCKETSPHINX_EXPORT
int ps_set_allphone(ps_decoder_t *ps, const char *name, ngram_model_t *lm);

/**
* Adds new search based on phone N-gram language model.
*
* Convenient method to load N-gram model and create a search.
*
*
* @see ps_set_allphone
*/
POCKETSPHINX_EXPORT
Expand Down
8 changes: 8 additions & 0 deletions src/libpocketsphinx/allphone_search.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ static ps_searchfuncs_t allphone_funcs = {
/* free: */ allphone_search_free,
/* lattice: */ allphone_search_lattice,
/* hyp: */ allphone_search_hyp,
/* hyptags_list: */ allphone_search_hyp_with_tags,
/* prob: */ allphone_search_prob,
/* seg_iter: */ allphone_search_seg_iter,
};
Expand Down Expand Up @@ -907,3 +908,10 @@ allphone_search_hyp(ps_search_t * search, int32 * out_score)
E_INFO("Hyp: %s\n", search->hyp_str);
return search->hyp_str;
}

/**
 * Tag-extraction entry point for allphone search (not implemented).
 *
 * Logs a warning and reports that no word-tag list is available.
 *
 * @param search Unused.
 * @param out_score Unused; never written.
 * @return Always NULL.
 */
glist_t
allphone_search_hyp_with_tags(ps_search_t * search, int32 * out_score)
{
    /* Neither argument is consulted by this stub. */
    (void) search;
    (void) out_score;

    E_WARN("Tags extraction for allphone_search not implemented\n");
    return NULL;
}
5 changes: 5 additions & 0 deletions src/libpocketsphinx/allphone_search.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,4 +176,9 @@ int allphone_search_finish(ps_search_t * search);
*/
char const *allphone_search_hyp(ps_search_t * search, int32 * out_score);

/**
 * Get glist_t with word-tag pairs.
 *
 * Tag extraction is not implemented for allphone search: the definition
 * in allphone_search.c logs a warning and returns NULL.
 */
glist_t allphone_search_hyp_with_tags(ps_search_t * search, int32 * out_score);

#endif /* __ALLPHONE_SEARCH_H__ */
Loading