/** * @file speech.h * * * @brief Miscellaneous definitions for speech input processing * * This file contains miscellaneous definitions for speech input processing. * Several limitation for input speech length is also defined here. * * Please refer to adin.h for speech capturing, mfcc.h for MFCC parameter * extraction, htk_param.h for storing the parameter vectors. * * * @brief 音声入出力処理に関する定義 * * このファイルには,音声の入出力に関する雑多な定義が収められています. * 一発話あたりの入力長に関する制限などが定義されています. * * 入力ソースに関する定義は adin.h,MFCC 特徴量抽出に関する定義は mfcc.h, * 特徴量パラメータについては htk_param.h を参照して下さい. * * * @author Akinobu LEE * @date Sat Feb 12 11:16:41 2005 * * $Revision: 1.5 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ /* speech input limitation */ #ifndef __SENT_SPEECH__ #define __SENT_SPEECH__ #include /** * @brief Maximum number of words in an input. * * This value defines limitation of word length in one utterance input. * If the number of words exceeds this value, Julius produces error. * So you have to set large value enough. * */ #define MAXSEQNUM 150 /** * @brief Maximum length of an input in samples * * This value defines limitation of speech input length in one utterance input. * If the length of an input exceeds this value, Julius stop the input * at that point and recognize it, disgarding the rest until the end of speech * (long silence) comes. * * The default value is 320000, which means you can give Julius an input of * at most 20 secons in 16kHz sampling. Setting smaller value saves * memory usage. * */ #define MAXSPEECHLEN 320000 /** * @brief Maximum length of input delay in seconds * * This value defines maximum delay on live speech recognition with slow * machines. If an input delays over this sample, the overflowed samples * will be dropped. This value is used on callback-based ad-in, * namely on portaudio interface. * * The default value is 8 seconds. Setting smaller value saves * memory usage but risk of overflow grows on slow machines * */ #define INPUT_DELAY_SEC 8 /** * @brief Expansion period in frames for output probability cache * * When recognition, the 1st recognition pass stores all the output * probabilities of %HMM states for every incoming input frame, to speed up the * re-computation of acoustic likelihoods in the 2nd pass. * In live input mode, this output probability cache will be * re-allocated dynamically as the input becomes longer. * * This value specifies the re-allocation period in frames. The probability * cache are will be expanded as the input proceeds this frame. * * Smaller value may improve memory efficiency, but Too small value may * result in the overhead of memory re-allocation and slow down the * recognition. * */ #define OUTPROB_CACHE_PERIOD 100 #ifdef __cplusplus extern "C" { #endif /// Macro to convert smpPeriod (100nsec unit) to frequency (Hz) #define period2freq(A) (10000000.0 / (float)(A)) /// Macro to convert sampling frequency (Hz) to smpPeriod (100nsec unit) #define freq2period(A) (10000000.0 / (float)(A)) /* for anlz/wrsamp.c */ int wrsamp(int fd, SP16 *buf, int len); /* for anlz/wrwav.c */ FILE *wrwav_open(char *filename, int sfreq); boolean wrwav_data(FILE *fp, SP16 *buf, int len); boolean wrwav_close(FILE *fp); /* for an;z/strip.c */ int strip_zero(SP16 a[], int len); #ifdef __cplusplus } #endif #endif /* __SENT_SPEECH__ */