/**
* @file adintool.c
*
 *
 * @brief AD-in tool to record / split / send / receive speech data
 *
 * This tool handles speech input and output from/to various devices
 * using the libsent library in Julius.  It reads input from a
 * microphone, a file, an adinnet network client, or standard input,
 * performs speech detection based on level and zero-cross thresholds
 * (which can be disabled), and outputs the resulting data to a file,
 * an adinnet network server, or standard output.
 *
 * For example, you can send microphone input to Julius running on
 * another host by setting input to microphone and output to adinnet
 * (Julius should be run with "-input adinnet"), or you can split a
 * long recording at long silences by setting both input and output to
 * file and enabling segment detection.
*
*
* @author Akinobu LEE
* @date Wed Mar 23 20:43:32 2005
*
* $Revision: 1.16 $
*
*/
/*
* Copyright (c) 1991-2012 Kawahara Lab., Kyoto University
* Copyright (c) 2001-2005 Shikano Lab., Nara Institute of Science and Technology
* Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology
* All rights reserved
*/
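/*
 * Typical invocations (illustrative only; see opt_help() below for the
 * full option list; host names and file names here are placeholders):
 *
 *   adintool -in mic -out adinnet -server localhost
 *       send microphone input to Julius running with "-input adinnet"
 *   adintool -in file -out file -filename foo
 *       extract speech segments from files into foo.0000.wav, foo.0001.wav, ...
 *   adintool -in adinnet -out stdout -nosegment
 *       relay adinnet input to standard output without segmentation
 */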
#include <julius/juliuslib.h>
#include <signal.h>
#define MAXCONNECTION 10 ///< Maximum number of server connection
/* input */
static int file_counter = 0; ///< num of input files (for SP_RAWFILE)
static int sfreq; ///< Temporal storage of sample rate
/* output */
enum{SPOUT_FILE, SPOUT_STDOUT, SPOUT_ADINNET}; ///< value for speech_output
static int speech_output = SPOUT_FILE; ///< output device
static int total_speechlen; ///< total samples of recorded segments
static int speechlen; ///< samples of one recorded segment
static char *filename = NULL; ///< Output file name
static int fd = -1; ///< File descriptor for output
static FILE *fp = NULL; ///< File pointer for WAV output
static int size; ///< Output file size
static boolean use_raw = FALSE; ///< Output in RAW format if TRUE
static boolean continuous_segment = TRUE; ///< enable/disable successive output
static int startid = 0; ///< output file numbering variable
static int sid = 0; ///< current file ID (for SPOUT_FILE)
static char *outpath = NULL; ///< work space for output file name formatting
static int adinnet_port_in = ADINNET_PORT; ///< Input server port
static int adinnet_port[MAXCONNECTION]; ///< Output server ports
static char *adinnet_serv[MAXCONNECTION]; ///< Server name to send recorded data
static int sd[MAXCONNECTION]; ///< Output socket descriptors
static int adinnet_servnum = 0; ///< Number of server to connect
static int adinnet_portnum = 0; ///< Number of server ports specified
static boolean writing_file = FALSE; ///< TRUE if writing to a file
static boolean stop_at_next = FALSE; ///< TRUE if need to stop at next input by server command. Will be set when PAUSE or TERMINATE command received while input.
/* switch */
static boolean pause_each = FALSE; ///< If set to TRUE, adintool will pause automatically at each input end and wait for resume command
static boolean loose_sync = FALSE; ///< If set to TRUE, adintool will do loose synchronization for resume among servers
static int rewind_msec = 0; ///< If > 0, roll back buffered input by this msec at the first trigger after resume
static int trigger_sample; ///< Sample position of the last up-trigger (for status output)
/**
* Print help and exit
*/
static boolean
opt_help(Jconf *jconf, char *arg[], int argnum)
{
fprintf(stderr, "adintool --- AD-in tool to record/split/send/receive speech data\n");
fprintf(stderr, "Usage: adintool [options] -in inputdev -out outputdev\n");
fprintf(stderr, "inputdev: read speech data from:\n");
#ifdef USE_MIC
fprintf(stderr, " mic microphone (default)\n");
#endif
#ifdef USE_NETAUDIO
fprintf(stderr, " netaudio DatLink (NetAudio) server\n");
#endif
fprintf(stderr, " file speech file (filename given from prompt)\n");
fprintf(stderr, " adinnet from adinnet client (I'm server)\n");
fprintf(stderr, " stdin standard tty input\n");
fprintf(stderr, " (other input can be specified by \"-input xxx\" as in Julius)\n");
fprintf(stderr, "outputdev: output data to:\n");
fprintf(stderr, " file speech file (\"foo.0000.wav\" - \"foo.N.wav\"\n");
fprintf(stderr, " adinnet to adinnet server (I'm client)\n");
fprintf(stderr, " stdout standard tty output\n");
fprintf(stderr, "I/O options:\n");
#ifdef USE_NETAUDIO
fprintf(stderr, " -NA (netaudio) NetAudio server host:unit\n");
#endif
fprintf(stderr, " -server host[,host,...] (adinnet-out) server hostnames\n");
fprintf(stderr, " -port num[,num,...] (adinnet-out) port numbers (%d)\n", ADINNET_PORT);
fprintf(stderr, " -inport num (adinnet-in) port number (%d)\n", ADINNET_PORT);
fprintf(stderr, " -filename foo (file-out) filename to record\n");
fprintf(stderr, " -startid id (file-out) recording start id (%04d)\n", startid);
fprintf(stderr, "Recording and Pause segmentation options:\n");
fprintf(stderr, " (input segmentation: on for file/mic/stdin, off for adinnet)\n");
fprintf(stderr, " [-nosegment] not segment input speech\n");
fprintf(stderr, " [-segment] force segmentation of input speech\n");
fprintf(stderr, " [-cutsilence] (same as \"-segment\")\n");
fprintf(stderr, " [-oneshot] record only the first segment\n");
fprintf(stderr, " [-freq frequency] sampling frequency in Hz (%d)\n", jconf->am_root->analysis.para_default.smp_freq);
fprintf(stderr, " [-48] 48000Hz recording with down sampling (16kHz only)\n");
fprintf(stderr, " [-lv unsignedshort] silence cut level threshold (%d)\n", jconf->detect.level_thres);
fprintf(stderr, " [-zc zerocrossnum] silence cut zerocross num (%d)\n", jconf->detect.zero_cross_num);
fprintf(stderr, " [-headmargin msec] head margin length (%d)\n", jconf->detect.head_margin_msec);
fprintf(stderr, " [-tailmargin msec] tail margin length (%d)\n", jconf->detect.tail_margin_msec);
fprintf(stderr, " [-chunksize sample] chunk size for processing (%d)\n", jconf->detect.chunk_size);
fprintf(stderr, " [-nostrip] do not strip zero samples\n");
fprintf(stderr, " [-zmean] remove DC by zero mean\n");
fprintf(stderr, " [-raw] output in RAW format\n");
fprintf(stderr, " [-autopause] automatically pause at each input end\n");
fprintf(stderr, " [-loosesync] loose sync of resume among servers\n");
fprintf(stderr, " [-rewind msec] rewind input if spoken while pause at resume\n");
fprintf(stderr, " [-C jconffile] load jconf to set parameters (ignore other options\n");
fprintf(stderr, "\nLibrary configuration: ");
confout_version(stderr);
confout_audio(stderr);
confout_process(stderr);
fprintf(stderr, "\n");
exit(1); /* exit here */
return TRUE;
}
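/**
 * Option "-in": select the input device (mic, file, adinnet, stdin, or netaudio).
 */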
static boolean
opt_in(Jconf *jconf, char *arg[], int argnum)
{
jconf->input.plugin_source = -1;
jconf->input.type = INPUT_WAVEFORM;
switch(arg[0][0]) {
case 'm':
#ifdef USE_MIC
jconf->input.speech_input = SP_MIC;
#else
fprintf(stderr,"Error: mic input not available\n");
return FALSE;
#endif
break;
case 'f':
jconf->input.speech_input = SP_RAWFILE;
jconf->detect.silence_cut = 1;
break;
case 's':
jconf->input.speech_input = SP_STDIN;
jconf->detect.silence_cut = 1;
break;
case 'a':
jconf->input.speech_input = SP_ADINNET;
break;
case 'n':
#ifdef USE_NETAUDIO
jconf->input.speech_input = SP_NETAUDIO;
#else
fprintf(stderr,"Error: netaudio input not available\n");
return FALSE;
#endif
break;
default:
fprintf(stderr,"Error: no such input device: %s\n", arg[0]);
return FALSE;
}
return TRUE;
}
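/**
 * Option "-out": select the output device (file, stdout, or adinnet).
 */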
static boolean
opt_out(Jconf *jconf, char *arg[], int argnum)
{
switch(arg[0][0]) {
case 'f':
speech_output = SPOUT_FILE;
break;
case 's':
speech_output = SPOUT_STDOUT;
break;
case 'a':
speech_output = SPOUT_ADINNET;
break;
default:
fprintf(stderr,"Error: no such output device: %s\n", arg[0]);
return FALSE;
}
return TRUE;
}
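/**
 * Option "-server": set adinnet server name(s) to send data to (comma-separated).
 */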
static boolean
opt_server(Jconf *jconf, char *arg[], int argnum)
{
char *p, *q;
if (speech_output == SPOUT_ADINNET) {
p = (char *)malloc(strlen(arg[0]) + 1);
strcpy(p, arg[0]);
for (q = strtok(p, ","); q; q = strtok(NULL, ",")) {
if (adinnet_servnum >= MAXCONNECTION) {
fprintf(stderr, "Error: too many servers (> %d): %s\n", MAXCONNECTION, arg[0]);
return FALSE;
}
adinnet_serv[adinnet_servnum] = (char *)malloc(strlen(q) + 1);
strcpy(adinnet_serv[adinnet_servnum], q);
adinnet_servnum++;
}
free(p);
} else {
fprintf(stderr, "Warning: server [%s] should be with adinnet\n", arg[0]);
return FALSE;
}
return TRUE;
}
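/**
 * Option "-NA": set NetAudio (DatLink) server name, for use with "-in netaudio".
 */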
static boolean
opt_NA(Jconf *jconf, char *arg[], int argnum)
{
#ifdef USE_NETAUDIO
if (jconf->input.speech_input == SP_NETAUDIO) {
jconf->input.netaudio_devname = arg[0];
} else {
fprintf(stderr, "Error: use \"-NA\" with \"-in netaudio\"\n");
return FALSE;
}
return TRUE;
#else /* ~USE_NETAUDIO */
fprintf(stderr, "Error: NetAudio(DatLink) not supported\n");
return FALSE;
#endif
}
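/**
 * Option "-inport": set the port number to listen on for adinnet input.
 */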
static boolean
opt_inport(Jconf *jconf, char *arg[], int argnum)
{
adinnet_port_in = atoi(arg[0]);
return TRUE;
}
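/**
 * Option "-port": set adinnet output server port number(s) (comma-separated).
 */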
static boolean
opt_port(Jconf *jconf, char *arg[], int argnum)
{
char *p, *q;
p = (char *)malloc(strlen(arg[0]) + 1);
strcpy(p, arg[0]);
for (q = strtok(p, ","); q; q = strtok(NULL, ",")) {
if (adinnet_portnum >= MAXCONNECTION) {
fprintf(stderr, "Error: too many server ports (> %d): %s\n", MAXCONNECTION, arg[0]);
return FALSE;
}
adinnet_port[adinnet_portnum] = atoi(q);
adinnet_portnum++;
}
free(p);
return TRUE;
}
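/**
 * Option "-filename": set the output file name (used as base name for successive output).
 */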
static boolean
opt_filename(Jconf *jconf, char *arg[], int argnum)
{
filename = arg[0];
return TRUE;
}
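/**
 * Option "-startid": set the initial file number for successive file output.
 */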
static boolean
opt_startid(Jconf *jconf, char *arg[], int argnum)
{
startid = atoi(arg[0]);
return TRUE;
}
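/**
 * Option "-freq": set the sampling frequency in Hz.
 */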
static boolean
opt_freq(Jconf *jconf, char *arg[], int argnum)
{
jconf->amnow->analysis.para.smp_freq = atoi(arg[0]);
jconf->amnow->analysis.para.smp_period = freq2period(jconf->amnow->analysis.para.smp_freq);
return TRUE;
}
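/**
 * Option "-nosegment": disable speech detection and segmentation of input.
 */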
static boolean
opt_nosegment(Jconf *jconf, char *arg[], int argnum)
{
jconf->detect.silence_cut = 0;
return TRUE;
}
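/**
 * Option "-segment": force speech detection and segmentation of input.
 */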
static boolean
opt_segment(Jconf *jconf, char *arg[], int argnum)
{
jconf->detect.silence_cut = 1;
return TRUE;
}
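/**
 * Option "-oneshot": record only the first detected segment.
 */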
static boolean
opt_oneshot(Jconf *jconf, char *arg[], int argnum)
{
continuous_segment = FALSE;
return TRUE;
}
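/**
 * Option "-raw": output in RAW format instead of WAV.
 */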
static boolean
opt_raw(Jconf *jconf, char *arg[], int argnum)
{
use_raw = TRUE;
return TRUE;
}
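/**
 * Option "-autopause": automatically pause at each input end and wait for resume.
 */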
static boolean
opt_autopause(Jconf *jconf, char *arg[], int argnum)
{
pause_each = TRUE;
return TRUE;
}
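/**
 * Option "-loosesync": use loose synchronization of resume commands among servers.
 */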
static boolean
opt_loosesync(Jconf *jconf, char *arg[], int argnum)
{
loose_sync = TRUE;
return TRUE;
}
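/**
 * Option "-rewind": set the amount (in msec) of buffered input to roll back
 * at the first trigger after resume.
 */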
static boolean
opt_rewind(Jconf *jconf, char *arg[], int argnum)
{
rewind_msec = atoi(arg[0]);
return TRUE;
}
/**
 *
 * Output the input/output configuration in text for confirmation.
 *
*/
void
put_status(Recog *recog)
{
int i;
Jconf *jconf = recog->jconf;
fprintf(stderr,"----\n");
fprintf(stderr, "Input stream:\n");
fprintf(stderr, "\t input type = ");
switch(jconf->input.type) {
case INPUT_WAVEFORM:
fprintf(stderr, "waveform\n");
break;
case INPUT_VECTOR:
fprintf(stderr, "feature vector sequence\n");
break;
}
fprintf(stderr, "\t input source = ");
if (jconf->input.plugin_source != -1) {
fprintf(stderr, "plugin\n");
} else if (jconf->input.speech_input == SP_RAWFILE) {
fprintf(stderr, "waveform file\n");
} else if (jconf->input.speech_input == SP_MFCFILE) {
fprintf(stderr, "feature vector file (HTK format)\n");
} else if (jconf->input.speech_input == SP_STDIN) {
fprintf(stderr, "standard input\n");
} else if (jconf->input.speech_input == SP_ADINNET) {
fprintf(stderr, "adinnet client\n");
#ifdef USE_NETAUDIO
} else if (jconf->input.speech_input == SP_NETAUDIO) {
char *p;
fprintf(stderr, "NetAudio server on ");
if (jconf->input.netaudio_devname != NULL) {
fprintf(stderr, "%s\n", jconf->input.netaudio_devname);
} else if ((p = getenv("AUDIO_DEVICE")) != NULL) {
fprintf(stderr, "%s\n", p);
} else {
fprintf(stderr, "local port\n");
}
#endif
} else if (jconf->input.speech_input == SP_MIC) {
fprintf(stderr, "microphone\n");
fprintf(stderr, "\t device API = ");
switch(jconf->input.device) {
case SP_INPUT_DEFAULT: fprintf(stderr, "default\n"); break;
case SP_INPUT_ALSA: fprintf(stderr, "alsa\n"); break;
case SP_INPUT_OSS: fprintf(stderr, "oss\n"); break;
case SP_INPUT_ESD: fprintf(stderr, "esd\n"); break;
case SP_INPUT_PULSEAUDIO: fprintf(stderr, "pulseaudio\n"); break;
}
}
fprintf(stderr,"Segmentation: ");
if (jconf->detect.silence_cut) {
if (continuous_segment) {
fprintf(stderr,"on, continuous\n");
} else {
fprintf(stderr,"on, only one snapshot\n");
}
if (recog->adin->down_sample) {
fprintf(stderr," SampleRate: 48000Hz -> %d Hz\n", sfreq);
} else {
fprintf(stderr," SampleRate: %d Hz\n", sfreq);
}
fprintf(stderr," Level: %d / 32767\n", jconf->detect.level_thres);
fprintf(stderr," ZeroCross: %d per sec.\n", jconf->detect.zero_cross_num);
fprintf(stderr," HeadMargin: %d msec.\n", jconf->detect.head_margin_msec);
fprintf(stderr," TailMargin: %d msec.\n", jconf->detect.tail_margin_msec);
} else {
fprintf(stderr,"OFF\n");
}
if (jconf->preprocess.strip_zero_sample) {
fprintf(stderr," ZeroFrames: drop\n");
} else {
fprintf(stderr," ZeroFrames: keep\n");
}
if (jconf->preprocess.use_zmean) {
fprintf(stderr," remove DC: on\n");
} else {
fprintf(stderr," remove DC: off\n");
}
if (pause_each) {
fprintf(stderr," Autopause: on\n");
} else {
fprintf(stderr," Autopause: off\n");
}
if (loose_sync) {
fprintf(stderr," LooseSync: on\n");
} else {
fprintf(stderr," LooseSync: off\n");
}
if (rewind_msec > 0) {
fprintf(stderr," Rewind: %d msec\n", rewind_msec);
} else {
fprintf(stderr," Rewind: no\n");
}
fprintf(stderr," Output to: ");
switch(speech_output) {
case SPOUT_FILE:
if (jconf->detect.silence_cut) {
if (continuous_segment) {
if (use_raw) {
fprintf(stderr,"%s.%04d.raw, %s.%04d.raw, ...\n", filename,startid, filename, startid+1);
} else {
fprintf(stderr,"%s.%04d.wav, %s.%04d.wav, ...\n", filename,startid, filename, startid+1);
}
} else {
fprintf(stderr,"%s\n", outpath);
}
} else {
fprintf(stderr,"%s (warning: inifinite recording: be care of disk space!)\n", outpath);
}
break;
case SPOUT_STDOUT:
fprintf(stderr,"STDOUT\n");
use_raw = TRUE;
break;
case SPOUT_ADINNET:
fprintf(stderr, "adinnet server");
    for(i=0;i<adinnet_servnum;i++) {
      fprintf(stderr, " (%s:%d)", adinnet_serv[i], adinnet_port[i]);
    }
    fprintf(stderr, "\n");
    break;
  }
  fprintf(stderr, "----\n");
}

/**
 * Callback handler to record the sample fragments to the file pointed to
 * by the file descriptor "fd" (RAW) or the file pointer "fp" (WAV).
*
* @param now [in] recorded fragments of speech sample
* @param len [in] length of above in samples
*
* @return -1 on device error (require caller to exit and terminate input),
* 0 on success (allow caller to continue),
 * 1 on success with segmentation detected (require caller to exit while
 * input will continue in the next call).
*
*/
static int
adin_callback_file(SP16 *now, int len, Recog *recog)
{
int count;
int start;
int w;
start = 0;
if (recog->jconf->input.speech_input == SP_MIC && speechlen == 0) {
/* this is first up-trigger */
if (rewind_msec > 0 && !recog->adin->is_valid_data) {
/* not spoken currently but has data to process at first trigger */
/* it means that there are old spoken segments */
      /* discard them */
      printf("discard already recorded %d samples\n", len);
return 0;
}
/* erase "<<>>" text on tty */
fprintf(stderr, "\r \r");
if (rewind_msec > 0) {
/* when -rewind value set larger than 0, the speech data spoken
while pause will be considered back to the specified msec.
*/
printf("buffered samples=%d\n", len);
w = rewind_msec * sfreq / 1000;
if (len > w) {
start = len - w;
len = w;
} else {
start = 0;
}
printf("will process from %d\n", start);
}
}
/* open files for recording at first trigger */
if (speech_output == SPOUT_FILE && speechlen == 0) {
if (continuous_segment) {
if (use_raw) {
sprintf(outpath, "%s.%04d.raw", filename, sid);
} else {
sprintf(outpath, "%s.%04d.wav", filename, sid);
}
}
fprintf(stderr,"[%s]", outpath);
if (access(outpath, F_OK) == 0) {
if (access(outpath, W_OK) == 0) {
fprintf(stderr, "(override)");
} else {
perror("adintool");
return(-1);
}
}
if (use_raw) {
if ((fd = open(outpath, O_CREAT | O_RDWR
#ifdef O_BINARY
| O_BINARY
#endif
, 0644)) == -1) {
perror("adintool");
return -1;
}
} else {
if ((fp = wrwav_open(outpath, sfreq)) == NULL) {
perror("adintool");
return -1;
}
}
writing_file = TRUE;
}
/* write recorded sample to file */
if (use_raw) {
count = wrsamp(fd, &(now[start]), len);
if (count < 0) {
perror("adinrec: cannot write");
return -1;
}
if (count < len * sizeof(SP16)) {
fprintf(stderr, "adinrec: cannot write more %d bytes\ncurrent length = %d\n", count, speechlen * sizeof(SP16));
return -1;
}
} else {
if (wrwav_data(fp, &(now[start]), len) == FALSE) {
fprintf(stderr, "adinrec: cannot write\n");
return -1;
}
}
/* accumulate sample num of this segment */
speechlen += len;
/* if input length reaches limit, rehash the ad-in buffer */
if (recog->jconf->input.speech_input == SP_MIC) {
if (speechlen > MAXSPEECHLEN - 16000) {
recog->adin->rehash = TRUE;
}
}
/* progress bar in dots */
fprintf(stderr, ".");
return(0);
}
/**
 *
 * Callback handler to send the sample fragments to the adinnet
 * server(s) through the connected socket descriptors in "sd".
*
* @param now [in] recorded fragments of speech sample
* @param len [in] length of above in samples
*
* @return -1 on device error (require caller to exit and terminate input),
* 0 on success (allow caller to continue),
 * 1 on success with segmentation detected (require caller to exit while
 * input will continue in the next call).
*
*/
static int
adin_callback_adinnet(SP16 *now, int len, Recog *recog)
{
int count;
int start, w;
int i;
start = 0;
if (recog->jconf->input.speech_input == SP_MIC && speechlen == 0) {
/* this is first up-trigger */
if (rewind_msec > 0 && !recog->adin->is_valid_data) {
/* not spoken currently but has data to process at first trigger */
/* it means that there are old spoken segments */
      /* discard them */
      printf("discard already recorded %d samples\n", len);
return 0;
}
/* erase "<<>>" text on tty */
fprintf(stderr, "\r \r");
if (rewind_msec > 0) {
/* when -rewind value set larger than 0, the speech data spoken
while pause will be considered back to the specified msec.
*/
printf("buffered samples=%d\n", len);
w = rewind_msec * sfreq / 1000;
if (len > w) {
start = len - w;
len = w;
} else {
start = 0;
}
printf("will process from %d\n", start);
}
}
#ifdef WORDS_BIGENDIAN
swap_sample_bytes(&(now[start]), len);
#endif
  /* send the triggered samples to all connected adinnet servers */
  for (i=0;i<adinnet_servnum;i++) {
    count = wt(sd[i], (char *)&(now[start]), len * sizeof(SP16));
    if (count < 0) {
      perror("adintool: cannot write");
      fprintf(stderr, "failed to send data to %s:%d\n", adinnet_serv[i], adinnet_port[i]);
    }
  }
#ifdef WORDS_BIGENDIAN
  /* swap the local buffer back to host byte order after sending */
  swap_sample_bytes(&(now[start]), len);
#endif
  /* accumulate sample num of this segment */
  speechlen += len;
#ifdef HAVE_PTHREAD
  if (recog->adin->enable_thread) {
/* if input length reaches limit, rehash the ad-in buffer */
if (recog->adin->speechlen > MAXSPEECHLEN - 16000) {
recog->adin->rehash = TRUE;
fprintf(stderr, "+");
}
}
#endif
/* display progress in dots */
fprintf(stderr, ".");
return(0);
}
/**********************************************************************/
/**
 *
 * Send an end-of-segment signal to the adinnet server(s).
 *
*/
static void
adin_send_end_of_segment()
{
char p;
int i;
  for(i=0;i<adinnet_servnum;i++) {
    if (wt(sd[i], &p, 0) < 0) {
      perror("adintool: cannot write");
      fprintf(stderr, "failed to send EOS to %s:%d\n", adinnet_serv[i], adinnet_port[i]);
    }
  }
}

static int unknown_command_counter = 0; ///< Counter of unknown commands, to detect a broken connection

/**
 *
* Callback function for A/D-in processing to check pause/resume
* command from adinnet server.
*
* @return 0 when no command or RESUME command to tell caller to
* continue recording, -1 when received a PAUSE command and tell caller to
* stop recording, or -2 when error.
*
*/
static int
adinnet_check_command()
{
fd_set rfds;
struct timeval tv;
int status;
int cnt, ret;
char com;
int i, max_sd;
/* check if some commands are waiting in queue */
FD_ZERO(&rfds);
max_sd = 0;
for(i=0;i<adinnet_servnum;i++) {
  if (max_sd < sd[i]) max_sd = sd[i];
  FD_SET(sd[i], &rfds);
}
tv.tv_sec = 0;
tv.tv_usec = 1;
status = select(max_sd+1, &rfds, NULL, NULL, &tv);
if (status < 0) { /* error */
  fprintf(stderr, "adintool: cannot check command from adinnet server\n");
  return -2; /* error return */
}
if (status > 0) { /* there are some data */
  for (i=0;i<adinnet_servnum;i++) {
    if (FD_ISSET(sd[i], &rfds)) {
      ret = rd(sd[i], &com, &cnt, 1); /* read in command */
      switch (com) {
      case '0': /* pause */
        fprintf(stderr, "<#%d: PAUSE>\n", i+1);
stop_at_next = TRUE; /* mark to pause at the end of this input */
/* tell caller to stop recording */
return -1;
case '1': /* resume */
fprintf(stderr, "<#%d: RESUME - already running, ignored>\n", i+1);
/* we are already running, so just continue */
break;
case '2': /* terminate */
fprintf(stderr, "<#%d: TERMINATE>\n", i+1);
stop_at_next = TRUE; /* mark to pause at the end of this input */
/* tell caller to stop recording immediately */
return -2;
break;
default:
fprintf(stderr, "adintool: unknown command from #%d: %d\n", i+1,com);
unknown_command_counter++;
/* avoid infinite loop in that case... */
/* this may happen when connection is terminated from server side */
if (unknown_command_counter > 100) {
fprintf(stderr, "killed by a flood of unknown commands from server\n");
exit(1);
}
}
}
}
}
return 0; /* continue ad-in */
}
static int resume_count[MAXCONNECTION]; ///< Number of incoming resume commands for resume synchronization
/**
 *
 * Wait until a resume command arrives from the adinnet server(s).
 *
 * @return 1 when resume is granted and recording should restart,
 * or -1 on error.
*
*/
static int
adinnet_wait_command()
{
fd_set rfds;
int status;
int cnt, ret;
char com;
int i, count, max_sd;
fprintf(stderr, "<<< waiting RESUME >>>");
while(1) {
/* check for synchronized resume */
if (loose_sync) {
for(i=0;i<adinnet_servnum;i++) {
  if (resume_count[i] == 0) break;
}
if (i >= adinnet_servnum) { /* all count > 0 */
  for(i=0;i<adinnet_servnum;i++) resume_count[i] = 0;
  fprintf(stderr, ">>RESUME\n");
return 1; /* restart recording */
}
} else {
/* force same resume count among servers */
count = resume_count[0];
for(i=1;i<adinnet_servnum;i++) {
  if (count != resume_count[i]) break;
}
if (i >= adinnet_servnum && count > 0) {
/* all resume counts are the same, actually resume */
  for(i=0;i<adinnet_servnum;i++) resume_count[i] = 0;
  fprintf(stderr, ">>RESUME\n");
return 1; /* restart recording */
}
}
/* not all host send me resume command */
FD_ZERO(&rfds);
max_sd = 0;
for(i=0;i<adinnet_servnum;i++) {
  if (max_sd < sd[i]) max_sd = sd[i];
  FD_SET(sd[i], &rfds);
}
status = select(max_sd+1, &rfds, NULL, NULL, NULL); /* block until a command arrives */
if (status < 0) { /* error */
  fprintf(stderr, "adintool: cannot check command from adinnet server\n");
  return -1; /* error return */
} else { /* there are some data */
  for(i=0;i<adinnet_servnum;i++) {
    if (FD_ISSET(sd[i], &rfds)) {
      ret = rd(sd[i], &com, &cnt, 1);
      switch (com) {
      case '0': /* pause */
        /* already paused, so just wait for next command */
        if (loose_sync) {
          fprintf(stderr, "<#%d: PAUSE - already paused, reset sync>\n", i+1);
          for(i=0;i<adinnet_servnum;i++) resume_count[i] = 0;
        } else {
          fprintf(stderr, "<#%d: PAUSE - already paused, ignored>\n", i+1);
}
break;
case '1': /* resume */
/* do resume */
resume_count[i]++;
if (loose_sync) {
fprintf(stderr, "<#%d: RESUME>\n", i+1);
} else {
fprintf(stderr, "<#%d: RESUME @%d>\n", i+1, resume_count[i]);
}
break;
case '2': /* terminate */
/* already paused, so just wait for next command */
if (loose_sync) {
fprintf(stderr, "<#%d: TERMINATE - already paused, reset sync>\n", i+1);
          for(i=0;i<adinnet_servnum;i++) resume_count[i] = 0;
        } else {
          fprintf(stderr, "<#%d: TERMINATE - already paused, ignored>\n", i+1);
}
break;
default:
fprintf(stderr, "adintool: unknown command from #%d: %d\n", i+1, com);
unknown_command_counter++;
/* avoid infinite loop in that case... */
/* this may happen when connection is terminated from server side */
if (unknown_command_counter > 100) {
fprintf(stderr, "killed by a flood of unknown commands from server\n");
exit(1);
}
}
}
}
}
}
return 0;
}
/**
 * Close the current output file and print a summary of the recorded segment.
 */
static boolean
close_files()
{
size = sizeof(SP16) * speechlen;
if (writing_file) {
if (use_raw) {
if (close(fd) != 0) {
perror("adinrec");
return FALSE;
}
} else {
if (wrwav_close(fp) == FALSE) {
fprintf(stderr, "adinrec: failed to close file\n");
return FALSE;
}
}
printf("%s: %d samples (%.2f sec.) [%6d (%5.2fs) - %6d (%5.2fs)]\n",
outpath, speechlen,
(float)speechlen / (float)sfreq,
trigger_sample, (float)trigger_sample / (float)sfreq,
trigger_sample + speechlen, (float)(trigger_sample + speechlen) / (float)sfreq);
writing_file = FALSE;
}
return TRUE;
}
/* Interrupt signal handling */
static void
interrupt_record(int signum)
{
fprintf(stderr, "[Interrupt]");
if (speech_output == SPOUT_FILE) {
/* close files */
close_files();
}
/* terminate program */
exit(1);
}
static void
record_trigger_time(Recog *recog, void *data)
{
trigger_sample = recog->adin->last_trigger_sample;
}
/**
 *
 * Main function.
 *
 * @param argc [in] number of arguments.
 * @param argv [in] array of arguments.
*
* @return 1 on error, 0 on success.
*
*/
int
main(int argc, char *argv[])
{
Recog *recog;
Jconf *jconf;
int ret;
int i;
boolean is_continues;
/* create instance */
recog = j_recog_new();
jconf = j_jconf_new();
recog->jconf = jconf;
/********************/
/* setup parameters */
/********************/
/* register additional options */
j_add_option("-in", 1, 1, "input from", opt_in);
j_add_option("-out", 1, 1, "output to", opt_out);
j_add_option("-server", 1, 1, "hostname (-out adinnet)", opt_server);
j_add_option("-NA", 1, 1, "NetAudio server host:unit (-in netaudio)", opt_NA);
j_add_option("-port", 1, 1, "port number (-out adinnet)", opt_port);
j_add_option("-inport", 1, 1, "port number (-in adinnet)", opt_inport);
j_add_option("-filename", 1, 1, "(base) filename to record (-out file)", opt_filename);
j_add_option("-startid", 1, 1, "recording start id (-out file)", opt_startid);
j_add_option("-freq", 1, 1, "sampling frequency in Hz", opt_freq);
j_add_option("-nosegment", 0, 0, "not segment input speech, record all", opt_nosegment);
j_add_option("-segment", 0, 0, "force segment input speech", opt_segment);
j_add_option("-oneshot", 0, 0, "exit after the first input", opt_oneshot);
j_add_option("-raw", 0, 0, "save in raw (BE) format", opt_raw);
j_add_option("-autopause", 0, 0, "automatically pause at each input end", opt_autopause);
j_add_option("-loosesync", 0, 0, "loose sync of resume among servers", opt_loosesync);
j_add_option("-rewind", 1, 1, "rewind to the msec", opt_rewind);
j_add_option("-h", 0, 0, "display this help", opt_help);
j_add_option("-help", 0, 0, "display this help", opt_help);
j_add_option("--help", 0, 0, "display this help", opt_help);
/* when no argument, output help and exit */
if (argc <= 1) {
opt_help(jconf, NULL, 0);
return 0;
}
/* read arguments and set parameters */
if (j_config_load_args(jconf, argc, argv) == -1) {
fprintf(stderr, "Error reading arguments\n");
return -1;
}
/* check needed arguments */
if (speech_output == SPOUT_FILE && filename == NULL) {
fprintf(stderr, "Error: output filename not specified\n");
return(-1);
}
if (speech_output == SPOUT_ADINNET && adinnet_servnum < 1) {
fprintf(stderr, "Error: adinnet server name for output not specified\n");
return(-1);
}
if (jconf->input.speech_input == SP_ADINNET &&
speech_output != SPOUT_ADINNET &&
adinnet_servnum >= 1) {
fprintf(stderr, "Warning: you specified port num by -port, but it's for output\n");
fprintf(stderr, "Warning: you may specify input port by -inport instead.\n");
fprintf(stderr, "Warning: now the default value (%d) will be used\n", ADINNET_PORT);
}
#ifdef USE_NETAUDIO
if (jconf->input.speech_input == SP_NETAUDIO && jconf->input.netaudio_devname == NULL) {
fprintf(stderr, "Error: NetAudio server name not specified\n");
return(-1);
}
#endif
if (adinnet_portnum != adinnet_servnum) {
/* if only one server, use default */
if (adinnet_servnum == 1) {
adinnet_port[0] = ADINNET_PORT;
adinnet_portnum = 1;
} else {
fprintf(stderr, "Error: you should specify both server names and different port for each!\n");
fprintf(stderr, "\tserver:");
for(i=0;i<adinnet_servnum;i++) fprintf(stderr, " %s", adinnet_serv[i]);
fprintf(stderr, "\n\tport :");
for(i=0;i<adinnet_portnum;i++) fprintf(stderr, " %d", adinnet_port[i]);
fprintf(stderr, "\n");
return -1;
}
}
/* apply Julius default values for unspecified acoustic analysis parameters */
apply_para(&(jconf->am_root->analysis.para), &(jconf->am_root->analysis.para_default));
/* set some values */
jconf->input.sfreq = jconf->am_root->analysis.para.smp_freq;
jconf->input.period = jconf->am_root->analysis.para.smp_period;
jconf->input.frameshift = jconf->am_root->analysis.para.frameshift;
jconf->input.framesize = jconf->am_root->analysis.para.framesize;
/* disable successive segmentation when no segmentation available */
if (!jconf->detect.silence_cut) continuous_segment = FALSE;
/* store sampling rate locally */
sfreq = jconf->am_root->analysis.para.smp_freq;
/********************/
/* setup for output */
/********************/
if (speech_output == SPOUT_FILE) {
/* allocate work area for output file name */
if (continuous_segment) {
outpath = (char *)mymalloc(strlen(filename) + 10);
} else {
if (use_raw) {
outpath = filename;
} else {
outpath = new_output_filename(filename, ".wav");
}
}
} else if (speech_output == SPOUT_ADINNET) {
/* connect to adinnet server(s) */
for(i=0;i<adinnet_servnum;i++) {
  fprintf(stderr, "connecting to %s:%d...", adinnet_serv[i], adinnet_port[i]);
  sd[i] = make_connection(adinnet_serv[i], adinnet_port[i]);
  if (sd[i] < 0) return 1; /* on error */
  fprintf(stderr, "connected\n");
}
} else if (speech_output == SPOUT_STDOUT) {
/* output to standard output */
fd = 1;
fprintf(stderr, "[STDOUT]");
}
/**********************/
/* interrupt handling */
/**********************/
if (signal(SIGINT, interrupt_record) == SIG_ERR) {
fprintf(stderr, "Warning: signal interruption may collapse output\n");
}
if (signal(SIGTERM, interrupt_record) == SIG_ERR) {
fprintf(stderr, "Warning: signal interruption may collapse output\n");
}
#ifdef SIGPIPE
if (signal(SIGPIPE, interrupt_record) == SIG_ERR) {
fprintf(stderr, "Warning: signal interruption may collapse output\n");
}
#endif
/***************************/
/* initialize input device */
/***************************/
if (jconf->input.speech_input == SP_ADINNET) {
jconf->input.adinnet_port = adinnet_port_in;
}
if (j_adin_init(recog) == FALSE) {
fprintf(stderr, "Error in initializing adin device\n");
return -1;
}
if (rewind_msec > 0) {
/* allow adin module to keep triggered speech while pausing */
#ifdef HAVE_PTHREAD
if (recog->adin->enable_thread) {
recog->adin->ignore_speech_while_recog = FALSE;
}
#endif
}
/*********************/
/* add some callback */
/*********************/
callback_add(recog, CALLBACK_EVENT_SPEECH_START, record_trigger_time, NULL);
/**************************************/
/* display input/output configuration */
/**************************************/
put_status(recog);
/*******************/
/* begin recording */
/*******************/
if (continuous_segment) { /* reset parameter for successive output */
total_speechlen = 0;
sid = startid;
}
fprintf(stderr,"[start recording]\n");
if (jconf->input.speech_input == SP_RAWFILE) file_counter = 0;
/*********************/
/* input stream loop */
/*********************/
while(1) {
/* begin A/D input of a stream */
ret = j_open_stream(recog, NULL);
switch(ret) {
case 0: /* succeeded */
break;
case -1: /* error */
/* go on to next input */
continue;
case -2: /* end of recognition process */
switch(jconf->input.speech_input) {
case SP_RAWFILE:
fprintf(stderr, "%d files processed\n", file_counter);
break;
case SP_STDIN:
fprintf(stderr, "reached end of input on stdin\n");
break;
default:
fprintf(stderr, "failed to begin input stream\n");
}
/* exit recording */
goto record_end;
}
/*********************************/
/* do VAD and recording */
/*********************************/
do {
/* process one segment with segmentation */
/* for incoming speech input, speech detection and segmentation are
performed and, adin_callback_* is called for speech output for each segment block.
*/
/* adin_go() return when input segmented by long silence, or input stream reached to the end */
speechlen = 0;
stop_at_next = FALSE;
if (jconf->input.speech_input == SP_MIC) {
fprintf(stderr, "<<< please speak >>>");
}
if (speech_output == SPOUT_ADINNET) {
ret = adin_go(adin_callback_adinnet, adinnet_check_command, recog);
} else {
ret = adin_go(adin_callback_file, NULL, recog);
}
/* return value of adin_go:
-2: input terminated by pause command from adinnet server
-1: input device read error or callback process error
0: paused by input stream (end of file, etc..)
>0: detected end of speech segment:
by adin-cut, or by callback process
(or return value of ad_check (<0) (== not used in this program))
*/
/* if PAUSE or TERMINATE command has been received while input,
stop_at_next is TRUE here */
switch(ret) {
case -2: /* terminated by terminate command from server */
fprintf(stderr, "[terminated by server]\n");
break;
case -1: /* device read error or callback error */
fprintf(stderr, "[error]\n");
break;
case 0: /* reached to end of input */
fprintf(stderr, "[eof]\n");
break;
default: /* input segmented by silence or callback process */
fprintf(stderr, "[segmented]\n");
break;
}
if (ret == -1) {
/* error in input device or callback function, so terminate program here */
return 1;
}
/*************************/
/* one segment processed */
/*************************/
if (speech_output == SPOUT_FILE) {
/* close output files */
if (close_files() == FALSE) return 1;
} else if (speech_output == SPOUT_ADINNET) {
if (speechlen > 0) {
if (ret >= 0 || stop_at_next) { /* segmented by adin-cut or end of stream or server-side command */
/* send end-of-segment ack to client */
adin_send_end_of_segment();
}
/* output info */
printf("sent: %d samples (%.2f sec.) [%6d (%5.2fs) - %6d (%5.2fs)]\n",
speechlen, (float)speechlen / (float)sfreq,
trigger_sample, (float)trigger_sample / (float)sfreq,
trigger_sample + speechlen, (float)(trigger_sample + speechlen) / (float)sfreq);
}
}
/*************************************/
/* increment ID and total sample len */
/*************************************/
if (continuous_segment) {
total_speechlen += speechlen;
sid++;
}
/***************************************************/
/* with adinnet server, if terminated by */
/* server-side PAUSE command, wait for RESUME here */
/***************************************************/
if (pause_each) {
/* pause at each end */
//if (speech_output == SPOUT_ADINNET && speechlen > 0) {
if (speech_output == SPOUT_ADINNET) {
if (adinnet_wait_command() < 0) {
/* command error: terminate program here */
return 1;
}
}
} else {
if (speech_output == SPOUT_ADINNET && stop_at_next) {
if (adinnet_wait_command() < 0) {
/* command error: terminate program here */
return 1;
}
}
}
/* loop condition check */
is_continues = FALSE;
if (continuous_segment && (ret > 0 || ret == -2)) {
is_continues = TRUE;
}
} while (is_continues); /* to the next segment in this input stream */
/***********************/
/* end of input stream */
/***********************/
adin_end(recog->adin);
} /* to the next input stream (i.e. next input file in SP_RAWFILE) */
record_end:
if (speech_output == SPOUT_FILE) {
if (continuous_segment) {
printf("recorded total %d samples (%.2f sec.) segmented to %s.%04d - %s.%04d files\n", total_speechlen, (float)total_speechlen / (float)sfreq, filename, 0, filename, sid-1);
}
}
return 0;
}