/**
 * @file   adintool.c
 *
 * <JA>
 * @brief  音声入力を記録/分割/送受信する汎用音声入力ツール
 *
 * このツールは Julius の音声ライブラリを用いて様々な音声の入力や出力を
 * 行います.マイクデバイス・ファイル・adinnetネットワーククライアント・
 * 標準入力から音声を読み込んで(必要であれば)音声区間検出を行い,
 * その結果のデータを順次,ファイル・adinnetネットワークサーバー・標準出力
 * などへ出力します.
 *
 * 応用例として,Julius/Julian へのネットワーク入力(入力=マイク,出力=adinnet),
 * 音声ファイルの音声区間抽出(入力=ファイル,出力=ファイル)などに
 * 利用できます.
 * </JA>
 *
 * <EN>
 * @brief  AD-in tool to record / split / send / receive speech data
 *
 * This tool handles speech input and output from/to various devices
 * using the libsent library in Julius.  It reads input from either of
 * microphone, file, adinnet network client, or standard input, performs
 * speech detection based on level and zero-cross thresholds (which can be
 * disabled), and outputs the resulting data to either of file, adinnet
 * network server, or standard output.
 *
 * For example, you can send microphone input to Julius running on another
 * host by setting input to microphone and output to adinnet (Julius should
 * be run with "-input adinnet"), or you can split a long recording into
 * speech segments separated by long silences by setting both input and
 * output to file and enabling segment detection.
 * </EN>
 *
 * @author Akinobu LEE
 * @date   Wed Mar 23 20:43:32 2005
 *
 * $Revision: 1.16 $
 *
 */
/*
 * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University
 * Copyright (c) 2001-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <julius/juliuslib.h>
#include <signal.h>

#define MAXCONNECTION 10        ///< Maximum number of server connections

/* input */
static int file_counter = 0;    ///< Number of input files (for SP_RAWFILE)
static int sfreq;               ///< Temporal storage of sample rate

/* output */
enum{SPOUT_FILE, SPOUT_STDOUT, SPOUT_ADINNET}; ///< value for speech_output
static int speech_output = SPOUT_FILE; ///< output device
static int total_speechlen;     ///< total samples of recorded segments
static int speechlen;           ///< samples of one recorded segment
static char *filename = NULL;   ///< Output file name
static int fd = -1;             ///< File descriptor for output
static FILE *fp = NULL;         ///< File pointer for WAV output
static int size;                ///< Output file size
static boolean use_raw = FALSE; ///< Output in RAW format if TRUE
static boolean continuous_segment = TRUE; ///< enable/disable successive output
static int startid = 0;         ///< output file numbering variable
static int sid = 0;             ///< current file ID (for SPOUT_FILE)
static char *outpath = NULL;    ///< work space for output file name formatting
static int adinnet_port_in = ADINNET_PORT; ///< Input server port
static int adinnet_port[MAXCONNECTION];    ///< Output server ports
static char *adinnet_serv[MAXCONNECTION];  ///< Server names to send recorded data to
static int sd[MAXCONNECTION];   ///< Output socket descriptors
static int adinnet_servnum = 0; ///< Number of servers to connect to
static int adinnet_portnum = 0; ///< Number of server ports specified
static boolean writing_file = FALSE; ///< TRUE if writing to a file
static boolean stop_at_next = FALSE; ///< TRUE if we need to stop at the next input end by server command; set when a PAUSE or TERMINATE command is received during input
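/*
 * Typical invocations, for illustration only.  These are assembled from the
 * options documented in opt_help() below; "myhost" and "foo" are placeholder
 * values, not defaults.
 *
 *   # stream microphone input to a remote Julius started with "-input adinnet"
 *   adintool -in mic -out adinnet -server myhost
 *
 *   # split a speech file into silence-separated segments foo.0000.wav, foo.0001.wav, ...
 *   adintool -in file -out file -filename foo -segment
 */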
/* switch */
static boolean pause_each = FALSE; ///< If set to TRUE, adintool will pause automatically at each input end and wait for resume command
static boolean loose_sync = FALSE; ///< If set to TRUE, adintool will do loose synchronization for resume among servers
static int rewind_msec = 0;
static int trigger_sample;

/**
 * ヘルプを表示して終了する
 * Print help and exit
 */
static boolean
opt_help(Jconf *jconf, char *arg[], int argnum)
{
  fprintf(stderr, "adintool --- AD-in tool to record/split/send/receive speech data\n");
  fprintf(stderr, "Usage: adintool [options] -in inputdev -out outputdev\n");
  fprintf(stderr, "inputdev: read speech data from:\n");
#ifdef USE_MIC
  fprintf(stderr, " mic microphone (default)\n");
#endif
#ifdef USE_NETAUDIO
  fprintf(stderr, " netaudio DatLink (NetAudio) server\n");
#endif
  fprintf(stderr, " file speech file (filename given from prompt)\n");
  fprintf(stderr, " adinnet from adinnet client (I'm server)\n");
  fprintf(stderr, " stdin standard tty input\n");
  fprintf(stderr, " (other input can be specified by \"-input xxx\" as in Julius)\n");
  fprintf(stderr, "outputdev: output data to:\n");
  fprintf(stderr, " file speech file (\"foo.0000.wav\" - \"foo.N.wav\")\n");
  fprintf(stderr, " adinnet to adinnet server (I'm client)\n");
  fprintf(stderr, " stdout standard tty output\n");
  fprintf(stderr, "I/O options:\n");
#ifdef USE_NETAUDIO
  fprintf(stderr, " -NA (netaudio) NetAudio server host:unit\n");
#endif
  fprintf(stderr, " -server host[,host,...] (adinnet-out) server hostnames\n");
  fprintf(stderr, " -port num[,num,...] (adinnet-out) port numbers (%d)\n", ADINNET_PORT);
  fprintf(stderr, " -inport num (adinnet-in) port number (%d)\n", ADINNET_PORT);
  fprintf(stderr, " -filename foo (file-out) filename to record\n");
  fprintf(stderr, " -startid id (file-out) recording start id (%04d)\n", startid);
  fprintf(stderr, "Recording and Pause segmentation options:\n");
  fprintf(stderr, " (input segmentation: on for file/mic/stdin, off for adinnet)\n");
  fprintf(stderr, " [-nosegment] do not segment input speech\n");
  fprintf(stderr, " [-segment] force segmentation of input speech\n");
  fprintf(stderr, " [-cutsilence] (same as \"-segment\")\n");
  fprintf(stderr, " [-oneshot] record only the first segment\n");
  fprintf(stderr, " [-freq frequency] sampling frequency in Hz (%d)\n", jconf->am_root->analysis.para_default.smp_freq);
  fprintf(stderr, " [-48] 48000Hz recording with down sampling (16kHz only)\n");
  fprintf(stderr, " [-lv unsignedshort] silence cut level threshold (%d)\n", jconf->detect.level_thres);
  fprintf(stderr, " [-zc zerocrossnum] silence cut zerocross num (%d)\n", jconf->detect.zero_cross_num);
  fprintf(stderr, " [-headmargin msec] head margin length (%d)\n", jconf->detect.head_margin_msec);
  fprintf(stderr, " [-tailmargin msec] tail margin length (%d)\n", jconf->detect.tail_margin_msec);
  fprintf(stderr, " [-chunksize sample] chunk size for processing (%d)\n", jconf->detect.chunk_size);
  fprintf(stderr, " [-nostrip] do not strip zero samples\n");
  fprintf(stderr, " [-zmean] remove DC by zero mean\n");
  fprintf(stderr, " [-raw] output in RAW format\n");
  fprintf(stderr, " [-autopause] automatically pause at each input end\n");
  fprintf(stderr, " [-loosesync] loose sync of resume among servers\n");
  fprintf(stderr, " [-rewind msec] rewind input if spoken while pause at resume\n");
  fprintf(stderr, " [-C jconffile] load jconf to set parameters (ignores other options)\n");
  fprintf(stderr, "\nLibrary configuration: ");
  confout_version(stderr);
  confout_audio(stderr);
  confout_process(stderr);
  fprintf(stderr, "\n");
exit(1); /* exit here */ return TRUE; } static boolean opt_in(Jconf *jconf, char *arg[], int argnum) { jconf->input.plugin_source = -1; jconf->input.type = INPUT_WAVEFORM; switch(arg[0][0]) { case 'm': #ifdef USE_MIC jconf->input.speech_input = SP_MIC; #else fprintf(stderr,"Error: mic input not available\n"); return FALSE; #endif break; case 'f': jconf->input.speech_input = SP_RAWFILE; jconf->detect.silence_cut = 1; break; case 's': jconf->input.speech_input = SP_STDIN; jconf->detect.silence_cut = 1; break; case 'a': jconf->input.speech_input = SP_ADINNET; break; case 'n': #ifdef USE_NETAUDIO jconf->input.speech_input = SP_NETAUDIO; #else fprintf(stderr,"Error: netaudio input not available\n"); return FALSE; #endif break; default: fprintf(stderr,"Error: no such input device: %s\n", arg[0]); return FALSE; } return TRUE; } static boolean opt_out(Jconf *jconf, char *arg[], int argnum) { switch(arg[0][0]) { case 'f': speech_output = SPOUT_FILE; break; case 's': speech_output = SPOUT_STDOUT; break; case 'a': speech_output = SPOUT_ADINNET; break; default: fprintf(stderr,"Error: no such output device: %s\n", arg[0]); return FALSE; } return TRUE; } static boolean opt_server(Jconf *jconf, char *arg[], int argnum) { char *p, *q; if (speech_output == SPOUT_ADINNET) { p = (char *)malloc(strlen(arg[0]) + 1); strcpy(p, arg[0]); for (q = strtok(p, ","); q; q = strtok(NULL, ",")) { if (adinnet_servnum >= MAXCONNECTION) { fprintf(stderr, "Error: too many servers (> %d): %s\n", MAXCONNECTION, arg[0]); return FALSE; } adinnet_serv[adinnet_servnum] = (char *)malloc(strlen(q) + 1); strcpy(adinnet_serv[adinnet_servnum], q); adinnet_servnum++; } free(p); } else { fprintf(stderr, "Warning: server [%s] should be with adinnet\n", arg[0]); return FALSE; } return TRUE; } static boolean opt_NA(Jconf *jconf, char *arg[], int argnum) { #ifdef USE_NETAUDIO if (jconf->input.speech_input == SP_NETAUDIO) { jconf->input.netaudio_devname = arg[0]; } else { fprintf(stderr, "Error: use \"-NA\" with \"-in netaudio\"\n"); return FALSE; } return TRUE; #else /* ~USE_NETAUDIO */ fprintf(stderr, "Error: NetAudio(DatLink) not supported\n"); return FALSE; #endif } static boolean opt_inport(Jconf *jconf, char *arg[], int argnum) { adinnet_port_in = atoi(arg[0]); return TRUE; } static boolean opt_port(Jconf *jconf, char *arg[], int argnum) { char *p, *q; p = (char *)malloc(strlen(arg[0]) + 1); strcpy(p, arg[0]); for (q = strtok(p, ","); q; q = strtok(NULL, ",")) { if (adinnet_portnum >= MAXCONNECTION) { fprintf(stderr, "Error: too many server ports (> %d): %s\n", MAXCONNECTION, arg[0]); return FALSE; } adinnet_port[adinnet_portnum] = atoi(q); adinnet_portnum++; } free(p); return TRUE; } static boolean opt_filename(Jconf *jconf, char *arg[], int argnum) { filename = arg[0]; return TRUE; } static boolean opt_startid(Jconf *jconf, char *arg[], int argnum) { startid = atoi(arg[0]); return TRUE; } static boolean opt_freq(Jconf *jconf, char *arg[], int argnum) { jconf->amnow->analysis.para.smp_freq = atoi(arg[0]); jconf->amnow->analysis.para.smp_period = freq2period(jconf->amnow->analysis.para.smp_freq); return TRUE; } static boolean opt_nosegment(Jconf *jconf, char *arg[], int argnum) { jconf->detect.silence_cut = 0; return TRUE; } static boolean opt_segment(Jconf *jconf, char *arg[], int argnum) { jconf->detect.silence_cut = 1; return TRUE; } static boolean opt_oneshot(Jconf *jconf, char *arg[], int argnum) { continuous_segment = FALSE; return TRUE; } static boolean opt_raw(Jconf *jconf, char *arg[], int argnum) { use_raw = TRUE; return 
TRUE; } static boolean opt_autopause(Jconf *jconf, char *arg[], int argnum) { pause_each = TRUE; return TRUE; } static boolean opt_loosesync(Jconf *jconf, char *arg[], int argnum) { loose_sync = TRUE; return TRUE; } static boolean opt_rewind(Jconf *jconf, char *arg[], int argnum) { rewind_msec = atoi(arg[0]); return TRUE; } /** * * 確認のため入出力設定をテキスト出力する * * * * Output input/output configuration in text for a confirmation. * * */ void put_status(Recog *recog) { int i; Jconf *jconf = recog->jconf; fprintf(stderr,"----\n"); fprintf(stderr, "Input stream:\n"); fprintf(stderr, "\t input type = "); switch(jconf->input.type) { case INPUT_WAVEFORM: fprintf(stderr, "waveform\n"); break; case INPUT_VECTOR: fprintf(stderr, "feature vector sequence\n"); break; } fprintf(stderr, "\t input source = "); if (jconf->input.plugin_source != -1) { fprintf(stderr, "plugin\n"); } else if (jconf->input.speech_input == SP_RAWFILE) { fprintf(stderr, "waveform file\n"); } else if (jconf->input.speech_input == SP_MFCFILE) { fprintf(stderr, "feature vector file (HTK format)\n"); } else if (jconf->input.speech_input == SP_STDIN) { fprintf(stderr, "standard input\n"); } else if (jconf->input.speech_input == SP_ADINNET) { fprintf(stderr, "adinnet client\n"); #ifdef USE_NETAUDIO } else if (jconf->input.speech_input == SP_NETAUDIO) { char *p; fprintf(stderr, "NetAudio server on "); if (jconf->input.netaudio_devname != NULL) { fprintf(stderr, "%s\n", jconf->input.netaudio_devname); } else if ((p = getenv("AUDIO_DEVICE")) != NULL) { fprintf(stderr, "%s\n", p); } else { fprintf(stderr, "local port\n"); } #endif } else if (jconf->input.speech_input == SP_MIC) { fprintf(stderr, "microphone\n"); fprintf(stderr, "\t device API = "); switch(jconf->input.device) { case SP_INPUT_DEFAULT: fprintf(stderr, "default\n"); break; case SP_INPUT_ALSA: fprintf(stderr, "alsa\n"); break; case SP_INPUT_OSS: fprintf(stderr, "oss\n"); break; case SP_INPUT_ESD: fprintf(stderr, "esd\n"); break; case SP_INPUT_PULSEAUDIO: fprintf(stderr, "pulseaudio\n"); break; } } fprintf(stderr,"Segmentation: "); if (jconf->detect.silence_cut) { if (continuous_segment) { fprintf(stderr,"on, continuous\n"); } else { fprintf(stderr,"on, only one snapshot\n"); } if (recog->adin->down_sample) { fprintf(stderr," SampleRate: 48000Hz -> %d Hz\n", sfreq); } else { fprintf(stderr," SampleRate: %d Hz\n", sfreq); } fprintf(stderr," Level: %d / 32767\n", jconf->detect.level_thres); fprintf(stderr," ZeroCross: %d per sec.\n", jconf->detect.zero_cross_num); fprintf(stderr," HeadMargin: %d msec.\n", jconf->detect.head_margin_msec); fprintf(stderr," TailMargin: %d msec.\n", jconf->detect.tail_margin_msec); } else { fprintf(stderr,"OFF\n"); } if (jconf->preprocess.strip_zero_sample) { fprintf(stderr," ZeroFrames: drop\n"); } else { fprintf(stderr," ZeroFrames: keep\n"); } if (jconf->preprocess.use_zmean) { fprintf(stderr," remove DC: on\n"); } else { fprintf(stderr," remove DC: off\n"); } if (pause_each) { fprintf(stderr," Autopause: on\n"); } else { fprintf(stderr," Autopause: off\n"); } if (loose_sync) { fprintf(stderr," LooseSync: on\n"); } else { fprintf(stderr," LooseSync: off\n"); } if (rewind_msec > 0) { fprintf(stderr," Rewind: %d msec\n", rewind_msec); } else { fprintf(stderr," Rewind: no\n"); } fprintf(stderr," Output to: "); switch(speech_output) { case SPOUT_FILE: if (jconf->detect.silence_cut) { if (continuous_segment) { if (use_raw) { fprintf(stderr,"%s.%04d.raw, %s.%04d.raw, ...\n", filename,startid, filename, startid+1); } else { fprintf(stderr,"%s.%04d.wav, 
%s.%04d.wav, ...\n", filename,startid, filename, startid+1); } } else { fprintf(stderr,"%s\n", outpath); } } else { fprintf(stderr,"%s (warning: inifinite recording: be care of disk space!)\n", outpath); } break; case SPOUT_STDOUT: fprintf(stderr,"STDOUT\n"); use_raw = TRUE; break; case SPOUT_ADINNET: fprintf(stderr, "adinnet server"); for(i=0;i * 読み込んだサンプル列を fd もしくは fp に記録 * するコールバック関数 * * @param now [in] 録音されたサンプル列 * @param len [in] 長さ(サンプル数) * * @return エラー時 -1,処理成功時 0,処理成功+区間終端検出時 1 を返す. * * * Callback handler to record the sample fragments to file pointed by * the file descriptor "fd". * * @param now [in] recorded fragments of speech sample * @param len [in] length of above in samples * * @return -1 on device error (require caller to exit and terminate input), * 0 on success (allow caller to continue), * 1 on succeeded but segmentation detected (require caller to exit but * input will continue in the next call. * */ static int adin_callback_file(SP16 *now, int len, Recog *recog) { int count; int start; int w; start = 0; if (recog->jconf->input.speech_input == SP_MIC && speechlen == 0) { /* this is first up-trigger */ if (rewind_msec > 0 && !recog->adin->is_valid_data) { /* not spoken currently but has data to process at first trigger */ /* it means that there are old spoken segments */ /* disgard them */ printf("disgard already recorded %d samples\n", len); return 0; } /* erase "<<>>" text on tty */ fprintf(stderr, "\r \r"); if (rewind_msec > 0) { /* when -rewind value set larger than 0, the speech data spoken while pause will be considered back to the specified msec. */ printf("buffered samples=%d\n", len); w = rewind_msec * sfreq / 1000; if (len > w) { start = len - w; len = w; } else { start = 0; } printf("will process from %d\n", start); } } /* open files for recording at first trigger */ if (speech_output == SPOUT_FILE && speechlen == 0) { if (continuous_segment) { if (use_raw) { sprintf(outpath, "%s.%04d.raw", filename, sid); } else { sprintf(outpath, "%s.%04d.wav", filename, sid); } } fprintf(stderr,"[%s]", outpath); if (access(outpath, F_OK) == 0) { if (access(outpath, W_OK) == 0) { fprintf(stderr, "(override)"); } else { perror("adintool"); return(-1); } } if (use_raw) { if ((fd = open(outpath, O_CREAT | O_RDWR #ifdef O_BINARY | O_BINARY #endif , 0644)) == -1) { perror("adintool"); return -1; } } else { if ((fp = wrwav_open(outpath, sfreq)) == NULL) { perror("adintool"); return -1; } } writing_file = TRUE; } /* write recorded sample to file */ if (use_raw) { count = wrsamp(fd, &(now[start]), len); if (count < 0) { perror("adinrec: cannot write"); return -1; } if (count < len * sizeof(SP16)) { fprintf(stderr, "adinrec: cannot write more %d bytes\ncurrent length = %d\n", count, speechlen * sizeof(SP16)); return -1; } } else { if (wrwav_data(fp, &(now[start]), len) == FALSE) { fprintf(stderr, "adinrec: cannot write\n"); return -1; } } /* accumulate sample num of this segment */ speechlen += len; /* if input length reaches limit, rehash the ad-in buffer */ if (recog->jconf->input.speech_input == SP_MIC) { if (speechlen > MAXSPEECHLEN - 16000) { recog->adin->rehash = TRUE; } } /* progress bar in dots */ fprintf(stderr, "."); return(0); } /** * * 読み込んだサンプル列をソケットデスクリプタ "fd" 上のadinnetサーバに送信 * するコールバック関数 * * @param now [in] 録音されたサンプル列 * @param len [in] 長さ(サンプル数) * * @return エラー時 -1,処理成功時 0,処理成功+区間終端検出時 1 を返す. * * * Callback handler to record the sample fragments to adinnet server * pointed by the socket descriptor "fd". 
* * @param now [in] recorded fragments of speech sample * @param len [in] length of above in samples * * @return -1 on device error (require caller to exit and terminate input), * 0 on success (allow caller to continue), * 1 on succeeded but segmentation detected (require caller to exit but * input will continue in the next call. * */ static int adin_callback_adinnet(SP16 *now, int len, Recog *recog) { int count; int start, w; int i; start = 0; if (recog->jconf->input.speech_input == SP_MIC && speechlen == 0) { /* this is first up-trigger */ if (rewind_msec > 0 && !recog->adin->is_valid_data) { /* not spoken currently but has data to process at first trigger */ /* it means that there are old spoken segments */ /* disgard them */ printf("disgard already recorded %d samples\n", len); return 0; } /* erase "<<>>" text on tty */ fprintf(stderr, "\r \r"); if (rewind_msec > 0) { /* when -rewind value set larger than 0, the speech data spoken while pause will be considered back to the specified msec. */ printf("buffered samples=%d\n", len); w = rewind_msec * sfreq / 1000; if (len > w) { start = len - w; len = w; } else { start = 0; } printf("will process from %d\n", start); } } #ifdef WORDS_BIGENDIAN swap_sample_bytes(&(now[start]), len); #endif for (i=0;iadin->enable_thread) { /* if input length reaches limit, rehash the ad-in buffer */ if (recog->adin->speechlen > MAXSPEECHLEN - 16000) { recog->adin->rehash = TRUE; fprintf(stderr, "+"); } } #endif /* display progress in dots */ fprintf(stderr, "."); return(0); } /**********************************************************************/ /** * * adinnetサーバにセグメント終了信号を送信する * * * * Send end-of-segment singal to adinnet server. * * */ static void adin_send_end_of_segment() { char p; int i; for(i=0;i * 音声取り込み中にサーバからの中断/再開コマンドを受け取るための * コールバック関数 * * @return コマンド無しか再開コマンドで録音続行の場合 0, * エラー時 -2, 中断コマンドを受信して録音を中断すべきとき -1 を返す. * * * Callback function for A/D-in processing to check pause/resume * command from adinnet server. * * @return 0 when no command or RESUME command to tell caller to * continue recording, -1 when received a PAUSE command and tell caller to * stop recording, or -2 when error. * */ static int adinnet_check_command() { fd_set rfds; struct timeval tv; int status; int cnt, ret; char com; int i, max_sd; /* check if some commands are waiting in queue */ FD_ZERO(&rfds); max_sd = 0; for(i=0;i 0) { /* there are some data */ for (i=0;i\n", i+1); stop_at_next = TRUE; /* mark to pause at the end of this input */ /* tell caller to stop recording */ return -1; case '1': /* resume */ fprintf(stderr, "<#%d: RESUME - already running, ignored>\n", i+1); /* we are already running, so just continue */ break; case '2': /* terminate */ fprintf(stderr, "<#%d: TERMINATE>\n", i+1); stop_at_next = TRUE; /* mark to pause at the end of this input */ /* tell caller to stop recording immediately */ return -2; break; default: fprintf(stderr, "adintool: unknown command from #%d: %d\n", i+1,com); unknown_command_counter++; /* avoid infinite loop in that case... */ /* this may happen when connection is terminated from server side */ if (unknown_command_counter > 100) { fprintf(stderr, "killed by a flood of unknown commands from server\n"); exit(1); } } } } } return 0; /* continue ad-in */ } static int resume_count[MAXCONNECTION]; ///< Number of incoming resume commands for resume synchronization /** * * サーバから再開コマンドを受信するまで待つ.再開コマンドを受信したら * 終了する. * * @return エラー時 -1, 通常終了は 0 を返す. * * * Wait for resume command from server. * * @return 0 on normal exit, or -1 on error. 
* */ static int adinnet_wait_command() { fd_set rfds; int status; int cnt, ret; char com; int i, count, max_sd; fprintf(stderr, "<<< waiting RESUME >>>"); while(1) { /* check for synchronized resume */ if (loose_sync) { for(i=0;i= adinnet_servnum) { /* all count > 0 */ for(i=0;i>RESUME\n"); return 1; /* restart recording */ } } else { /* force same resume count among servers */ count = resume_count[0]; for(i=1;i= adinnet_servnum && count > 0) { /* all resume counts are the same, actually resume */ for(i=0;i>RESUME\n"); return 1; /* restart recording */ } } /* not all host send me resume command */ FD_ZERO(&rfds); max_sd = 0; for(i=0;i\n", i+1); for(i=0;i\n", i+1); } break; case '1': /* resume */ /* do resume */ resume_count[i]++; if (loose_sync) { fprintf(stderr, "<#%d: RESUME>\n", i+1); } else { fprintf(stderr, "<#%d: RESUME @%d>\n", i+1, resume_count[i]); } break; case '2': /* terminate */ /* already paused, so just wait for next command */ if (loose_sync) { fprintf(stderr, "<#%d: TERMINATE - already paused, reset sync>\n", i+1); for(i=0;i\n", i+1); } break; default: fprintf(stderr, "adintool: unknown command from #%d: %d\n", i+1, com); unknown_command_counter++; /* avoid infinite loop in that case... */ /* this may happen when connection is terminated from server side */ if (unknown_command_counter > 100) { fprintf(stderr, "killed by a flood of unknown commands from server\n"); exit(1); } } } } } } return 0; } /* close file */ static boolean close_files() { size = sizeof(SP16) * speechlen; if (writing_file) { if (use_raw) { if (close(fd) != 0) { perror("adinrec"); return FALSE; } } else { if (wrwav_close(fp) == FALSE) { fprintf(stderr, "adinrec: failed to close file\n"); return FALSE; } } printf("%s: %d samples (%.2f sec.) [%6d (%5.2fs) - %6d (%5.2fs)]\n", outpath, speechlen, (float)speechlen / (float)sfreq, trigger_sample, (float)trigger_sample / (float)sfreq, trigger_sample + speechlen, (float)(trigger_sample + speechlen) / (float)sfreq); writing_file = FALSE; } return TRUE; } /* Interrupt signal handling */ static void interrupt_record(int signum) { fprintf(stderr, "[Interrupt]"); if (speech_output == SPOUT_FILE) { /* close files */ close_files(); } /* terminate program */ exit(1); } static void record_trigger_time(Recog *recog, void *data) { trigger_sample = recog->adin->last_trigger_sample; } /** * * メイン関数 * * @param argc [in] 引数列の長さ * @param argv [in] 引数列 * * @return * エラー時 1,通常終了時 0 を返す. * * Main function. * * @param argc [in] number of argument. * @param argv [in] array of arguments. * * @return 1 on error, 0 on success. 
* */ int main(int argc, char *argv[]) { Recog *recog; Jconf *jconf; int ret; int i; boolean is_continues; /* create instance */ recog = j_recog_new(); jconf = j_jconf_new(); recog->jconf = jconf; /********************/ /* setup parameters */ /********************/ /* register additional options */ j_add_option("-in", 1, 1, "input from", opt_in); j_add_option("-out", 1, 1, "output to", opt_out); j_add_option("-server", 1, 1, "hostname (-out adinnet)", opt_server); j_add_option("-NA", 1, 1, "NetAudio server host:unit (-in netaudio)", opt_NA); j_add_option("-port", 1, 1, "port number (-out adinnet)", opt_port); j_add_option("-inport", 1, 1, "port number (-in adinnet)", opt_inport); j_add_option("-filename", 1, 1, "(base) filename to record (-out file)", opt_filename); j_add_option("-startid", 1, 1, "recording start id (-out file)", opt_startid); j_add_option("-freq", 1, 1, "sampling frequency in Hz", opt_freq); j_add_option("-nosegment", 0, 0, "not segment input speech, record all", opt_nosegment); j_add_option("-segment", 0, 0, "force segment input speech", opt_segment); j_add_option("-oneshot", 0, 0, "exit after the first input", opt_oneshot); j_add_option("-raw", 0, 0, "save in raw (BE) format", opt_raw); j_add_option("-autopause", 0, 0, "automatically pause at each input end", opt_autopause); j_add_option("-loosesync", 0, 0, "loose sync of resume among servers", opt_loosesync); j_add_option("-rewind", 1, 1, "rewind to the msec", opt_rewind); j_add_option("-h", 0, 0, "display this help", opt_help); j_add_option("-help", 0, 0, "display this help", opt_help); j_add_option("--help", 0, 0, "display this help", opt_help); /* when no argument, output help and exit */ if (argc <= 1) { opt_help(jconf, NULL, 0); return 0; } /* read arguments and set parameters */ if (j_config_load_args(jconf, argc, argv) == -1) { fprintf(stderr, "Error reading arguments\n"); return -1; } /* check needed arguments */ if (speech_output == SPOUT_FILE && filename == NULL) { fprintf(stderr, "Error: output filename not specified\n"); return(-1); } if (speech_output == SPOUT_ADINNET && adinnet_servnum < 1) { fprintf(stderr, "Error: adinnet server name for output not specified\n"); return(-1); } if (jconf->input.speech_input == SP_ADINNET && speech_output != SPOUT_ADINNET && adinnet_servnum >= 1) { fprintf(stderr, "Warning: you specified port num by -port, but it's for output\n"); fprintf(stderr, "Warning: you may specify input port by -inport instead.\n"); fprintf(stderr, "Warning: now the default value (%d) will be used\n", ADINNET_PORT); } #ifdef USE_NETAUDIO if (jconf->input.speech_input == SP_NETAUDIO && jconf->input.netaudio_devname == NULL) { fprintf(stderr, "Error: NetAudio server name not specified\n"); return(-1); } #endif if (adinnet_portnum != adinnet_servnum) { /* if only one server, use default */ if (adinnet_servnum == 1) { adinnet_port[0] = ADINNET_PORT; adinnet_portnum = 1; } else { fprintf(stderr, "Error: you should specify both server names and different port for each!\n"); fprintf(stderr, "\tserver:"); for(i=0;iam_root->analysis.para), &(jconf->am_root->analysis.para_default)); /* set some values */ jconf->input.sfreq = jconf->am_root->analysis.para.smp_freq; jconf->input.period = jconf->am_root->analysis.para.smp_period; jconf->input.frameshift = jconf->am_root->analysis.para.frameshift; jconf->input.framesize = jconf->am_root->analysis.para.framesize; /* disable successive segmentation when no segmentation available */ if (!jconf->detect.silence_cut) continuous_segment = FALSE; /* store sampling rate 
locally */ sfreq = jconf->am_root->analysis.para.smp_freq; /********************/ /* setup for output */ /********************/ if (speech_output == SPOUT_FILE) { /* allocate work area for output file name */ if (continuous_segment) { outpath = (char *)mymalloc(strlen(filename) + 10); } else { if (use_raw) { outpath = filename; } else { outpath = new_output_filename(filename, ".wav"); } } } else if (speech_output == SPOUT_ADINNET) { /* connect to adinnet server(s) */ for(i=0;iinput.speech_input == SP_ADINNET) { jconf->input.adinnet_port = adinnet_port_in; } if (j_adin_init(recog) == FALSE) { fprintf(stderr, "Error in initializing adin device\n"); return -1; } if (rewind_msec > 0) { /* allow adin module to keep triggered speech while pausing */ #ifdef HAVE_PTHREAD if (recog->adin->enable_thread) { recog->adin->ignore_speech_while_recog = FALSE; } #endif } /*********************/ /* add some callback */ /*********************/ callback_add(recog, CALLBACK_EVENT_SPEECH_START, record_trigger_time, NULL); /**************************************/ /* display input/output configuration */ /**************************************/ put_status(recog); /*******************/ /* begin recording */ /*******************/ if (continuous_segment) { /* reset parameter for successive output */ total_speechlen = 0; sid = startid; } fprintf(stderr,"[start recording]\n"); if (jconf->input.speech_input == SP_RAWFILE) file_counter = 0; /*********************/ /* input stream loop */ /*********************/ while(1) { /* begin A/D input of a stream */ ret = j_open_stream(recog, NULL); switch(ret) { case 0: /* succeeded */ break; case -1: /* error */ /* go on to next input */ continue; case -2: /* end of recognition process */ switch(jconf->input.speech_input) { case SP_RAWFILE: fprintf(stderr, "%d files processed\n", file_counter); break; case SP_STDIN: fprintf(stderr, "reached end of input on stdin\n"); break; default: fprintf(stderr, "failed to begin input stream\n"); } /* exit recording */ goto record_end; } /*********************************/ /* do VAD and recording */ /*********************************/ do { /* process one segment with segmentation */ /* for incoming speech input, speech detection and segmentation are performed and, adin_callback_* is called for speech output for each segment block. */ /* adin_go() return when input segmented by long silence, or input stream reached to the end */ speechlen = 0; stop_at_next = FALSE; if (jconf->input.speech_input == SP_MIC) { fprintf(stderr, "<<< please speak >>>"); } if (speech_output == SPOUT_ADINNET) { ret = adin_go(adin_callback_adinnet, adinnet_check_command, recog); } else { ret = adin_go(adin_callback_file, NULL, recog); } /* return value of adin_go: -2: input terminated by pause command from adinnet server -1: input device read error or callback process error 0: paused by input stream (end of file, etc..) 
>0: detected end of speech segment: by adin-cut, or by callback process (or return value of ad_check (<0) (== not used in this program)) */ /* if PAUSE or TERMINATE command has been received while input, stop_at_next is TRUE here */ switch(ret) { case -2: /* terminated by terminate command from server */ fprintf(stderr, "[terminated by server]\n"); break; case -1: /* device read error or callback error */ fprintf(stderr, "[error]\n"); break; case 0: /* reached to end of input */ fprintf(stderr, "[eof]\n"); break; default: /* input segmented by silence or callback process */ fprintf(stderr, "[segmented]\n"); break; } if (ret == -1) { /* error in input device or callback function, so terminate program here */ return 1; } /*************************/ /* one segment processed */ /*************************/ if (speech_output == SPOUT_FILE) { /* close output files */ if (close_files() == FALSE) return 1; } else if (speech_output == SPOUT_ADINNET) { if (speechlen > 0) { if (ret >= 0 || stop_at_next) { /* segmented by adin-cut or end of stream or server-side command */ /* send end-of-segment ack to client */ adin_send_end_of_segment(); } /* output info */ printf("sent: %d samples (%.2f sec.) [%6d (%5.2fs) - %6d (%5.2fs)]\n", speechlen, (float)speechlen / (float)sfreq, trigger_sample, (float)trigger_sample / (float)sfreq, trigger_sample + speechlen, (float)(trigger_sample + speechlen) / (float)sfreq); } } /*************************************/ /* increment ID and total sample len */ /*************************************/ if (continuous_segment) { total_speechlen += speechlen; sid++; } /***************************************************/ /* with adinnet server, if terminated by */ /* server-side PAUSE command, wait for RESUME here */ /***************************************************/ if (pause_each) { /* pause at each end */ //if (speech_output == SPOUT_ADINNET && speechlen > 0) { if (speech_output == SPOUT_ADINNET) { if (adinnet_wait_command() < 0) { /* command error: terminate program here */ return 1; } } } else { if (speech_output == SPOUT_ADINNET && stop_at_next) { if (adinnet_wait_command() < 0) { /* command error: terminate program here */ return 1; } } } /* loop condition check */ is_continues = FALSE; if (continuous_segment && (ret > 0 || ret == -2)) { is_continues = TRUE; } } while (is_continues); /* to the next segment in this input stream */ /***********************/ /* end of input stream */ /***********************/ adin_end(recog->adin); } /* to the next input stream (i.e. next input file in SP_RAWFILE) */ record_end: if (speech_output == SPOUT_FILE) { if (continuous_segment) { printf("recorded total %d samples (%.2f sec.) segmented to %s.%04d - %s.%04d files\n", total_speechlen, (float)total_speechlen / (float)sfreq, filename, 0, filename, sid-1); } } return 0; }