/**
* @file ss.c
*
*
* @brief スペクトル減算
*
* 実際のスペクトル減算は wav2mfcc-buffer.c および wav2mfcc-pipe.c で
* 行われます.ここでは平均スペクトルの推定とファイルI/Oのみ定義されています.
*
*
*
* @brief Spectral subtraction
*
* The actual subtraction will be performed in wav2mfcc-buffer.c and
* wav2mfcc-pipe.c. These functions are for estimating average spectrum
* of audio input, and file I/O for that.
*
*
* @author Akinobu LEE
* @date Thu Feb 17 17:19:54 2005
*
* $Revision: 1.5 $
*
*/
/*
* Copyright (c) 1991-2012 Kawahara Lab., Kyoto University
* Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
* Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology
* All rights reserved
*/
#include <sent/stddefs.h>
#include <sent/mfcc.h>
#include <stdlib.h>
/**
 * Read binary items from a file, swapping bytes where needed.
 * The file is assumed to be BIG ENDIAN: unless WORDS_BIGENDIAN is defined,
 * every item that was read is passed through swap_bytes().
 *
 * @param buf [out] buffer that receives the items
 * @param unitbyte [in] size of one item in bytes
 * @param unitnum [in] number of items to read
 * @param fp [in] file pointer to read from
 *
 * @return TRUE when all @a unitnum items were read, FALSE on a short read
 * (in which case no byte swapping is applied to @a buf).
 */
static boolean
myread(void *buf, size_t unitbyte, int unitnum, FILE *fp)
{
  size_t nread = myfread(buf, unitbyte, unitnum, fp);

  if (nread >= (size_t)unitnum) {
#ifndef WORDS_BIGENDIAN
    /* host is not big endian: convert the items just read */
    swap_bytes(buf, unitbyte, unitnum);
#endif
    return(TRUE);
  }
  return(FALSE);
}
/**
 * Load a noise spectrum from file.
 *
 * The file is read through myread(): first one int holding the number of
 * spectrum values, then that many floats.
 *
 * @param filename [in] path name of noise spectrum file
 * @param slen [out] length of the returned buffer (number of floats);
 * left untouched when the function fails
 *
 * @return a newly allocated buffer that holds the loaded noise spectrum,
 * or NULL if the file cannot be opened, is truncated, or declares a
 * non-positive length.  On failure the file is closed and no memory is
 * left allocated.
 */
float *
new_SS_load_from_file(char *filename, int *slen)
{
  FILE *fp;
  int num;
  float *sbuf = NULL;

  /* open file */
  jlog("Stat: ss: reading Noise Spectrum for SS\n");
  if ((fp = fopen_readfile(filename)) == NULL) {
    jlog("Error: ss: failed to open \"%s\"\n", filename);
    return(NULL);
  }

  /* read length */
  if (myread(&num, sizeof(int), 1, fp) == FALSE) {
    jlog("Error: ss: failed to read \"%s\"\n", filename);
    goto fail;
  }
  /* the length comes from the file: never use it unchecked as an
     allocation size (a negative value would wrap to a huge size_t) */
  if (num <= 0) {
    jlog("Error: ss: invalid spectrum length %d in \"%s\"\n", num, filename);
    goto fail;
  }

  /* allocate */
  sbuf = (float *)mymalloc(sizeof(float) * num);

  /* read data */
  if (myread(sbuf, sizeof(float), num, fp) == FALSE) {
    jlog("Error: ss: failed to read \"%s\"\n", filename);
    goto fail;
  }

  /* close file */
  fclose_readfile(fp);

  *slen = num;
  jlog("Stat: ss: done\n");
  return(sbuf);

 fail:
  /* common error exit: release whatever was acquired so far.
     NOTE(review): assumes mymalloc() memory is released with free() -- confirm */
  free(sbuf);
  fclose_readfile(fp);
  return(NULL);
}
/**
 * Compute average spectrum of audio input.
 * This is used to estimate a noise spectrum from input samples.
 *
 * The input is cut into frames of para->framesize samples, advancing by
 * para->frameshift samples.  Each frame is optionally zero-meaned, then
 * pre-emphasised, Hamming-windowed, zero-padded to w->fb.fftN points and
 * transformed by FFT.  The magnitudes sqrt(Re^2 + Im^2) of all frames are
 * summed per bin and divided by the number of frames.
 *
 * If the input is shorter than one frame, an error is logged and a buffer
 * whose first w->fb.fftN elements are zero is returned.
 *
 * @param wave [in] input audio data sequence
 * @param wavelen [in] number of samples in @a wave
 * @param slen [out] length of returned buffer (set to w->fb.fftN)
 * @param w [i/o] MFCC calculation work area (w->bf, w->fb.Re and w->fb.Im
 * are overwritten)
 * @param para [in] parameter
 *
 * @return a newly allocated buffer that contains the calculated spectrum.
 */
float *
new_SS_calculate(SP16 *wave, int wavelen, int *slen, MFCCWork *w, Value *para)
{
  float *spec;
  int t, framenum, start, i;
  double x, y;

  /* allocate work area */
  spec = (float *)mymalloc((w->fb.fftN + 1) * sizeof(float));
  for (i = 0; i < w->fb.fftN; i++) spec[i] = 0.0;

  /* Calculate sum of noise power spectrum */
  /* Integer division truncates toward zero, so when wavelen is only
     slightly smaller than framesize the plain formula would still give
     one frame; test for the short input explicitly. */
  if (wavelen < para->framesize) {
    framenum = 0;
  } else {
    framenum = (int)((wavelen - para->framesize) / para->frameshift) + 1;
  }
  if (framenum < 1) {
    jlog("Error: too short to get noise spectrum: length < 1 frame\n");
    jlog("Error: no SS will be performed\n");
    *slen = w->fb.fftN;
    return spec;
  }

  for (t = 0; t < framenum; t++) {
    /* 0-based index of the first sample of this frame */
    start = t * para->frameshift;

    /* copy exactly framesize samples into the 1-based work buffer, so
       that neither wave[] nor w->bf[] is accessed past the frame */
    for (i = 1; i <= para->framesize; i++) {
      w->bf[i] = (float)wave[start + i - 1];
    }

    if (para->zmeanframe) {
      ZMeanFrame(w->bf, para->framesize);
    }

    /* Pre-emphasis */
    PreEmphasise(w->bf, para->framesize, para->preEmph);

    /* Hamming Window */
    Hamming(w->bf, para->framesize, w);

    /* FFT Spectrum: real part is the windowed frame padded with zeros
       up to fftN points, imaginary part is all zero */
    for (i = 1; i <= para->framesize; i++) {
      w->fb.Re[i-1] = w->bf[i];
      w->fb.Im[i-1] = 0.0;
    }
    for (i = para->framesize + 1; i <= w->fb.fftN; i++) {
      w->fb.Re[i-1] = 0.0;
      w->fb.Im[i-1] = 0.0;
    }
    FFT(w->fb.Re, w->fb.Im, w->fb.n, w);

    /* Sum noise spectrum (magnitude of each bin) */
    for (i = 0; i < w->fb.fftN; i++) {
      x = w->fb.Re[i];
      y = w->fb.Im[i];
      spec[i] += sqrt(x * x + y * y);
    }
  }

  /* Calculate average noise spectrum */
  for (i = 0; i < w->fb.fftN; i++) {
    spec[i] /= (float)framenum;
  }

  /* return the new spec[] */
  *slen = w->fb.fftN;
  return(spec);
}