/*************************************************************************** * This file is part of the 'Shout LVCS Recognition toolkit'. * *************************************************************************** * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 by Marijn Huijbregts * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; version 2 of the License. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #include #include #include #include "standard.h" #include "adapt_am.h" #include "phonemodel.h" #include "vector.h" #include "train_segmenter.h" #include "featurepool.h" #include "shoutconfig.h" #define PHONEFILE "/phones.lst" #include "shout-misc.h" using namespace WriteFileLittleBigEndian; ///////////////////////////////////////////////////////////////////////////////////////////////////// /// The constructor will check the directory that contains the adaptation data, load the data /// and pass it to Adapt_AM_TreeNode. The resulting model will be stored to disc. ///////////////////////////////////////////////////////////////////////////////////////////////////// Adapt_AM::Adapt_AM(char *adapt_dir, char *amName, char *clusterAM, char *newAmName) { char str[MAXSTR]; char dataDir[MAXSTR]; int numberOfModels; int numberOfSil; DataStats *data = NULL; models = NULL; FILE *trainDataFile; int numberOfClusters = 1; int vectorSize = ASR_DEFAULT_VECTORSIZE; TrainPhoneModel **clusterModels = NULL; FILE *modelFile = fopen(clusterAM,"rb"); if(modelFile == NULL) { USER_WARNING("The clusterAM could not be openend. I'm assuming we're not using clustered adaptation...\n"); } else { int vect,nrC; freadEndianSafe(&vect,1,sizeof(vect),modelFile); freadEndianSafe(&numberOfClusters,1,sizeof(numberOfClusters),modelFile); freadEndianSafe(&nrC,1,sizeof(nrC),modelFile); if(vect != vectorSize) { USER_ERROR("The clusterAM is not compatible! Wrong vector size..."); } if(nrC != 1) { USER_ERROR("The clusterAM is not compatible! Number of 'clusters' in file should be one (no clusters within clusters)..."); } clusterModels = new TrainPhoneModel*[numberOfClusters]; for(int i=0;i 0) { checkData = fopen(str,"rb"); if(checkData) { fclose(checkData); } else { fclose(phoneFile); delete[] data; data = NULL; numberOfModels = 0; USER_ERROR("Data samples for one or more phones are missing!"); } } } fclose(phoneFile); } PRINTF3("Checked the data directory. Seems okay. Number of models: %d with %d clusters.\n",numberOfModels,numberOfClusters); int numberOfClusteredModels = numberOfModels*numberOfClusters; models = new TrainPhoneModel*[numberOfClusteredModels]; FeaturePool *pools[numberOfClusteredModels]; int poolsTime[numberOfClusteredModels]; int maxPoolSize = TRAIN_MAX_FEATURE_POOL_SIZE/100; if(numberOfModels >= 5) { maxPoolSize = TRAIN_MAX_FEATURE_POOL_SIZE/(numberOfModels/5); } PRINTF("Initializing the adaptation data pools..\n"); for(int i=0;isetTrainingData(pools[i],TRAIN_ID1,-1); } for(int j=1;jsetTrainingData(pools[i+j*numberOfModels],TRAIN_ID1,-1); } } } fclose(modelFile); } PRINTF("Loading adaptation data...\n"); for(int i=0;iaddSegment(TRAIN_ID1, poolsTime[ID], poolsTime[ID]+num-1,0,contextLeft*numberOfModels+contextRight); pools[ID]->setCurSegmentVectors(TRAIN_ID1,trainDataFile); poolsTime[ID] += num; } } fclose(trainDataFile); } else { USER_ERROR("The training directory is empty or does not exist!\n"); } } PRINTF("Calculating accumulators for the Gaussian components (training)...\n"); for(int i=0;igetStatistics()->name); fflush(stdout); for(int j=0;jadapt_clear(); models[i+(numberOfModels*j)]->adapt_setAcTrain(); } PRINTF("\n"); } if((numberOfModels == 1) && (numberOfSil == 1) && (strncmp(data[0].name,"UBM",3) == 0)) // this is a speaker model... { PRINTF("This is the UBM for SPEAKER RECOGNITION. I'm going to update the Gaussian means using MAP adaptation.\n"); models[0]->mapAdaptMeans(); } else { PRINTF("Creating structural tree for SMAPLR adaptation...\n"); Adapt_AM_TreeNode **adaptationSilTree = new Adapt_AM_TreeNode*[numberOfClusters]; Adapt_AM_TreeNode **adaptationTree = new Adapt_AM_TreeNode*[numberOfClusters]; for(int i=0;imeanGaussian = new Gaussian(vectorSize); adaptationSilTree[i] = new Adapt_AM_TreeNode(vectorSize); adaptationSilTree[i]->meanGaussian = new Gaussian(vectorSize); } for(int j=0;jisSilModel()) { models[i+j*numberOfModels]->adapt_setInitialNode(adaptationSilTree[j]); } else { models[i+j*numberOfModels]->adapt_setInitialNode(adaptationTree[j]); } } } for(int j=0;jsplit(1); while(splitRes > 0) { totNodes += splitRes; for(int i=0;iadapt_setNode(); } splitRes = adaptationTree[j]->split(1); } PRINTF3("Phone Tree %d created (%d nodes)\n",j,totNodes); PRINTF("Filling the helper matrices and distribute them through the tree\n"); // Determine the first summations of Z and W... (going up) adaptationTree[j]->setHelperMatrices_1((PhoneModel**)(&models[j*numberOfModels]), numberOfModels); PRINTF("Calculating W for each node\n"); adaptationTree[j]->setHelperMatrices_2(); // Determine the adaptation matrices... (going down) PRINTF("Adapting gaussians...\n"); for(int i=0;iadapt_adapt(); } PRINTF("Filling helper matrix for variance adaptation..\n"); for(int i=0;iadapt_setVarTrans(); } PRINTF("Adapting variances..\n"); for(int i=0;iadapt_adaptVar(); } } PRINTF("Done. Cleaning up adaptation matrices...\n"); fflush(stdout); for(int j=0;jwriteModel(modelFile); } /* if(clusterModels != NULL) { for(int i=0;iwriteModel(modelFile); } } */ fclose(modelFile); } PRINTF("Done. Have a nice day.\n"); for(int j=0;j