/***************************************************************************
 *   This file is part of the 'Shout LVCS Recognition toolkit'.            *
 ***************************************************************************
 *   Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 by Marijn Huijbregts *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; version 2 of the License.               *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/
#ifndef TRAIN_SPEAKER_SEGMENTER_H
#define TRAIN_SPEAKER_SEGMENTER_H

#include "standard.h"
#include "segmenter.h"
#include "trainphonemodel.h"

////////////////////////////////////////////////////////////////////////////
/// \brief This class can 'train' new speaker clusters. 
/// One could call determining the clusters training because the phone (SIL)
/// training procedures are used. But in fact, training is done on the
/// target data (not on a training set), so it is not really training but
/// processing. See our Spring 2006 NIST Rich Transcription Speaker Diarization
/// paper for more information.
/// This is meant to be the simplest form of the HMM-based merging
/// diarization method. Derived classes may be created for testing
/// new algorithms.
////////////////////////////////////////////////////////////////////////////

class Train_Segmenter : public Segmenter
{
  // NOTE(review): this header only declares the class. Initialisation and use of every
  // member below happen in the implementation file, which is not visible from here, so
  // the per-member remarks are derived from names and types and are hedged accordingly.
  //
  // NOTE(review): the class holds several raw pointers and declares a destructor, but no
  // copy constructor or copy assignment is declared here. If any of these pointers are
  // owned by this class, copying an instance would be unsafe - confirm ownership in the
  // implementation (and whether Segmenter already disables copying).
  protected:
    /// Fixed-size character buffer (2000 bytes); presumably the output file name - TODO confirm.
    char              outputFileName[2000];
    /// Fixed-size character buffer (200 bytes). Note that train() has a parameter of the same name.
    char              label[200];
    /// Integer IDs; presumably the segmentation IDs handed to the constructor as
    /// sID_train / sID_decode - TODO confirm against the constructor definition.
    int               sadID_train;
    int               sadID_decode;
    /// Integer segmentation IDs. segID_decode is the value returned by getSegmentation().
    /// segID_cheat shares its name with the last parameter of train().
    int               segID;
    int               segID_decode;
    int               segID_cheat;
    /// Pointer-to-pointer to TrainPhoneModel; presumably arrays of per-cluster models - TODO confirm.
    TrainPhoneModel **trainCluster;
    TrainPhoneModel **scoreCluster;
    /// Presumably the number of entries reachable through scoreCluster - TODO confirm.
    int               nrScoreClusters;
    /// Pointer-to-pointer to Vector; element count and meaning are not visible here.
    Vector          **distanceVect;
    /// Pointer to bool; presumably one flag per cluster, related to the constructor's
    /// nonMergeableModels argument - TODO confirm.
    bool             *mergeable;
    int               nrNonMergeModels;
    /// Pointers to double / int; presumably per-cluster arrays - TODO confirm sizes.
    double           *clusterScore;
    int              *clusterSize;
    /// Integer values; usage is not visible here (names suggest a relation to segID_cheat).
    int               cheatSpk1;
    int               cheatSpk2;
    /// Integer counter; presumably tied to the constructor's nrMerges argument - TODO confirm.
    int               numberOfMerges;
    /// Pointer to int; length and contents are defined in the implementation.
    int              *compareClusters;
    /// Pointer to a PhoneModel; whether this class owns it is not visible here.
    PhoneModel       *vtlnModel;
    /// Configuration-like scalars; their values and effects are set in the implementation.
    int               fastMerge;
    bool              prepareForASR;
    int               maxDataPoints;
    bool              detectOverlap;
    int               nrMergeIterations;
    /// Fixed-size array of 10 doubles; the meaning of the 10 slots is not visible here.
    double            bic_meanoffset[10];

    /// Single TrainPhoneModel pointer; purpose and ownership are not visible here.
    TrainPhoneModel  *trcl;

  public:
    /// Constructor. Takes a FeaturePool pointer, two integer IDs (sID_train, sID_decode),
    /// a name string, merge/cluster-count settings (nrMerges, minClusters, maxClusters),
    /// a 'widen' flag and a string describing non-mergeable models.
    /// NOTE(review): how each argument is interpreted is defined in the implementation file.
    Train_Segmenter                       (FeaturePool *fp, int sID_train, int sID_decode, const char *name, 
                                           int nrMerges, int minClusters, int maxClusters, bool widen, const char *nonMergeableModels);
    /// Virtual destructor, so instances can be destroyed through a base-class pointer.
    virtual ~Train_Segmenter              (); 
  
  protected:
    /// Non-virtual training helpers. Each returns a value (double / bool) whose exact
    /// meaning is defined in the implementation - TODO confirm before relying on it.
    /// trainClusters() defaults nrG to -1 when the argument is omitted.
    double trainIteration                 (int nrIterations);
    double trainClusters                  (int nrG = -1);
    bool   mergeClusters                  (int maxClusters, bool smallestWins, bool actuallyMerge);

    /// Non-virtual setup helpers. createInitialModels() defaults outputName to NULL.
            void   createInitialModels    (int segAmount, char *outputName = NULL);
            void   createOverlapTree      (int min);
    /// Virtual methods: these may be overridden by subclasses.
    /// getMergeModelScore() and trainModel() deliver a result through a double pointer
    /// (mergeScore / trainPRes) rather than through the return value.
    virtual void   startMergeIteration    ();
    virtual void   getMergeModelScore     (int model1, int model2, double *mergeScore);
    virtual bool   proceedMerge           (int model1, int model2, int method);
    virtual void   mergeModels            (int model1, int model2);
    virtual void   trainModel             (int model , int nrG, double *trainPRes);

    /// Takes a FILE pointer; whether it reads from or writes to it is not visible here.
    void   getOverlap                     (FILE *file);
    
    /// Takes a file name; presumably writes posteriors to that file - TODO confirm.
    void   writePosteriors                (char *fileName);
    
  public:
    /// Public entry point for the training/merging procedure (see the implementation for
    /// the exact steps). segID_cheat defaults to -1 when omitted.
    /// NOTE(review): the parameters 'label' and 'segID_cheat' have the same names as data
    /// members; if the definition uses these names, the parameters hide the members there.
    void   train                          (int maxClusters, char *label, char *tempStr, char *feaPosteriors, int segID_cheat = -1);
    /// Returns the current value of segID_decode.
    inline int getSegmentation            ()  const {return segID_decode;}
    /// Take an already-opened FILE pointer; presumably write / read the cluster models
    /// to / from that stream - TODO confirm format and stream ownership in the implementation.
    void   storeClusters                  (FILE *outFile, char *outputName);
    void   loadClusters                   (FILE *inFile);
};

#endif