// transform/hlda.cc // Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc.; Georg Stemmer // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. #include #include "util/common-utils.h" #include "transform/hlda.h" #include "transform/mllt.h" namespace kaldi { void HldaAccsDiagGmm::Read(std::istream &is, bool binary, bool add) { ExpectToken(is, binary, ""); ExpectToken(is, binary, ""); int32 dim; // just the #elems of S_, equals model-dim+1. ReadBasicType(is, binary, &dim); if (add && S_.size() != 0 && static_cast(dim) != S_.size()) KALDI_ERR << "HldaAccsDiagGmm::Read, summing accs of different size."; if (!add || S_.empty()) S_.resize(dim); for (size_t i = 0; i < S_.size(); i++) S_[i].Read(is, binary, add); ExpectToken(is, binary, ""); int32 npdfs; ReadBasicType(is, binary, &npdfs); if (add && occs_.size() != 0 && static_cast(npdfs) != occs_.size()) KALDI_ERR << "HldaAccsDiagGmm::Read, summing accs of different size."; if (!add || occs_.empty()) { occs_.resize(npdfs); mean_accs_.resize(npdfs); } for (size_t i = 0; i < occs_.size(); i++) occs_[i].Read(is, binary, add); ExpectToken(is, binary, ""); for (size_t i = 0; i < mean_accs_.size(); i++) mean_accs_[i].Read(is, binary, add); ExpectToken(is, binary, ""); ReadBasicType(is, binary, &speedup_); if (speedup_ != 1.0) { if (!add || occs_sub_.empty()) { occs_sub_.resize(npdfs); mean_accs_sub_.resize(npdfs); } ExpectToken(is, binary, ""); for (size_t i = 0; i < occs_sub_.size(); i++) occs_sub_[i].Read(is, binary, add); ExpectToken(is, binary, ""); for (size_t i = 0; i < mean_accs_sub_.size(); i++) mean_accs_sub_[i].Read(is, binary, add); } ExpectToken(is, binary, ""); ReadBasicType(is, binary, &sample_gconst_); ExpectToken(is, binary, ""); } void HldaAccsDiagGmm::Write(std::ostream &os, bool binary) const { WriteToken(os, binary, ""); WriteToken(os, binary, ""); int32 dim = S_.size(); // just the #elems of S_, equals model-dim+1. WriteBasicType(os, binary, dim); for (int32 i = 0; i < dim; i++) S_[i].Write(os, binary); KALDI_ASSERT(mean_accs_.size() == occs_.size()); WriteToken(os, binary, ""); int32 npdfs = occs_.size(); WriteBasicType(os, binary, npdfs); for (int32 i = 0; i < npdfs; i++) occs_[i].Write(os, binary); WriteToken(os, binary, ""); for (int32 i = 0; i < npdfs; i++) mean_accs_[i].Write(os, binary); WriteToken(os, binary, ""); WriteBasicType(os, binary, speedup_); if (speedup_ != 1.0) { WriteToken(os, binary, ""); for (int32 i = 0; i < npdfs; i++) occs_sub_[i].Write(os, binary); WriteToken(os, binary, ""); for (int32 i = 0; i < npdfs; i++) mean_accs_sub_[i].Write(os, binary); } WriteToken(os, binary, ""); WriteBasicType(os, binary, sample_gconst_); WriteToken(os, binary, ""); } void HldaAccsDiagGmm::Init(const AmDiagGmm &am, int32 orig_feat_dim, BaseFloat speedup) { KALDI_ASSERT(am.Dim() != 0); int32 num_pdfs = am.NumPdfs(), model_dim = am.Dim(); KALDI_ASSERT(orig_feat_dim > 0 && orig_feat_dim >= model_dim); S_.resize(model_dim+1); for (int32 i = 0; i <= model_dim; i++) S_[i].Resize(orig_feat_dim); occs_.resize(num_pdfs); mean_accs_.resize(num_pdfs); for (int32 i = 0; i < num_pdfs; i++) { occs_[i].Resize(am.NumGaussInPdf(i)); mean_accs_[i].Resize(am.NumGaussInPdf(i), orig_feat_dim); } speedup_ = speedup; if (speedup_ == 1.0) { occs_sub_.resize(0); mean_accs_sub_.resize(0); } else { occs_sub_.resize(num_pdfs); mean_accs_sub_.resize(num_pdfs); for (int32 i = 0; i < num_pdfs; i++) { occs_sub_[i].Resize(am.NumGaussInPdf(i)); mean_accs_sub_[i].Resize(am.NumGaussInPdf(i), orig_feat_dim); } } sample_gconst_ = am.GetPdf(0).gconsts()(0); } void HldaAccsDiagGmm:: AccumulateFromPosteriors(int32 pdf_id, const DiagGmm &gmm, const VectorBase &data, const VectorBase &posteriors) { Vector data_dbl(data); KALDI_ASSERT(static_cast(pdf_id) < occs_.size() && occs_[pdf_id].Dim() == posteriors.Dim()); KALDI_ASSERT(mean_accs_[pdf_id].NumCols() == data.Dim() && "Feature dim mismatch in HLDA computation "); double tot_occ = 0.0; int32 model_dim = S_.size() - 1; Vector tot_occ_times_inv_var(model_dim); if (speedup_ == 1.0) { // no speedup; only one type of acc. for (int32 i = 0; i < posteriors.Dim(); i++) { if (posteriors(i) > 1.0e-05) { BaseFloat occ = posteriors(i); tot_occ += occ; occs_[pdf_id](i) += occ; mean_accs_[pdf_id].Row(i).AddVec(occ, data_dbl); SubVector inv_var(gmm.inv_vars(), i); // this inv-var. tot_occ_times_inv_var.AddVec(occ, inv_var); } } } else { // Using a data subset. // In any case, accumulate regular occs and means. Vector posteriors_dbl(posteriors); occs_[pdf_id].AddVec(1.0, posteriors_dbl); mean_accs_[pdf_id].AddVecVec(1.0, posteriors_dbl, data_dbl); if (RandUniform() > speedup_) return; // continue with probability "speedup". for (int32 i = 0; i < posteriors.Dim(); i++) { if (posteriors(i) > 1.0e-05) { BaseFloat occ = posteriors(i); tot_occ += occ; occs_sub_[pdf_id](i) += occ; mean_accs_sub_[pdf_id].Row(i).AddVec(occ, data_dbl); SubVector inv_var(gmm.inv_vars(), i); // this inv-var. tot_occ_times_inv_var.AddVec(occ, inv_var); } } } if (tot_occ != 0.0) { for (int32 i = 0; i < model_dim; i++) S_[i].AddVec2(tot_occ_times_inv_var(i), data_dbl); S_[model_dim].AddVec2(tot_occ, data_dbl); } } void HldaAccsDiagGmm::Update(AmDiagGmm *am, MatrixBase *Mfull, MatrixBase *M_out, BaseFloat *objf_impr_out, BaseFloat *count_out) const { KALDI_ASSERT(am != NULL && Mfull != NULL); KALDI_ASSERT(!S_.empty()); if (!ApproxEqual(sample_gconst_, am->GetPdf(0).gconsts()(0), 1.0e-05)) { KALDI_ERR << "You have to call the HLDA update with the same model as used " "for accumulation."; } int32 model_dim = S_.size() - 1; KALDI_ASSERT(model_dim == am->Dim()); int32 feat_dim = S_[0].NumRows(); KALDI_ASSERT(feat_dim >= model_dim); KALDI_ASSERT(Mfull->NumRows() == feat_dim && Mfull->NumCols() == feat_dim); // this local G will be like the MLLT stats in a dimension equal // to feat_dim. std::vector > G(feat_dim); // This loop sets G to the outer product of the data, scaled // by inverse var. Later we subtract the mean outer-product. for (int32 i = 0; i < feat_dim; i++) { G[i].Resize(feat_dim); if (i < model_dim) { G[i].CopyFromSp(S_[i]); } else { G[i].CopyFromSp(S_[model_dim]); // unit variance in all the // rest of the dims, so we use the same stats. } } const std::vector > &occs = (speedup_ == 1.0 ? occs_ : occs_sub_); const std::vector > &mean_accs = (speedup_ == 1.0 ? mean_accs_ : mean_accs_sub_); int32 num_pdfs = occs.size(); Vector tot_mean_acc(feat_dim); double tot_occ = 0.0; // will be occ of subset of data, if speedup_ != 1.0 for (int32 p = 0; p < num_pdfs; p++) { int32 num_gauss = occs[p].Dim(); const DiagGmm &gmm = am->GetPdf(p); KALDI_ASSERT(num_gauss == gmm.NumGauss()); for (int32 g = 0; g < num_gauss; g++) { double occ = occs[p](g), inv_occ = (occ == 0.0 ? 0.0 : 1.0/occ); Vector mean(feat_dim); mean.AddVec(inv_occ, mean_accs[p].Row(g)); tot_mean_acc.AddVec(1.0, mean_accs[p].Row(g)); tot_occ += occ; // update G matrices (subtracting outer-product of means, scaled by // occ and inverse-var); has same effect as if G is summed outer product of // (x-mu)^2, scaled by occ and inverse-var. SubVector inv_var(gmm.inv_vars(), g); // this inv-var. for (int32 d = 0; d < model_dim; d++) { G[d].AddVec2(-1.0*occ*inv_var(d), mean); } } } KALDI_ASSERT(tot_occ > 0.0); Vector tot_mean(tot_mean_acc); tot_mean.Scale(1.0 / tot_occ); // subtract total occ times outer product of global mean, from // dimensions of G that correspond to "rejected dimensions" // (with unit-var, global mean). for (int32 d = model_dim; d < feat_dim; d++) G[d].AddVec2(-tot_occ, tot_mean); for (int32 d = 0; d < feat_dim; d++) KALDI_ASSERT(G[d].IsPosDef()); MlltAccs::Update(tot_occ, G, Mfull, objf_impr_out, count_out); SubMatrix Mpart(*Mfull, 0, model_dim, 0, feat_dim); if (M_out) { KALDI_ASSERT(M_out->NumRows() == model_dim && M_out->NumCols() == feat_dim); M_out->CopyFromMat(Mpart); } Matrix Mpart_dbl(Mpart); // Now have to update the model. int32 num_no_data = 0; Vector mean(model_dim); double tot_occ_means = 0; for (int32 p = 0; p < num_pdfs; p++) { int32 num_gauss = static_cast(occs_[p].Dim()); for (int32 g = 0; g < num_gauss; g++) { double occ = occs_[p](g); tot_occ_means += occ; if (occ == 0.0) num_no_data++; // and don't update Gaussian. else { SubVector mean_stats(mean_accs_[p], g); // project mean with transform, to accepted dim. mean.AddMatVec(1.0 / occ, Mpart_dbl, kNoTrans, mean_stats, 0.0); Vector mean_flt(mean); am->SetGaussianMean(p, g, mean_flt); } } am->GetPdf(p).ComputeGconsts(); } KALDI_LOG << "Occupancy count used to update means was " << tot_occ_means; if (num_no_data > 0) { KALDI_WARN << num_no_data << " Gaussians not updated due to no data; " "be careful not to set your silence-weight to exactly zero (e.g. use 0.01)."; } } }