package anotacja; public class BCBagOfWords { public InvertedIndex[] index; public BCBagOfWords(InvertedIndex[] index) { this.index = index; } public float[] measure(QueryData query) { //extract the most important information for BoW int[] data = query.groups; //tablica pomiarem podobienstwa dla calej bazy float[] similarity = new float[InvertedIndex.content.size()]; //wyznacz L1 norme dla zapytania float queryL1 = 0; for (int i = 0; i < data.length; i++) { int clust = data[i]; queryL1 += InvertedIndex.idf[clust]; } //System.out.print("INDEX: "); int clust = -1; int cQuery = 0; for (int i = 0; i < data.length; i++) { cQuery++; if ((i == data.length - 1) || (data[i] != data[i + 1])) { clust = data[i]; if (this.index[clust] == null) { this.index[clust] = new InvertedIndex(clust); if (!this.index[clust].readIndex()) { //nastapil blad odczytu, przerwij return null; } if (this.index[clust].images == null) { //indeks nie zostal wczytany, przerwij return null; } } float vQuery = (float)Math.sqrt(cQuery * InvertedIndex.idf[clust] / (data.length - 1)); int cDatab = 0; int[] clInd = this.index[clust].images; for (int j = 0; j < clInd.length - 1; j++) { cDatab++; if (clInd[j] != clInd[j + 1]) { //if (clInd[j] == 43) { System.out.print(clust + "/" + cDatab + " "); } float vDatab = (float)Math.sqrt(cDatab * InvertedIndex.idf[clust] / InvertedIndex.len[clInd[j]]); similarity[clInd[j]] += vQuery * vDatab; cDatab = 0; } } cQuery = 0; } } //System.out.println(); //System.out.print("SIM: "); //for (int i = 0; i < content.size(); i++) { System.out.format("%7.5f ", similarity[i]); } //System.out.println(); for (int i = 0; i < InvertedIndex.content.size(); i++) { float norm = (float)Math.sqrt((queryL1 / (data.length - 1)) * (InvertedIndex.nL1[i] / InvertedIndex.len[i])); similarity[i] = similarity[i] / norm; //za duzo punktow kluczowych, utnij podobienstwo if (InvertedIndex.len[i] > 7000) similarity[i] = 0.001f; } return similarity; //System.out.print("S-N: "); //for (int i = 0; i < content.size(); i++) { System.out.format("%7.5f ", similarity[i]); } //System.out.println(); //List> result = new ArrayList>(); //for (int i = 0; i < content.size(); i++) result.add(new Ordered(content.get(i), similarity[i])); //Collections.sort(result); } }