package anotacja;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;

/**
 * Index entry for a single word: the list of image identifiers stored in
 * {@code index/<word>.bin} or {@code index/<word>.txt}, optionally accompanied
 * by three per-entry bytes (x, y, radius).
 *
 * <p>The class also owns the tables shared by all words (header lines, weights
 * and per-image statistics). They are loaded once by
 * {@link #GlobalInitialize(File, boolean)}, which must be called before any
 * index is read because it also fixes the base folder and the storage format.
 *
 * <p>All state set by {@code GlobalInitialize} is static and unsynchronized.
 */
public class InvertedIndex {

    /** Bit of the binary index code that marks the presence of spatial data. */
    private static final int SPATIAL_FLAG = 0x10;
    /** Low bits of the binary index code that select the encoding. */
    private static final int TYPE_MASK = 0x0F;
    /** Encoding 0: deltas on 1 byte, escaping to 2 and then 4 bytes. */
    private static final int TYPE_DENSE = 0;
    /** Encoding 1: deltas on 2 bytes, escaping to 4 bytes. */
    private static final int TYPE_MEDIUM = 1;

    /** Identifier of the word; also the base name of its index file. */
    public int word;

    // main index information
    /**
     * Decoded image identifiers. The array has one slot more than there are
     * entries; the extra trailing slot is left at 0 so that callers can look
     * one element ahead without a bounds check.
     */
    public int[] images;

    // spatial information (only assigned when a binary index carries the spatial flag)
    public short[] xCoord;
    public short[] yCoord;
    public short[] radius;

    // global structures
    /** Lines of the header file, in file order. */
    public static List<String> content;
    /** First column of the stat file (parsed as integers, stored as floats). */
    public static float[] len;
    /** Second column of the stat file. */
    public static float[] nL1;
    /** Values of the weights file (presumably inverse document frequencies, going by the file name). */
    public static float[] idf;

    // storage parameters
    private static boolean binaryStorage;
    private static File baseFolder;
    private static File headerFile;
    private static File weightFile;
    private static File imstatFile;

    /**
     * Selects the base folder and storage format and loads the global tables.
     *
     * <p>The three global files are {@code index/header}, {@code index/weights-idf}
     * and {@code index/stat} with the extension {@code bin} or {@code txt}
     * depending on {@code binaryStorageParam}; all three are parsed as text
     * regardless of the extension. A table whose file cannot be read is
     * reported on standard error and left {@code null} (or, for the header,
     * partially filled).
     *
     * @param baseFolderParam    folder that contains the {@code index} directory
     * @param binaryStorageParam {@code true} to read per-word indexes from
     *                           {@code .bin} files, {@code false} for {@code .txt}
     */
    public static void GlobalInitialize(File baseFolderParam, boolean binaryStorageParam) {
        baseFolder = baseFolderParam;
        binaryStorage = binaryStorageParam;

        String extension = binaryStorage ? "bin" : "txt";
        headerFile = new File(baseFolder, "index/header." + extension);
        weightFile = new File(baseFolder, "index/weights-idf." + extension);
        imstatFile = new File(baseFolder, "index/stat." + extension);

        content = readContent();
        idf = readWeights();

        // readStats() returns null when the file cannot be opened or its
        // count line is invalid; do not dereference it blindly.
        float[][] stat = readStats();
        if (stat != null) {
            len = stat[0];
            nL1 = stat[1];
        } else {
            len = null;
            nL1 = null;
        }
    }

    /**
     * Creates an empty index for the given word; nothing is read until
     * {@link #readIndex()} is called.
     *
     * @param word identifier of the word
     */
    public InvertedIndex(int word) {
        this.word = word;
    }

    /**
     * Loads the index of {@link #word} using the storage format chosen in
     * {@link #GlobalInitialize(File, boolean)}.
     *
     * @return {@code true} when the index was read, {@code false} otherwise
     */
    public boolean readIndex() {
        if (binaryStorage) {
            return readIndexFromBinary(this.word);
        } else {
            return readIndexFromText(this.word);
        }
    }

    /**
     * Reads {@code index/<word>.bin} and stores the decoded data in this object.
     *
     * <p>Layout (big endian): one byte of index code, a 4-byte entry count,
     * then the entries. The low four bits of the code select the encoding and
     * bit {@code 0x10} marks spatial data. Image identifiers are stored as
     * deltas from the previous identifier:
     * <ul>
     *   <li>type 0 - one unsigned byte; {@code 0xFF} means the delta follows
     *       on two bytes, and {@code 0xFFFF} there means it follows on four
     *       bytes. With the spatial flag, each entry is followed by three
     *       unsigned bytes (x, y, radius).</li>
     *   <li>type 1 - one unsigned 16-bit value; {@code 0xFFFF} means the delta
     *       follows on four bytes.</li>
     * </ul>
     *
     * <p>On failure the fields of this object are left untouched.
     *
     * @param word identifier of the word whose file is read
     * @return {@code true} when the file was decoded, {@code false} when it is
     *         missing, unreadable, truncated or declares an impossible size
     */
    public boolean readIndexFromBinary(int word) {
        File contentFile = new File(baseFolder, "index/" + word + ".bin");
        try {
            // The whole file is loaded at once and decoded from memory.
            ByteBuffer in = ByteBuffer.wrap(Files.readAllBytes(contentFile.toPath()));
            in.order(ByteOrder.BIG_ENDIAN);

            int indexCode = in.get();
            int size = in.getInt();
            boolean spatial = (indexCode & SPATIAL_FLAG) != 0;
            int indexType = indexCode & TYPE_MASK;

            // Reject sizes that cannot possibly be backed by the remaining
            // bytes before allocating anything based on them.
            int minEntryBytes;
            if (indexType == TYPE_DENSE) {
                minEntryBytes = spatial ? 4 : 1;
            } else if (indexType == TYPE_MEDIUM) {
                minEntryBytes = 2;
            } else {
                minEntryBytes = 0;
            }
            if (size < 0 || size == Integer.MAX_VALUE
                    || (long) size * minEntryBytes > in.remaining()) {
                throw new IOException(
                        "Corrupt index file " + contentFile + ": declared size " + size);
            }

            // One extra slot so callers can look one element ahead without
            // checking the array bounds.
            int[] ids = new int[size + 1];
            short[] xs = null;
            short[] ys = null;
            short[] rs = null;
            if (spatial) {
                xs = new short[size];
                ys = new short[size];
                rs = new short[size];
            }

            if (indexType == TYPE_DENSE) {
                int current = 0;
                for (int i = 0; i < size; i++) {
                    current += readByteDelta(in);
                    ids[i] = current;
                    if (spatial) {
                        xs[i] = (short) (in.get() & 0xFF);
                        ys[i] = (short) (in.get() & 0xFF);
                        rs[i] = (short) (in.get() & 0xFF);
                    }
                }
            } else if (indexType == TYPE_MEDIUM) {
                // NOTE(review): no spatial bytes are consumed for this
                // encoding, so with the spatial flag the coordinate arrays
                // stay zero-filled - confirm against the index writer.
                int current = 0;
                for (int i = 0; i < size; i++) {
                    current += readShortDelta(in);
                    ids[i] = current;
                }
            }
            // NOTE(review): any other encoding type falls through and is
            // reported as success with a zero-filled array - confirm whether
            // callers rely on that.

            this.images = ids;
            if (spatial) {
                this.xCoord = xs;
                this.yCoord = ys;
                this.radius = rs;
            }
            return true;
        } catch (IOException | BufferUnderflowException ex) {
            // read error or file shorter than its header claims
            ex.printStackTrace();
            return false;
        }
    }

    /** Reads a delta stored on one unsigned byte, following the 0xFF escape to the wider form. */
    private static int readByteDelta(ByteBuffer in) {
        int delta = in.get() & 0xFF;
        if (delta == 0xFF) {
            // did not fit in one byte
            return readShortDelta(in);
        }
        return delta;
    }

    /** Reads a delta stored on an unsigned 16-bit value, following the 0xFFFF escape to four bytes. */
    private static int readShortDelta(ByteBuffer in) {
        // getShort() is signed: mask it, otherwise the 0xFFFF marker can never
        // match and values from 0x8000 upwards come out negative.
        int delta = in.getShort() & 0xFFFF;
        if (delta == 0xFFFF) {
            // did not fit in two bytes
            return in.getInt();
        }
        return delta;
    }

    /**
     * Reads {@code index/<word>.txt} and stores the image identifiers in
     * {@link #images}.
     *
     * <p>The file holds whitespace-separated integers: the word identifier
     * (which must equal {@code word}), the entry count, then that many
     * identifiers, which are stored as read.
     *
     * @param word identifier of the word whose file is read
     * @return {@code true} when the file was read; {@code false} when it is
     *         missing, malformed or belongs to a different word
     */
    public boolean readIndexFromText(int word) {
        File contentFile = new File(baseFolder, "index/" + word + ".txt");
        try (Scanner scanner = new Scanner(contentFile, "UTF-8")) {
            int cluster = scanner.nextInt();
            if (cluster != word) {
                return false;
            }
            int count = scanner.nextInt();
            // one extra trailing slot, as in the binary reader
            int[] data = new int[count + 1];
            for (int i = 0; i < count; i++) {
                data[i] = scanner.nextInt();
            }
            this.images = data;
            return true;
        } catch (IOException | RuntimeException ex) {
            ex.printStackTrace();
            return false;
        }
    }

    /**
     * Reads the header file: a count on the first line followed by that many
     * lines, decoded as UTF-8.
     *
     * @return the lines in file order; never {@code null}. When reading fails
     *         the lines collected so far are returned, and when the file ends
     *         early the missing lines are present as {@code null} entries so
     *         positions stay aligned with the declared count.
     */
    public static List<String> readContent() {
        List<String> lines = new ArrayList<>();
        try (BufferedReader in = openUtf8(headerFile)) {
            int count = parseCount(in.readLine());
            for (int i = 0; i < count; i++) {
                lines.add(in.readLine());
            }
        } catch (IOException | RuntimeException ex) {
            ex.printStackTrace();
        }
        return lines;
    }

    /**
     * Reads the stat file: a count on the first line followed by that many
     * lines of the form {@code <integer> <float>}.
     *
     * @return a {@code [2][count]} array with the integers in row 0 and the
     *         floats in row 1, or {@code null} when the file cannot be opened
     *         or its count line is invalid. When a later line is malformed the
     *         rows read so far are kept and the rest stay 0.
     */
    public static float[][] readStats() {
        float[][] stats = null;
        try (BufferedReader in = openUtf8(imstatFile)) {
            int count = parseCount(in.readLine());
            stats = new float[2][count];
            for (int i = 0; i < count; i++) {
                // split on any run of whitespace so tabs or doubled spaces do not break parsing
                String[] parts = in.readLine().trim().split("\\s+");
                stats[0][i] = Integer.parseInt(parts[0]);
                stats[1][i] = Float.parseFloat(parts[1]);
            }
        } catch (IOException | RuntimeException ex) {
            ex.printStackTrace();
        }
        return stats;
    }

    /**
     * Reads the weights file: a count followed by that many whitespace-separated
     * float values.
     *
     * @return the values in file order, or {@code null} when the file cannot
     *         be opened or its count is invalid. When a later value is
     *         malformed the values read so far are kept and the rest stay 0.
     */
    public static float[] readWeights() {
        float[] weights = null;
        try (Scanner scanner = new Scanner(weightFile, "UTF-8")) {
            int count = scanner.nextInt();
            weights = new float[count];
            for (int i = 0; i < count; i++) {
                // Float.parseFloat instead of Scanner.nextFloat: the latter
                // depends on the default locale's decimal separator.
                weights[i] = Float.parseFloat(scanner.next());
            }
        } catch (IOException | RuntimeException ex) {
            ex.printStackTrace();
        }
        return weights;
    }

    /** Opens a file as UTF-8 text, the same encoding the Scanner-based readers use. */
    private static BufferedReader openUtf8(File file) throws IOException {
        return new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8));
    }

    /** Parses the leading count line of a table file; a missing line is a format error. */
    private static int parseCount(String line) {
        if (line == null) {
            throw new NumberFormatException("missing count line");
        }
        return Integer.parseInt(line.trim());
    }
}