package anotacja; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.PrintWriter; import java.util.ArrayList; import java.util.Collections; import java.util.LinkedList; import java.util.List; import java.util.StringTokenizer; public class TransferValueFinder { public static void main(String[] args) { File TMATFile = new File(args[0]); File TENTFile = new File(args[1]); File AnnotFolder = new File(args[2]); BalanceSets = Boolean.parseBoolean(args[3]); int AnnotN = Integer.parseInt(args[4]); int FileN = Integer.parseInt(args[5]); ArrayList[] AllValues = new ArrayList[AnnotN]; ArrayList[] TrueValues = new ArrayList[AnnotN]; ArrayList[] FalseValues = new ArrayList[AnnotN]; for (int i = 0; i < AnnotN; i++) { AllValues[i] = new ArrayList(); TrueValues[i] = new ArrayList(); FalseValues[i] = new ArrayList(); } try { System.out.println("Reading data..."); BufferedReader Reader = new BufferedReader(new FileReader(TMATFile)); String Line; while ((Line = Reader.readLine()) != null) { StringTokenizer TMATTokens = new StringTokenizer(Line, ";"); String FileName = new File(TMATTokens.nextToken()).getName(); FileName = FileName.substring(0, FileName.indexOf(".png") + 4); File AnnotFile = new File(AnnotFolder, FileName + ".annot"); LinkedList TrueAnnotations = new LinkedList(); BufferedReader TempRead = new BufferedReader(new FileReader(AnnotFile)); String AnnotLine = TempRead.readLine(); TempRead.close(); StringTokenizer AnnotTokens = new StringTokenizer(AnnotLine, " "); System.out.print("Processing: " + FileName); while (AnnotTokens.hasMoreTokens()) { int Current = Integer.parseInt(AnnotTokens.nextToken()); if (!TrueAnnotations.contains(Current)) TrueAnnotations.add(Current); } System.out.print(", found " + TrueAnnotations.size() + " annotations"); System.out.println(); int Added = 0; for (int i = 0; i < AnnotN; i++) { double T = Double.parseDouble(TMATTokens.nextToken()); AllValues[i].add(T); if (TrueAnnotations.contains(i)) { Added++; TrueValues[i].add(T); } else { FalseValues[i].add(T); } } if (Added != TrueAnnotations.size()) throw new Exception(Added + " vs. " + TrueAnnotations.size()); } System.out.println("Done"); System.out.println(); System.out.println("Sorting transfer value lists..."); for (int i = 0; i < AnnotN; i++) Collections.sort(AllValues[i]); System.out.println("Done"); System.out.println(); if (UseEntropy) System.out.println("Finding minimal entropy..."); else System.out.println("Finding maximal F-Score..."); double Accumulator = 0; PrintWriter Save = new PrintWriter(TENTFile); for (int i = 0; i < AnnotN; i++) { if (UseEntropy) { double BestT = 0; double CurrentT; double BestEntropy = Double.POSITIVE_INFINITY; double CurrentEntropy; for (int t = 1; t < FileN; t++) { CurrentT = (AllValues[i].get(t) + AllValues[i].get(t-1))/2.0; CurrentEntropy = FindEntropy(TrueValues[i], FalseValues[i], CurrentT); if (CurrentEntropy <= BestEntropy) { BestEntropy = CurrentEntropy; BestT = CurrentT; } } double CurrentFScore = FindFScore(TrueValues[i], FalseValues[i], BestT); if (CurrentFScore == 0.0) BestT = Double.POSITIVE_INFINITY; Save.println(BestT); System.out.println("Annotation " + i + ": " + BestT + ", with F-score: " + CurrentFScore + ", Entropy: " + BestEntropy); Accumulator += CurrentFScore; } else { double BestT = 0; double CurrentT; double BestFScore = 0; double CurrentFScore; for (int t = 1; t < FileN; t++) { CurrentT = (AllValues[i].get(t) + AllValues[i].get(t-1))/2.0; CurrentFScore = FindFScore(TrueValues[i], FalseValues[i], CurrentT); if (CurrentFScore >= BestFScore) { BestFScore = CurrentFScore; BestT = CurrentT; } } if (BestFScore == 0.0) BestT = Double.POSITIVE_INFINITY; Save.println(BestT); System.out.println("Annotation " + i + ": " + BestT + ", with F-score: " + BestFScore); Accumulator += BestFScore; } } Save.close(); System.out.println(); System.out.println("Average: " + (Accumulator / AnnotN)); } catch (Exception Ex) { Ex.printStackTrace(); } } public static boolean UseEntropy = true; public static double FindEntropy(ListTrue, ListFalse, double T) { int TP = 0; int TN = 0; int FP = 0; int FN = 0; for (Double D : True) if (D > T) TP++; else FN++; for (Double D : False) if (D > T) FP++; else TN++; if (BalanceSets) { TP *= False.size(); FN *= False.size(); TN *= True.size(); FP *= True.size(); } double EntropyA1 = TP > 0 ? ((double)TP/(double)(TP+FP)) * Math.log((double)TP/(double)(TP+FP)) / Math.log(2) : 0; double EntropyA2 = FP > 0 ? ((double)FP/(double)(TP+FP)) * Math.log((double)FP/(double)(TP+FP)) / Math.log(2) : 0; double EntropyA = (NonWeightedEntropy ? 1.0 : ((double)(TP+FP)/(double)(TP+FP+FN+TN))) * (EntropyA1 + EntropyA2); double EntropyB1 = FN > 0 ? ((double)FN/(double)(FN+TN)) * Math.log((double)FN/(double)(FN+TN)) / Math.log(2) : 0; double EntropyB2 = TN > 0 ? ((double)TN/(double)(FN+TN)) * Math.log((double)TN/(double)(FN+TN)) / Math.log(2) : 0; double EntropyB = (NonWeightedEntropy ? 1.0 : ((double)(FN+TN)/(double)(TP+FP+FN+TN))) * (EntropyB1 + EntropyB2); return - (EntropyA + EntropyB); } public static double Beta = 1.0; public static boolean NonWeightedEntropy = false; public static boolean BalanceSets = false; public static double FindFScore(ListTrue, ListFalse, double T) { int TP = 0; int TN = 0; int FP = 0; int FN = 0; for (Double D : True) if (D > T) TP++; else FN++; for (Double D : False) if (D > T) FP++; else TN++; double Return = ((1.0 + (Beta * Beta)) * (double)TP) / (((1.0 + (Beta * Beta)) * (double)TP) + (Beta * Beta * (double)FN) + ((double)FP)); return (new Double(Return)).equals(Double.NaN) ? 0.0 : Return; } }