package dokumenty.tabele;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.StringTokenizer;

import dokumenty.test.ThresholdAccuracyTest;

/**
 * Graph of vector nodes read from a text file, split into connected
 * sub-graphs and scored with two factors ({@link #VertexFactor} and
 * {@link #ContentFactor}) that {@link #main(String[])} compares against
 * thresholds to label a file as "table" or "other".
 *
 * <p>Expected file layout, as parsed by the constructor:
 * <ol>
 *   <li>one line holding the node count {@code n};</li>
 *   <li>{@code n} lines with two integers each, passed to the
 *       {@code VectorNode} constructor (presumably the X and Y
 *       coordinates - confirm against {@code VectorNode});</li>
 *   <li>{@code n} lines, one per node, listing the indices of the nodes it
 *       is connected to (an empty line means no connections).</li>
 * </ol>
 */
public class VectorGraph {

    /** Coordinates at most this far apart are merged into one spacing line. */
    private static final int SPACING_TOLERANCE = 5;

    /** A spacing line is kept only if at least this many nodes voted for it. */
    private static final int MIN_SPACING_SUPPORT = 3;

    /** A neighbour closer than this on both axes disqualifies a node. */
    private static final int MIN_EDGE_EXTENT = 10;

    /** Maximum deviation (radians) from an axis for an edge to count as axis-aligned. */
    private static final double ANGLE_TOLERANCE = 0.2;

    /** All nodes of the file, in file order; connection lists index into this array. */
    public VectorNode[] Nodes;

    /** One entry per sub-graph discovered by the breadth-first traversal. */
    public VectorSubGraph[] Graphs;

    /** The sub-graph with the greatest {@code Size}; {@code null} when there is none. */
    public VectorSubGraph LargestGraph;

    /**
     * Share of nodes of the largest sub-graph rated above 0 by
     * {@link #AssessNodeType(VectorNode)} among those rated 0 or above.
     */
    public double VertexFactor = 0;

    /**
     * Share of the grid area (cells between consecutive spacing values) whose
     * cell fully contains at least one sub-graph other than the largest one.
     * Stays 0 unless both spacing arrays hold more than two values.
     */
    public double ContentFactor = 0;

    /** Sorted X coordinates of the estimated grid lines; never {@code null} after construction. */
    public int[] XCellSpacing;

    /** Sorted Y coordinates of the estimated grid lines; never {@code null} after construction. */
    public int[] YCellSpacing;

    /**
     * Loads the graph from {@code FromFile}, splits it into sub-graphs and
     * computes both factors. Progress and results are printed to standard
     * output.
     *
     * <p>If the file cannot be read or parsed, the stack trace is printed and
     * the instance is left as an empty graph (no nodes, no sub-graphs, both
     * factors 0, empty spacing arrays).
     *
     * @param FromFile path of the graph file to read
     */
    public VectorGraph(String FromFile) {
        System.out.println("Reading graph from file: " + FromFile);
        try {
            Nodes = LoadNodes(FromFile);
            System.out.println("\tGraph loaded...");
        } catch (Exception Ex) {
            // Broad catch on purpose: malformed content surfaces as various
            // runtime exceptions (number format, missing tokens, null lines).
            Ex.printStackTrace();
            Nodes = new VectorNode[0];
            System.out.println("\tGraph could not be loaded; continuing with an empty graph.");
        }
        System.out.println("\tNode total: " + Nodes.length);

        // Assigned[i] becomes true once node i belongs to some sub-graph.
        boolean[] Assigned = new boolean[Nodes.length];
        List<List<VectorNode>> SubGraphs = new ArrayList<List<VectorNode>>();
        for (int i = 0; i < Nodes.length; i++) {
            if (!Assigned[i]) {
                SubGraphs.add(JoinNodesFromSeed(i, Assigned));
            }
        }

        Graphs = new VectorSubGraph[SubGraphs.size()];
        for (int i = 0; i < Graphs.length; i++) {
            Graphs[i] = new VectorSubGraph(SubGraphs.get(i));
        }

        int MaxSize = 0;
        for (VectorSubGraph Candidate : Graphs) {
            if (Candidate.Size > MaxSize) {
                MaxSize = Candidate.Size;
                LargestGraph = Candidate;
            }
        }
        System.out.println("\tGraph total: " + SubGraphs.size());

        if (LargestGraph == null) {
            // Nothing to analyse; leave the object in a consistent state so
            // callers can still read the spacing arrays and factors.
            XCellSpacing = new int[0];
            YCellSpacing = new int[0];
            System.out.println("\tNo sub-graph found, nothing to analyse.");
            return;
        }
        System.out.println("\tLargest: " + LargestGraph.X + "x" + LargestGraph.Y + " = " + LargestGraph.Size);

        int VFUpper = 0;
        int VFLower = 0;
        for (VectorNode Node : LargestGraph.Nodes) {
            int Type = AssessNodeType(Node);
            if (Type > 0) {
                VFUpper++;
            }
            if (Type >= 0) {
                VFLower++;
            }
        }
        // VFLower == 0 implies VFUpper == 0, so this also avoids 0/0 = NaN.
        double CorrectVertices = (VFLower == 0) ? 0 : (double) VFUpper / (double) VFLower;
        System.out.println("\tOrtogonal Nodes: " + VFUpper + " of " + VFLower);
        System.out.println("Vertex Factor: " + CorrectVertices);

        // The grid is estimated from the graph itself.
        XCellSpacing = EstimateXCellSpacing();
        YCellSpacing = EstimateYCellSpacing();

        if (XCellSpacing.length > 2 && YCellSpacing.length > 2) {
            ContentFactor = ComputeContentFactor();
        }
        System.out.println("Content Factor: " + ContentFactor);

        System.out.print("X Spacing: ");
        for (int Line : XCellSpacing) {
            System.out.print(Line + " ");
        }
        System.out.println();

        System.out.print("Y Spacing: ");
        for (int Line : YCellSpacing) {
            System.out.print(Line + " ");
        }
        System.out.println();

        VertexFactor = CorrectVertices;
    }

    /**
     * Parses the node table and the connection table from {@code FromFile}.
     * The reader is closed whether or not parsing succeeds.
     *
     * @param FromFile path of the graph file
     * @return the parsed nodes with their {@code Connected} arrays filled in
     * @throws IOException if the file cannot be opened or read; malformed
     *         content raises unchecked exceptions instead
     */
    private static VectorNode[] LoadNodes(String FromFile) throws IOException {
        BufferedReader Source = new BufferedReader(new FileReader(FromFile));
        try {
            int NodeNumber = Integer.parseInt(Source.readLine());
            VectorNode[] Loaded = new VectorNode[NodeNumber];
            for (int i = 0; i < NodeNumber; i++) {
                StringTokenizer Tokens = new StringTokenizer(Source.readLine());
                int First = Integer.parseInt(Tokens.nextToken());
                int Second = Integer.parseInt(Tokens.nextToken());
                Loaded[i] = new VectorNode(First, Second);
            }
            for (int i = 0; i < NodeNumber; i++) {
                StringTokenizer Tokens = new StringTokenizer(Source.readLine());
                int[] Connected = new int[Tokens.countTokens()];
                for (int j = 0; j < Connected.length; j++) {
                    Connected[j] = Integer.parseInt(Tokens.nextToken());
                }
                Loaded[i].Connected = Connected;
            }
            return Loaded;
        } finally {
            Source.close();
        }
    }

    /**
     * Computes the area-weighted share of grid cells that contain content.
     * Callers must ensure both spacing arrays are non-null.
     *
     * @return filled area divided by total area, or 0 when the total area is 0
     */
    private double ComputeContentFactor() {
        // long sums: cell areas are products of coordinate differences and
        // could overflow an int on large drawings.
        long Filled = 0;
        long Empty = 0;
        for (int x = 1; x < XCellSpacing.length; x++) {
            for (int y = 1; y < YCellSpacing.length; y++) {
                long Area = (long) (XCellSpacing[x] - XCellSpacing[x - 1])
                        * (long) (YCellSpacing[y] - YCellSpacing[y - 1]);
                if (CellHasContent(x, y)) {
                    Filled += Area;
                } else {
                    Empty += Area;
                }
            }
        }
        long Total = Filled + Empty;
        // Floating-point division; an integer division here would only ever
        // yield 0 or 1.
        return (Total == 0) ? 0 : (double) Filled / (double) Total;
    }

    /**
     * Tells whether some sub-graph other than the largest one lies entirely
     * inside the grid cell bounded by spacing lines {@code x-1..x} and
     * {@code y-1..y}.
     */
    private boolean CellHasContent(int x, int y) {
        for (VectorSubGraph G : Graphs) {
            if (G == LargestGraph) {
                continue;
            }
            if (G.MinX >= XCellSpacing[x - 1] && G.MaxX <= XCellSpacing[x]
                    && G.MinY >= YCellSpacing[y - 1] && G.MaxY <= YCellSpacing[y]) {
                return true;
            }
        }
        return false;
    }

    /**
     * Estimates horizontal grid lines from the Y coordinates of those nodes
     * of the largest sub-graph that {@link #AssessNodeType(VectorNode)} rates
     * above 0.
     *
     * @return sorted Y coordinates supported by at least
     *         {@link #MIN_SPACING_SUPPORT} nodes
     */
    private int[] EstimateYCellSpacing() {
        return EstimateSpacing(false, true);
    }

    /**
     * Estimates vertical grid lines from the X coordinates of all nodes of
     * the largest sub-graph.
     *
     * <p>NOTE(review): unlike the Y estimate, no node-type filter is applied
     * here. This mirrors the existing behaviour; confirm whether the
     * asymmetry is intended.
     *
     * @return sorted X coordinates supported by at least
     *         {@link #MIN_SPACING_SUPPORT} nodes
     */
    private int[] EstimateXCellSpacing() {
        return EstimateSpacing(true, false);
    }

    /**
     * Shared implementation of both spacing estimates: every node votes for
     * the first already-known coordinate within {@link #SPACING_TOLERANCE} of
     * its own (scanning upwards from {@code coordinate - tolerance}), or
     * opens a new bucket at its own coordinate.
     *
     * @param AlongX {@code true} to use X coordinates, {@code false} for Y
     * @param OrthogonalOnly {@code true} to skip nodes that
     *        {@link #AssessNodeType(VectorNode)} rates 0 or below
     * @return the sorted bucket coordinates with enough votes
     */
    private int[] EstimateSpacing(boolean AlongX, boolean OrthogonalOnly) {
        Map<Integer, Integer> Votes = new HashMap<Integer, Integer>();
        for (VectorNode V : LargestGraph.Nodes) {
            if (OrthogonalOnly && AssessNodeType(V) <= 0) {
                continue;
            }
            int Coordinate = AlongX ? V.X : V.Y;
            Integer Bucket = null;
            for (int i = Coordinate - SPACING_TOLERANCE; i <= Coordinate + SPACING_TOLERANCE; i++) {
                if (Votes.containsKey(i)) {
                    Bucket = i;
                    break;
                }
            }
            if (Bucket == null) {
                Votes.put(Coordinate, 1);
            } else {
                Votes.put(Bucket, Votes.get(Bucket) + 1);
            }
        }

        List<Integer> Kept = new ArrayList<Integer>();
        for (Map.Entry<Integer, Integer> Entry : Votes.entrySet()) {
            if (Entry.getValue() >= MIN_SPACING_SUPPORT) {
                Kept.add(Entry.getKey());
            }
        }
        int[] Final = new int[Kept.size()];
        for (int i = 0; i < Final.length; i++) {
            Final[i] = Kept.get(i);
        }
        Arrays.sort(Final);
        return Final;
    }

    /**
     * Rates a node by the directions of its edges.
     *
     * @param Node the node to rate; its {@code Connected} entries index into
     *        {@link #Nodes}
     * @return <ul>
     *   <li>{@code -1} if a connected node is closer than
     *       {@link #MIN_EDGE_EXTENT} on both axes, or if the node has exactly
     *       two edges pointing in opposite axis directions;</li>
     *   <li>{@code 0} if the node has more than four edges, or if some edge
     *       is not axis-aligned, or if two edges share a direction;</li>
     *   <li>otherwise the number of edges (which is 0 for an unconnected
     *       node).</li>
     * </ul>
     */
    private int AssessNodeType(VectorNode Node) {
        int Degree = Node.Connected.length;
        if (Degree > 4) {
            return 0;
        }

        for (int Index : Node.Connected) {
            VectorNode Other = Nodes[Index];
            if (Math.abs(Node.X - Other.X) < MIN_EDGE_EXTENT
                    && Math.abs(Node.Y - Other.Y) < MIN_EDGE_EXTENT) {
                return -1;
            }
        }

        boolean AllOrtogonal = true;
        boolean[] Directions = new boolean[4];
        for (int Index : Node.Connected) {
            VectorNode Other = Nodes[Index];
            int Direction = ClassifyDirection(Node.GetAngle(Other.X, Other.Y));
            if (Direction < 0 || Directions[Direction]) {
                // Off-axis edge, or a second edge in an already used direction.
                AllOrtogonal = false;
            } else {
                Directions[Direction] = true;
            }
        }

        boolean StraightThrough = (Directions[0] && Directions[2]) || (Directions[1] && Directions[3]);
        if (Degree == 2 && StraightThrough) {
            return -1;
        }
        return AllOrtogonal ? Degree : 0;
    }

    /**
     * Maps an angle to one of four axis directions.
     *
     * @param Angle angle in radians as returned by {@code VectorNode.GetAngle}
     *        (presumably in the range -pi..pi - confirm against that method)
     * @return 0 for an angle near 0, 1 near pi/2, 2 near +/-pi, 3 near -pi/2,
     *         or -1 when the angle is further than {@link #ANGLE_TOLERANCE}
     *         from all of them
     */
    private static int ClassifyDirection(double Angle) {
        if (Math.abs(Angle - 0) < ANGLE_TOLERANCE) {
            return 0;
        }
        if (Math.abs(Angle - (Math.PI / 2)) < ANGLE_TOLERANCE) {
            return 1;
        }
        if (Math.abs(Math.abs(Angle) - Math.PI) < ANGLE_TOLERANCE) {
            return 2;
        }
        if (Math.abs(Angle + (Math.PI / 2)) < ANGLE_TOLERANCE) {
            return 3;
        }
        return -1;
    }

    /**
     * Collects, in breadth-first order, every node reachable from
     * {@code Seed} by following {@code Connected} entries.
     *
     * <p>Nodes are tracked by their index in {@link #Nodes}, which makes each
     * membership test constant-time.
     *
     * @param Seed index of the start node
     * @param Assigned per-node flags; the entry of every collected node is
     *        set to {@code true}
     * @return the collected nodes, seed first
     */
    private List<VectorNode> JoinNodesFromSeed(int Seed, boolean[] Assigned) {
        List<VectorNode> Component = new ArrayList<VectorNode>();
        // Indices already queued or collected during this traversal.
        Set<Integer> Seen = new HashSet<Integer>();
        Queue<Integer> Pending = new LinkedList<Integer>();
        Pending.add(Seed);
        Seen.add(Seed);
        while (!Pending.isEmpty()) {
            int Current = Pending.remove();
            Component.add(Nodes[Current]);
            Assigned[Current] = true;
            for (int Neighbour : Nodes[Current].Connected) {
                if (Seen.add(Neighbour)) {
                    Pending.add(Neighbour);
                }
            }
        }
        return Component;
    }

    /**
     * Command-line entry point: analyses every {@code *.vector} file in a
     * directory and writes one "name => table" or "name => other" line per
     * file to {@code log.txt} in the working directory.
     *
     * @param args {@code [database path] [vertex factor threshold]
     *        [content factor threshold]}
     */
    static public void main(String[] args) {
        System.out.println("VG build: 18.07.2011");

        if (args.length < 3) {
            PrintUsage();
            return;
        }
        String BasePath = args[0];
        double VFTreshold;
        double CFTreshold;
        try {
            VFTreshold = Double.parseDouble(args[1]);
            CFTreshold = Double.parseDouble(args[2]);
        } catch (NumberFormatException Ex) {
            PrintUsage();
            return;
        }

        File[] AllFiles = (new File(BasePath)).listFiles();
        if (AllFiles == null) {
            // listFiles() returns null when the path is not a readable directory.
            System.out.println("Cannot list files in: " + BasePath);
            return;
        }

        int FilesProcessed = 0;
        // NOTE(review): the three statistics below are accumulated but not
        // reported anywhere yet.
        double AvgVertexFactor = 0;
        double MinVertexFactor = Double.MAX_VALUE;
        double MaxVertexFactor = Double.NEGATIVE_INFINITY;

        try {
            BufferedWriter Out = new BufferedWriter(new FileWriter("log.txt"));
            try {
                for (File F : AllFiles) {
                    if (!F.getName().endsWith(".vector")) {
                        continue;
                    }
                    FilesProcessed++;
                    VectorGraph Test = new VectorGraph(F.getAbsolutePath());

                    if (Test.VertexFactor < MinVertexFactor && Test.VertexFactor > 0) {
                        MinVertexFactor = Test.VertexFactor;
                    }
                    if (Test.VertexFactor > MaxVertexFactor) {
                        MaxVertexFactor = Test.VertexFactor;
                    }

                    boolean IsTable = Test.VertexFactor > VFTreshold
                            && Test.XCellSpacing.length > 2
                            && Test.YCellSpacing.length > 2
                            && Test.ContentFactor > CFTreshold;
                    if (IsTable) {
                        Out.write(F.getName() + " => table\n");
                    } else {
                        Out.write(F.getName() + " => other\n");
                    }
                    AvgVertexFactor += Test.VertexFactor;
                }
            } finally {
                // Closing flushes the buffer; without it the log may end up
                // empty or truncated.
                Out.close();
            }

            if (FilesProcessed > 0) {
                AvgVertexFactor /= FilesProcessed;
            }
            System.out.println();
            System.out.println("Files processed: " + FilesProcessed);
        } catch (Exception Ex) {
            System.out.println(Ex.getMessage());
            Ex.printStackTrace();
        }
    }

    /** Prints the command-line usage message shown for missing or malformed arguments. */
    private static void PrintUsage() {
        System.out.println("Illegal execution arguments!");
        System.out.println("Should be: java -Xmx1024m -Xms512m -jar VTD.jar [database path] [Vector factor treshold] [Content factor treshold]");
    }
}