forked from tabulapdf/tabula-java
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTable.java
More file actions
146 lines (116 loc) · 4.51 KB
/
Table.java
File metadata and controls
146 lines (116 loc) · 4.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
package technology.tabula;
import java.util.ArrayList;
import java.util.List;
import java.util.TreeMap;
import technology.tabula.extractors.ExtractionAlgorithm;
@SuppressWarnings("serial")
public class Table extends Rectangle {
class CellPosition implements Comparable<CellPosition> {
int row, col;
CellPosition(int row, int col) {
this.row = row; this.col = col;
}
@Override
public boolean equals(Object other) {
if (this == other)
return true;
if (!(other instanceof CellPosition))
return false;
return other != null && this.row == ((CellPosition) other).row && this.col == ((CellPosition) other).col;
}
@Override
public int hashCode() {
return this.row * 100000 + this.col;
}
@Override
public int compareTo(CellPosition other) {
int rv = 0;
if(this.row < other.row) {
rv = -1;
}
else if (this.row > other.row) {
rv = 1;
}
else if (this.col > other.col) {
rv = 1;
}
else if (this.col < other.col) {
rv = -1;
}
return rv;
}
}
/**
 * Sorted, sparse store of table cells keyed by {@link CellPosition}.
 *
 * <p>Because the keys sort by row and then by column, iteration visits cells row by row.
 * The container tracks the largest row and column index inserted through
 * {@code put}, and {@link #get(Object)} answers {@code TextChunk.EMPTY} for a position
 * that has no mapping.
 */
class CellContainer extends TreeMap<CellPosition, RectangularTextContainer> {
    /** Largest row / column index passed to {@code put} so far; both start at 0. */
    public int maxRow = 0, maxCol = 0;

    /**
     * Looks up the cell at the given coordinates.
     *
     * @param row row index
     * @param col column index
     * @return the stored cell, or {@code TextChunk.EMPTY} when nothing is stored there
     */
    public RectangularTextContainer get(int row, int col) {
        return this.get(new CellPosition(row, col));
    }

    /**
     * Returns the cells stored in one row, in ascending column order.
     *
     * <p>Only positions that were actually added (with column index 0 or greater) are
     * included; missing positions are not padded.
     *
     * @param row row index
     * @return a new list holding the row's stored cells
     */
    public List<RectangularTextContainer> getRow(int row) {
        // subMap's upper key is exclusive, so the bound must be one past the largest
        // COLUMN index. (Bounding on maxRow dropped every cell whose column exceeded
        // the number of rows.)
        CellPosition from = new CellPosition(row, 0);
        CellPosition to = new CellPosition(row, maxCol + 1);
        return new ArrayList<RectangularTextContainer>(this.subMap(from, to).values());
    }

    /**
     * Stores a cell and updates {@code maxRow} / {@code maxCol}.
     *
     * <p>If the position is already occupied, the existing cell is merged into
     * {@code value} before {@code value} replaces it.
     *
     * @param cp    position of the cell
     * @param value cell to store
     * @return {@code value} itself (not the previous mapping)
     */
    @Override
    public RectangularTextContainer put(CellPosition cp, RectangularTextContainer value) {
        this.maxRow = Math.max(maxRow, cp.row);
        this.maxCol = Math.max(maxCol, cp.col);
        if (this.containsKey(cp)) { // adding on an existing CellPosition, concatenate content and resize
            value.merge(this.get(cp));
        }
        super.put(cp, value);
        return value;
    }

    /**
     * @param key the position to look up
     * @return the mapped cell when {@code key} is present, otherwise {@code TextChunk.EMPTY}
     */
    @Override
    public RectangularTextContainer get(Object key) {
        return this.containsKey(key) ? super.get(key) : TextChunk.EMPTY;
    }

    /**
     * @param row row index
     * @param col column index
     * @return {@code true} when a cell is stored at the given coordinates
     */
    public boolean containsKey(int row, int col) {
        return this.containsKey(new CellPosition(row, col));
    }
}
/** Shared table instance built with the no-argument constructor. */
public static final Table EMPTY = new Table();
/** Cells of this table, keyed by (row, column) position. */
CellContainer cellContainer = new CellContainer();
/** Page supplied to the two-argument constructor; stays null otherwise. */
Page page;
/** Extraction algorithm associated with this table; may be replaced through the setter. */
ExtractionAlgorithm extractionAlgorithm;
/** Memoized result of getRows(); null until first computed and reset to null by add(). */
List<List<RectangularTextContainer>> rows = null;
/**
 * Creates a table with no cells, no page and no extraction algorithm.
 */
public Table() {
    // The superclass no-argument constructor is invoked implicitly.
}

/**
 * Creates a table with no cells that remembers where it came from.
 *
 * @param page                the page this table is associated with
 * @param extractionAlgorithm the algorithm associated with this table
 */
public Table(Page page, ExtractionAlgorithm extractionAlgorithm) {
    this();
    this.extractionAlgorithm = extractionAlgorithm;
    this.page = page;
}
/**
 * Adds a cell at row {@code i}, column {@code j}.
 *
 * <p>The cell is first merged into this table's own rectangle (presumably growing the
 * table's bounds to cover it; see {@code merge} in the superclass), then stored in the
 * cell container, and finally the cached row view is discarded.
 *
 * @param tc the cell to add
 * @param i  row index
 * @param j  column index
 */
public void add(RectangularTextContainer tc, int i, int j) {
    merge(tc);
    CellPosition position = new CellPosition(i, j);
    cellContainer.put(position, tc);
    // Any previously built row view is stale now.
    rows = null;
}
/**
 * Returns the table as a dense grid of rows.
 *
 * <p>The grid spans row indices {@code 0..maxRow} and column indices {@code 0..maxCol}
 * of the cell container; positions without a stored cell are filled with
 * {@code TextChunk.EMPTY}. The result is memoized until the next call to {@code add},
 * and the same list instance is handed out on every call in between.
 *
 * @return the list of rows, each row being a list of cells
 */
public List<List<RectangularTextContainer>> getRows() {
    if (rows == null) {
        rows = new ArrayList<List<RectangularTextContainer>>();
        final int lastRowIndex = cellContainer.maxRow;
        final int lastColIndex = cellContainer.maxCol;
        for (int r = 0; r <= lastRowIndex; r++) {
            List<RectangularTextContainer> cells = new ArrayList<RectangularTextContainer>();
            for (int c = 0; c <= lastColIndex; c++) {
                RectangularTextContainer cell;
                if (cellContainer.containsKey(r, c)) {
                    cell = cellContainer.get(r, c);
                } else {
                    cell = TextChunk.EMPTY;
                }
                cells.add(cell);
            }
            rows.add(cells);
        }
    }
    return rows;
}
/**
 * Returns the cell at row {@code i}, column {@code j}.
 *
 * @param i row index
 * @param j column index
 * @return whatever the cell container's lookup yields for that position
 */
public RectangularTextContainer getCell(int i, int j) {
    RectangularTextContainer cell = cellContainer.get(i, j);
    return cell;
}
/**
 * Returns the table column by column.
 *
 * @return the result of passing {@code getRows()} through {@code Utils.transpose}
 */
public List<List<RectangularTextContainer>> getCols() {
    List<List<RectangularTextContainer>> byRow = getRows();
    return Utils.transpose(byRow);
}
/**
 * Replaces the extraction algorithm associated with this table.
 *
 * @param extractionAlgorithm the algorithm to record
 */
public void setExtractionAlgorithm(ExtractionAlgorithm extractionAlgorithm) {
this.extractionAlgorithm = extractionAlgorithm;
}
/**
 * @return the extraction algorithm currently associated with this table, or
 *         {@code null} if none has been set
 */
public ExtractionAlgorithm getExtractionAlgorithm() {
    return this.extractionAlgorithm;
}
/**
 * Returns every stored cell as a flat list.
 *
 * <p>The list is a fresh copy of the cell container's values, so it follows the
 * container's key order and omits positions that were never added.
 *
 * @return a new list of the stored cells
 */
public List<RectangularTextContainer> getCells() {
    List<RectangularTextContainer> cells =
            new ArrayList<RectangularTextContainer>(cellContainer.values());
    return cells;
}
}