1use crate::edges::{Edge, EdgeSource};
7use crate::geometry::{BBox, Orientation};
8use crate::text::Char;
9use crate::words::{Word, WordExtractor, WordOptions};
10
/// Selects which edges `TableFinder::find_tables` builds tables from.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Strategy {
    /// Use every edge handed to the finder, whatever its source (the default).
    #[default]
    Lattice,
    /// Use only the supplied edges whose source is `EdgeSource::Line`.
    LatticeStrict,
    /// Ignore the supplied edges and derive edges from word alignment instead.
    Stream,
    /// Use the supplied edges plus the lines listed in `TableSettings::explicit_lines`.
    Explicit,
}
25
/// Tunable parameters for table detection.
///
/// `TableFinder::find_tables` reads the per-axis (`*_x_*` / `*_y_*`) tolerances.
/// The un-suffixed tolerances are stored but not read by that method.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TableSettings {
    /// How candidate edges are selected (see [`Strategy`]).
    pub strategy: Strategy,
    /// General snap tolerance. Not read by `find_tables`; presumably a shared
    /// default for the two per-axis values — TODO confirm with callers.
    pub snap_tolerance: f64,
    /// Clustering distance along x used when snapping vertical edges together.
    pub snap_x_tolerance: f64,
    /// Clustering distance along y used when snapping horizontal edges together.
    pub snap_y_tolerance: f64,
    /// General join tolerance. Not read by `find_tables`; presumably a shared
    /// default for the two per-axis values — TODO confirm with callers.
    pub join_tolerance: f64,
    /// Largest gap along x between collinear horizontal edges that are still merged.
    pub join_x_tolerance: f64,
    /// Largest gap along y between collinear vertical edges that are still merged.
    pub join_y_tolerance: f64,
    /// Edges shorter than this are dropped before snapping and joining.
    pub edge_min_length: f64,
    /// Stream strategy: minimum number of words in an x-position cluster for it
    /// to produce a vertical edge.
    pub min_words_vertical: usize,
    /// Stream strategy: minimum number of words in a y-position cluster for it
    /// to produce a horizontal edge.
    pub min_words_horizontal: usize,
    /// General text tolerance. Not read by `find_tables`; presumably a shared
    /// default for the two per-axis values — TODO confirm with callers.
    pub text_tolerance: f64,
    /// Stream strategy: clustering distance for word x positions.
    pub text_x_tolerance: f64,
    /// Stream strategy: clustering distance for word y positions.
    pub text_y_tolerance: f64,
    /// General intersection tolerance. Not read by `find_tables`; presumably a
    /// shared default for the two per-axis values — TODO confirm with callers.
    pub intersection_tolerance: f64,
    /// How far outside a horizontal edge's x-extent a vertical edge may lie and
    /// still be treated as crossing it.
    pub intersection_x_tolerance: f64,
    /// How far outside a vertical edge's y-extent a horizontal edge may lie and
    /// still be treated as crossing it.
    pub intersection_y_tolerance: f64,
    /// Extra line positions that `Strategy::Explicit` adds as edges.
    pub explicit_lines: Option<ExplicitLines>,
}
67
68impl Default for TableSettings {
69 fn default() -> Self {
70 Self {
71 strategy: Strategy::default(),
72 snap_tolerance: 3.0,
73 snap_x_tolerance: 3.0,
74 snap_y_tolerance: 3.0,
75 join_tolerance: 3.0,
76 join_x_tolerance: 3.0,
77 join_y_tolerance: 3.0,
78 edge_min_length: 3.0,
79 min_words_vertical: 3,
80 min_words_horizontal: 1,
81 text_tolerance: 3.0,
82 text_x_tolerance: 3.0,
83 text_y_tolerance: 3.0,
84 intersection_tolerance: 3.0,
85 intersection_x_tolerance: 3.0,
86 intersection_y_tolerance: 3.0,
87 explicit_lines: None,
88 }
89 }
90}
91
/// User-supplied line positions that are turned into edges.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ExplicitLines {
    /// Y coordinates at which horizontal edges are created.
    pub horizontal_lines: Vec<f64>,
    /// X coordinates at which vertical edges are created.
    pub vertical_lines: Vec<f64>,
}
101
/// One rectangular table cell.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Cell {
    /// Rectangle covered by the cell.
    pub bbox: BBox,
    /// Text inside the cell; `None` when no text has been extracted or found.
    pub text: Option<String>,
}
111
/// A table: a set of cells plus row-wise and column-wise views of them.
///
/// The field notes describe tables produced by `cells_to_tables`; a table
/// constructed by hand can hold anything.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Table {
    /// Union of the bounding boxes of all cells.
    pub bbox: BBox,
    /// Every cell belonging to the table.
    pub cells: Vec<Cell>,
    /// Cells grouped by their top coordinate; each row is ordered by `x0`.
    pub rows: Vec<Vec<Cell>>,
    /// Cells grouped by their `x0` coordinate; each column is ordered by top.
    pub columns: Vec<Vec<Cell>>,
}
125
126pub fn snap_edges(edges: Vec<Edge>, snap_x_tolerance: f64, snap_y_tolerance: f64) -> Vec<Edge> {
136 let mut result = Vec::with_capacity(edges.len());
137 let mut horizontals: Vec<Edge> = Vec::new();
138 let mut verticals: Vec<Edge> = Vec::new();
139
140 for edge in edges {
141 match edge.orientation {
142 Orientation::Horizontal => horizontals.push(edge),
143 Orientation::Vertical => verticals.push(edge),
144 Orientation::Diagonal => result.push(edge),
145 }
146 }
147
148 snap_group(
150 &mut horizontals,
151 snap_y_tolerance,
152 |e| e.top,
153 |e, v| {
154 e.top = v;
155 e.bottom = v;
156 },
157 );
158 result.extend(horizontals);
159
160 snap_group(
162 &mut verticals,
163 snap_x_tolerance,
164 |e| e.x0,
165 |e, v| {
166 e.x0 = v;
167 e.x1 = v;
168 },
169 );
170 result.extend(verticals);
171
172 result
173}
174
175fn snap_group<F, G>(edges: &mut [Edge], tolerance: f64, key: F, mut set: G)
177where
178 F: Fn(&Edge) -> f64,
179 G: FnMut(&mut Edge, f64),
180{
181 if edges.is_empty() {
182 return;
183 }
184
185 edges.sort_by(|a, b| key(a).partial_cmp(&key(b)).unwrap());
187
188 let mut cluster_start = 0;
190 for i in 1..=edges.len() {
191 let end_of_cluster =
192 i == edges.len() || (key(&edges[i]) - key(&edges[cluster_start])).abs() > tolerance;
193 if end_of_cluster {
194 let sum: f64 = (cluster_start..i).map(|j| key(&edges[j])).sum();
196 let mean = sum / (i - cluster_start) as f64;
197 for edge in &mut edges[cluster_start..i] {
198 set(edge, mean);
199 }
200 cluster_start = i;
201 }
202 }
203}
204
205pub fn join_edge_group(
214 edges: Vec<Edge>,
215 join_x_tolerance: f64,
216 join_y_tolerance: f64,
217) -> Vec<Edge> {
218 let mut result: Vec<Edge> = Vec::new();
219 let mut horizontals: Vec<Edge> = Vec::new();
220 let mut verticals: Vec<Edge> = Vec::new();
221
222 for edge in edges {
223 match edge.orientation {
224 Orientation::Horizontal => horizontals.push(edge),
225 Orientation::Vertical => verticals.push(edge),
226 Orientation::Diagonal => result.push(edge),
227 }
228 }
229
230 result.extend(join_collinear(
232 horizontals,
233 |e| e.top,
234 |e| (e.x0, e.x1),
235 |proto, start, end| Edge {
236 x0: start,
237 top: proto.top,
238 x1: end,
239 bottom: proto.bottom,
240 orientation: proto.orientation,
241 source: proto.source,
242 },
243 join_x_tolerance,
244 ));
245
246 result.extend(join_collinear(
248 verticals,
249 |e| e.x0,
250 |e| (e.top, e.bottom),
251 |proto, start, end| Edge {
252 x0: proto.x0,
253 top: start,
254 x1: proto.x1,
255 bottom: end,
256 orientation: proto.orientation,
257 source: proto.source,
258 },
259 join_y_tolerance,
260 ));
261
262 result
263}
264
265fn join_collinear<K, S, B>(
267 mut edges: Vec<Edge>,
268 key: K,
269 span: S,
270 build: B,
271 tolerance: f64,
272) -> Vec<Edge>
273where
274 K: Fn(&Edge) -> f64,
275 S: Fn(&Edge) -> (f64, f64),
276 B: Fn(&Edge, f64, f64) -> Edge,
277{
278 if edges.is_empty() {
279 return Vec::new();
280 }
281
282 edges.sort_by(|a, b| {
284 key(a)
285 .partial_cmp(&key(b))
286 .unwrap()
287 .then_with(|| span(a).0.partial_cmp(&span(b).0).unwrap())
288 });
289
290 let mut result = Vec::new();
291 let mut i = 0;
292
293 while i < edges.len() {
294 let group_key = key(&edges[i]);
296 let mut j = i + 1;
297 while j < edges.len() && (key(&edges[j]) - group_key).abs() < 1e-9 {
298 j += 1;
299 }
300
301 let (mut cur_start, mut cur_end) = span(&edges[i]);
303 let mut proto_idx = i;
304
305 for k in (i + 1)..j {
306 let (s, e) = span(&edges[k]);
307 if s <= cur_end + tolerance {
308 if e > cur_end {
310 cur_end = e;
311 }
312 } else {
313 result.push(build(&edges[proto_idx], cur_start, cur_end));
315 cur_start = s;
316 cur_end = e;
317 proto_idx = k;
318 }
319 }
320 result.push(build(&edges[proto_idx], cur_start, cur_end));
321
322 i = j;
323 }
324
325 result
326}
327
/// A point where a horizontal and a vertical edge meet.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Intersection {
    /// X coordinate, taken from the vertical edge.
    pub x: f64,
    /// Y coordinate, taken from the horizontal edge.
    pub y: f64,
}
337
338pub fn edges_to_intersections(
347 edges: &[Edge],
348 x_tolerance: f64,
349 y_tolerance: f64,
350) -> Vec<Intersection> {
351 let horizontals: Vec<&Edge> = edges
352 .iter()
353 .filter(|e| e.orientation == Orientation::Horizontal)
354 .collect();
355 let verticals: Vec<&Edge> = edges
356 .iter()
357 .filter(|e| e.orientation == Orientation::Vertical)
358 .collect();
359
360 let mut intersections = Vec::new();
361
362 for h in &horizontals {
363 let h_y = h.top; for v in &verticals {
365 let v_x = v.x0; if v_x >= h.x0 - x_tolerance
370 && v_x <= h.x1 + x_tolerance
371 && h_y >= v.top - y_tolerance
372 && h_y <= v.bottom + y_tolerance
373 {
374 intersections.push(Intersection { x: v_x, y: h_y });
375 }
376 }
377 }
378
379 intersections.sort_by(|a, b| {
381 a.x.partial_cmp(&b.x)
382 .unwrap()
383 .then_with(|| a.y.partial_cmp(&b.y).unwrap())
384 });
385 intersections.dedup_by(|a, b| (a.x - b.x).abs() < 1e-9 && (a.y - b.y).abs() < 1e-9);
386
387 intersections
388}
389
390pub fn intersections_to_cells(intersections: &[Intersection]) -> Vec<Cell> {
397 if intersections.is_empty() {
398 return Vec::new();
399 }
400
401 let mut xs: Vec<f64> = Vec::new();
403 let mut ys: Vec<f64> = Vec::new();
404
405 for pt in intersections {
406 if !xs.iter().any(|&x| (x - pt.x).abs() < 1e-9) {
407 xs.push(pt.x);
408 }
409 if !ys.iter().any(|&y| (y - pt.y).abs() < 1e-9) {
410 ys.push(pt.y);
411 }
412 }
413
414 xs.sort_by(|a, b| a.partial_cmp(b).unwrap());
415 ys.sort_by(|a, b| a.partial_cmp(b).unwrap());
416
417 let has_point = |x: f64, y: f64| -> bool {
419 intersections
420 .iter()
421 .any(|pt| (pt.x - x).abs() < 1e-9 && (pt.y - y).abs() < 1e-9)
422 };
423
424 let mut cells = Vec::new();
425
426 for yi in 0..ys.len().saturating_sub(1) {
428 for xi in 0..xs.len().saturating_sub(1) {
429 let x0 = xs[xi];
430 let x1 = xs[xi + 1];
431 let top = ys[yi];
432 let bottom = ys[yi + 1];
433
434 if has_point(x0, top)
435 && has_point(x1, top)
436 && has_point(x0, bottom)
437 && has_point(x1, bottom)
438 {
439 cells.push(Cell {
440 bbox: BBox::new(x0, top, x1, bottom),
441 text: None,
442 });
443 }
444 }
445 }
446
447 cells
448}
449
450pub fn cells_to_tables(cells: Vec<Cell>) -> Vec<Table> {
458 if cells.is_empty() {
459 return Vec::new();
460 }
461
462 let n = cells.len();
463
464 let mut parent: Vec<usize> = (0..n).collect();
466
467 fn find(parent: &mut [usize], mut i: usize) -> usize {
468 while parent[i] != i {
469 parent[i] = parent[parent[i]]; i = parent[i];
471 }
472 i
473 }
474
475 fn union(parent: &mut [usize], a: usize, b: usize) {
476 let ra = find(parent, a);
477 let rb = find(parent, b);
478 if ra != rb {
479 parent[rb] = ra;
480 }
481 }
482
483 for i in 0..n {
487 for j in (i + 1)..n {
488 if cells_share_edge(&cells[i], &cells[j]) {
489 union(&mut parent, i, j);
490 }
491 }
492 }
493
494 let mut groups: std::collections::HashMap<usize, Vec<usize>> = std::collections::HashMap::new();
496 for i in 0..n {
497 let root = find(&mut parent, i);
498 groups.entry(root).or_default().push(i);
499 }
500
501 let mut tables: Vec<Table> = groups
503 .into_values()
504 .map(|indices| {
505 let group_cells: Vec<Cell> = indices.iter().map(|&i| cells[i].clone()).collect();
506
507 let mut bbox = group_cells[0].bbox;
509 for cell in &group_cells[1..] {
510 bbox = bbox.union(&cell.bbox);
511 }
512
513 let mut row_map: std::collections::BTreeMap<i64, Vec<Cell>> =
515 std::collections::BTreeMap::new();
516 for cell in &group_cells {
517 let key = float_key(cell.bbox.top);
518 row_map.entry(key).or_default().push(cell.clone());
519 }
520 let rows: Vec<Vec<Cell>> = row_map
521 .into_values()
522 .map(|mut row| {
523 row.sort_by(|a, b| a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap());
524 row
525 })
526 .collect();
527
528 let mut col_map: std::collections::BTreeMap<i64, Vec<Cell>> =
530 std::collections::BTreeMap::new();
531 for cell in &group_cells {
532 let key = float_key(cell.bbox.x0);
533 col_map.entry(key).or_default().push(cell.clone());
534 }
535 let columns: Vec<Vec<Cell>> = col_map
536 .into_values()
537 .map(|mut col| {
538 col.sort_by(|a, b| a.bbox.top.partial_cmp(&b.bbox.top).unwrap());
539 col
540 })
541 .collect();
542
543 Table {
544 bbox,
545 cells: group_cells,
546 rows,
547 columns,
548 }
549 })
550 .collect();
551
552 tables.sort_by(|a, b| {
554 a.bbox
555 .top
556 .partial_cmp(&b.bbox.top)
557 .unwrap()
558 .then_with(|| a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap())
559 });
560
561 tables
562}
563
564fn cells_share_edge(a: &Cell, b: &Cell) -> bool {
566 let eps = 1e-6;
567
568 let shared_vertical = ((a.bbox.x1 - b.bbox.x0).abs() < eps
571 || (a.bbox.x0 - b.bbox.x1).abs() < eps)
572 && a.bbox.top < b.bbox.bottom + eps
573 && b.bbox.top < a.bbox.bottom + eps;
574
575 let shared_horizontal = ((a.bbox.bottom - b.bbox.top).abs() < eps
578 || (a.bbox.top - b.bbox.bottom).abs() < eps)
579 && a.bbox.x0 < b.bbox.x1 + eps
580 && b.bbox.x0 < a.bbox.x1 + eps;
581
582 shared_vertical || shared_horizontal
583}
584
/// Converts a coordinate to an integer map key with three decimal places of
/// resolution (value × 1000, rounded to the nearest integer).
fn float_key(v: f64) -> i64 {
    const SCALE: f64 = 1000.0;
    let scaled = v * SCALE;
    scaled.round() as i64
}
589
590fn edge_length(edge: &Edge) -> f64 {
592 let dx = edge.x1 - edge.x0;
593 let dy = edge.bottom - edge.top;
594 (dx * dx + dy * dy).sqrt()
595}
596
597pub fn extract_text_for_cells(cells: &mut [Cell], chars: &[Char]) {
606 let options = WordOptions::default();
607
608 for cell in cells.iter_mut() {
609 let cell_chars: Vec<Char> = chars
611 .iter()
612 .filter(|ch| {
613 let cx = (ch.bbox.x0 + ch.bbox.x1) / 2.0;
614 let cy = (ch.bbox.top + ch.bbox.bottom) / 2.0;
615 cx >= cell.bbox.x0
616 && cx <= cell.bbox.x1
617 && cy >= cell.bbox.top
618 && cy <= cell.bbox.bottom
619 })
620 .cloned()
621 .collect();
622
623 if cell_chars.is_empty() {
624 cell.text = None;
625 continue;
626 }
627
628 let words = WordExtractor::extract(&cell_chars, &options);
630 if words.is_empty() {
631 cell.text = None;
632 continue;
633 }
634
635 let mut sorted_words: Vec<&crate::words::Word> = words.iter().collect();
637 sorted_words.sort_by(|a, b| {
638 a.bbox
639 .top
640 .partial_cmp(&b.bbox.top)
641 .unwrap()
642 .then_with(|| a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap())
643 });
644
645 let mut lines: Vec<Vec<&crate::words::Word>> = Vec::new();
646 for word in &sorted_words {
647 let added = lines.last_mut().and_then(|line| {
648 let last_top = line[0].bbox.top;
649 if (word.bbox.top - last_top).abs() <= options.y_tolerance {
650 line.push(word);
651 Some(())
652 } else {
653 None
654 }
655 });
656 if added.is_none() {
657 lines.push(vec![word]);
658 }
659 }
660
661 let text: String = lines
663 .iter()
664 .map(|line| {
665 line.iter()
666 .map(|w| w.text.as_str())
667 .collect::<Vec<_>>()
668 .join(" ")
669 })
670 .collect::<Vec<_>>()
671 .join("\n");
672
673 cell.text = Some(text);
674 }
675}
676
677pub fn words_to_edges_stream(
689 words: &[Word],
690 text_x_tolerance: f64,
691 text_y_tolerance: f64,
692 min_words_vertical: usize,
693 min_words_horizontal: usize,
694) -> Vec<Edge> {
695 if words.is_empty() {
696 return Vec::new();
697 }
698
699 let mut edges = Vec::new();
700
701 edges.extend(cluster_words_to_edges(
703 words,
704 |w| w.bbox.x0,
705 text_x_tolerance,
706 min_words_vertical,
707 EdgeKind::Vertical,
708 ));
709
710 edges.extend(cluster_words_to_edges(
712 words,
713 |w| w.bbox.x1,
714 text_x_tolerance,
715 min_words_vertical,
716 EdgeKind::Vertical,
717 ));
718
719 edges.extend(cluster_words_to_edges(
721 words,
722 |w| w.bbox.top,
723 text_y_tolerance,
724 min_words_horizontal,
725 EdgeKind::Horizontal,
726 ));
727
728 edges.extend(cluster_words_to_edges(
730 words,
731 |w| w.bbox.bottom,
732 text_y_tolerance,
733 min_words_horizontal,
734 EdgeKind::Horizontal,
735 ));
736
737 edges
738}
739
/// Which kind of edge `cluster_words_to_edges` should build from a cluster.
enum EdgeKind {
    /// A vertical edge at the cluster's mean position, spanning the words' tops to bottoms.
    Vertical,
    /// A horizontal edge at the cluster's mean position, spanning the words' left to right sides.
    Horizontal,
}
745
746fn cluster_words_to_edges<F>(
748 words: &[Word],
749 key: F,
750 tolerance: f64,
751 min_words: usize,
752 kind: EdgeKind,
753) -> Vec<Edge>
754where
755 F: Fn(&Word) -> f64,
756{
757 if words.is_empty() || min_words == 0 {
758 return Vec::new();
759 }
760
761 let mut indices: Vec<usize> = (0..words.len()).collect();
763 indices.sort_by(|&a, &b| key(&words[a]).partial_cmp(&key(&words[b])).unwrap());
764
765 let mut edges = Vec::new();
766 let mut cluster_start = 0;
767
768 for i in 1..=indices.len() {
769 let end_of_cluster = i == indices.len()
770 || (key(&words[indices[i]]) - key(&words[indices[cluster_start]])).abs() > tolerance;
771
772 if end_of_cluster {
773 let cluster_size = i - cluster_start;
774 if cluster_size >= min_words {
775 let sum: f64 = (cluster_start..i).map(|j| key(&words[indices[j]])).sum();
777 let mean_pos = sum / cluster_size as f64;
778
779 let cluster_words: Vec<&Word> =
781 (cluster_start..i).map(|j| &words[indices[j]]).collect();
782
783 match kind {
784 EdgeKind::Vertical => {
785 let min_top = cluster_words
786 .iter()
787 .map(|w| w.bbox.top)
788 .fold(f64::INFINITY, f64::min);
789 let max_bottom = cluster_words
790 .iter()
791 .map(|w| w.bbox.bottom)
792 .fold(f64::NEG_INFINITY, f64::max);
793 edges.push(Edge {
794 x0: mean_pos,
795 top: min_top,
796 x1: mean_pos,
797 bottom: max_bottom,
798 orientation: Orientation::Vertical,
799 source: EdgeSource::Stream,
800 });
801 }
802 EdgeKind::Horizontal => {
803 let min_x0 = cluster_words
804 .iter()
805 .map(|w| w.bbox.x0)
806 .fold(f64::INFINITY, f64::min);
807 let max_x1 = cluster_words
808 .iter()
809 .map(|w| w.bbox.x1)
810 .fold(f64::NEG_INFINITY, f64::max);
811 edges.push(Edge {
812 x0: min_x0,
813 top: mean_pos,
814 x1: max_x1,
815 bottom: mean_pos,
816 orientation: Orientation::Horizontal,
817 source: EdgeSource::Stream,
818 });
819 }
820 }
821 }
822 cluster_start = i;
823 }
824 }
825
826 edges
827}
828
829pub fn explicit_lines_to_edges(explicit: &ExplicitLines) -> Vec<Edge> {
837 if explicit.horizontal_lines.is_empty() || explicit.vertical_lines.is_empty() {
838 return Vec::new();
839 }
840
841 let min_x = explicit
842 .vertical_lines
843 .iter()
844 .copied()
845 .fold(f64::INFINITY, f64::min);
846 let max_x = explicit
847 .vertical_lines
848 .iter()
849 .copied()
850 .fold(f64::NEG_INFINITY, f64::max);
851 let min_y = explicit
852 .horizontal_lines
853 .iter()
854 .copied()
855 .fold(f64::INFINITY, f64::min);
856 let max_y = explicit
857 .horizontal_lines
858 .iter()
859 .copied()
860 .fold(f64::NEG_INFINITY, f64::max);
861
862 let mut edges = Vec::new();
863
864 for &y in &explicit.horizontal_lines {
866 edges.push(Edge {
867 x0: min_x,
868 top: y,
869 x1: max_x,
870 bottom: y,
871 orientation: Orientation::Horizontal,
872 source: EdgeSource::Explicit,
873 });
874 }
875
876 for &x in &explicit.vertical_lines {
878 edges.push(Edge {
879 x0: x,
880 top: min_y,
881 x1: x,
882 bottom: max_y,
883 orientation: Orientation::Vertical,
884 source: EdgeSource::Explicit,
885 });
886 }
887
888 edges
889}
890
/// Runs the table-detection pipeline over a set of edges (and optionally words).
pub struct TableFinder {
    /// Edges supplied by the caller; not used by `Strategy::Stream`.
    edges: Vec<Edge>,
    /// Words supplied by the caller; only read by `Strategy::Stream`.
    words: Vec<Word>,
    /// Detection parameters.
    settings: TableSettings,
}
903
904impl TableFinder {
905 pub fn new(edges: Vec<Edge>, settings: TableSettings) -> Self {
907 Self {
908 edges,
909 words: Vec::new(),
910 settings,
911 }
912 }
913
914 pub fn new_with_words(edges: Vec<Edge>, words: Vec<Word>, settings: TableSettings) -> Self {
919 Self {
920 edges,
921 words,
922 settings,
923 }
924 }
925
926 pub fn settings(&self) -> &TableSettings {
928 &self.settings
929 }
930
931 pub fn edges(&self) -> &[Edge] {
933 &self.edges
934 }
935
936 pub fn find_tables(&self) -> Vec<Table> {
946 let edges: Vec<Edge> = match self.settings.strategy {
948 Strategy::LatticeStrict => self
949 .edges
950 .iter()
951 .filter(|e| e.source == EdgeSource::Line)
952 .cloned()
953 .collect(),
954 Strategy::Stream => {
955 words_to_edges_stream(
957 &self.words,
958 self.settings.text_x_tolerance,
959 self.settings.text_y_tolerance,
960 self.settings.min_words_vertical,
961 self.settings.min_words_horizontal,
962 )
963 }
964 Strategy::Explicit => {
965 let mut edges = self.edges.clone();
967
968 if let Some(ref explicit) = self.settings.explicit_lines {
969 let mut min_x = f64::INFINITY;
971 let mut max_x = f64::NEG_INFINITY;
972 let mut min_y = f64::INFINITY;
973 let mut max_y = f64::NEG_INFINITY;
974
975 for e in &edges {
976 min_x = min_x.min(e.x0);
977 max_x = max_x.max(e.x1);
978 min_y = min_y.min(e.top);
979 max_y = max_y.max(e.bottom);
980 }
981 for &x in &explicit.vertical_lines {
982 min_x = min_x.min(x);
983 max_x = max_x.max(x);
984 }
985 for &y in &explicit.horizontal_lines {
986 min_y = min_y.min(y);
987 max_y = max_y.max(y);
988 }
989
990 if min_x <= max_x && min_y <= max_y {
991 for &y in &explicit.horizontal_lines {
992 edges.push(Edge {
993 x0: min_x,
994 top: y,
995 x1: max_x,
996 bottom: y,
997 orientation: Orientation::Horizontal,
998 source: EdgeSource::Explicit,
999 });
1000 }
1001 for &x in &explicit.vertical_lines {
1002 edges.push(Edge {
1003 x0: x,
1004 top: min_y,
1005 x1: x,
1006 bottom: max_y,
1007 orientation: Orientation::Vertical,
1008 source: EdgeSource::Explicit,
1009 });
1010 }
1011 }
1012 }
1013
1014 edges
1015 }
1016 Strategy::Lattice => self.edges.clone(),
1018 };
1019
1020 let min_len = self.settings.edge_min_length;
1022 let edges: Vec<Edge> = edges
1023 .into_iter()
1024 .filter(|e| edge_length(e) >= min_len)
1025 .collect();
1026
1027 if edges.is_empty() {
1028 return Vec::new();
1029 }
1030
1031 let edges = snap_edges(
1033 edges,
1034 self.settings.snap_x_tolerance,
1035 self.settings.snap_y_tolerance,
1036 );
1037
1038 let edges = join_edge_group(
1040 edges,
1041 self.settings.join_x_tolerance,
1042 self.settings.join_y_tolerance,
1043 );
1044
1045 let intersections = edges_to_intersections(
1047 &edges,
1048 self.settings.intersection_x_tolerance,
1049 self.settings.intersection_y_tolerance,
1050 );
1051
1052 let cells = intersections_to_cells(&intersections);
1054
1055 cells_to_tables(cells)
1057 }
1058}
1059
1060#[cfg(test)]
1061mod tests {
1062 use super::*;
1063 use crate::geometry::Orientation;
1064
1065 #[test]
1068 fn test_strategy_default_is_lattice() {
1069 assert_eq!(Strategy::default(), Strategy::Lattice);
1070 }
1071
1072 #[test]
1073 fn test_strategy_variants_are_distinct() {
1074 let strategies = [
1075 Strategy::Lattice,
1076 Strategy::LatticeStrict,
1077 Strategy::Stream,
1078 Strategy::Explicit,
1079 ];
1080 for i in 0..strategies.len() {
1081 for j in (i + 1)..strategies.len() {
1082 assert_ne!(strategies[i], strategies[j]);
1083 }
1084 }
1085 }
1086
1087 #[test]
1088 fn test_strategy_copy() {
1089 let s = Strategy::Stream;
1090 let s2 = s;
1091 assert_eq!(s, s2);
1092 }
1093
1094 #[test]
1097 fn test_table_settings_default_values() {
1098 let settings = TableSettings::default();
1099 assert_eq!(settings.strategy, Strategy::Lattice);
1100 assert_eq!(settings.snap_tolerance, 3.0);
1101 assert_eq!(settings.snap_x_tolerance, 3.0);
1102 assert_eq!(settings.snap_y_tolerance, 3.0);
1103 assert_eq!(settings.join_tolerance, 3.0);
1104 assert_eq!(settings.join_x_tolerance, 3.0);
1105 assert_eq!(settings.join_y_tolerance, 3.0);
1106 assert_eq!(settings.edge_min_length, 3.0);
1107 assert_eq!(settings.min_words_vertical, 3);
1108 assert_eq!(settings.min_words_horizontal, 1);
1109 assert_eq!(settings.text_tolerance, 3.0);
1110 assert_eq!(settings.text_x_tolerance, 3.0);
1111 assert_eq!(settings.text_y_tolerance, 3.0);
1112 assert_eq!(settings.intersection_tolerance, 3.0);
1113 assert_eq!(settings.intersection_x_tolerance, 3.0);
1114 assert_eq!(settings.intersection_y_tolerance, 3.0);
1115 assert!(settings.explicit_lines.is_none());
1116 }
1117
1118 #[test]
1119 fn test_table_settings_custom_construction() {
1120 let settings = TableSettings {
1121 strategy: Strategy::Stream,
1122 snap_tolerance: 5.0,
1123 min_words_vertical: 5,
1124 min_words_horizontal: 2,
1125 ..TableSettings::default()
1126 };
1127 assert_eq!(settings.strategy, Strategy::Stream);
1128 assert_eq!(settings.snap_tolerance, 5.0);
1129 assert_eq!(settings.min_words_vertical, 5);
1130 assert_eq!(settings.min_words_horizontal, 2);
1131 assert_eq!(settings.join_tolerance, 3.0);
1133 assert_eq!(settings.edge_min_length, 3.0);
1134 }
1135
1136 #[test]
1137 fn test_table_settings_with_explicit_lines() {
1138 let settings = TableSettings {
1139 strategy: Strategy::Explicit,
1140 explicit_lines: Some(ExplicitLines {
1141 horizontal_lines: vec![10.0, 50.0, 100.0],
1142 vertical_lines: vec![20.0, 80.0, 140.0],
1143 }),
1144 ..TableSettings::default()
1145 };
1146 assert_eq!(settings.strategy, Strategy::Explicit);
1147 let lines = settings.explicit_lines.as_ref().unwrap();
1148 assert_eq!(lines.horizontal_lines.len(), 3);
1149 assert_eq!(lines.vertical_lines.len(), 3);
1150 }
1151
1152 #[test]
1153 fn test_table_settings_strategy_selection() {
1154 for strategy in [
1155 Strategy::Lattice,
1156 Strategy::LatticeStrict,
1157 Strategy::Stream,
1158 Strategy::Explicit,
1159 ] {
1160 let settings = TableSettings {
1161 strategy,
1162 ..TableSettings::default()
1163 };
1164 assert_eq!(settings.strategy, strategy);
1165 }
1166 }
1167
1168 #[test]
1171 fn test_cell_with_text() {
1172 let cell = Cell {
1173 bbox: BBox::new(10.0, 20.0, 100.0, 40.0),
1174 text: Some("Hello".to_string()),
1175 };
1176 assert_eq!(cell.bbox.x0, 10.0);
1177 assert_eq!(cell.text.as_deref(), Some("Hello"));
1178 }
1179
1180 #[test]
1181 fn test_cell_without_text() {
1182 let cell = Cell {
1183 bbox: BBox::new(10.0, 20.0, 100.0, 40.0),
1184 text: None,
1185 };
1186 assert!(cell.text.is_none());
1187 }
1188
1189 #[test]
1192 fn test_table_construction() {
1193 let cells = vec![
1194 Cell {
1195 bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
1196 text: Some("A".to_string()),
1197 },
1198 Cell {
1199 bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
1200 text: Some("B".to_string()),
1201 },
1202 ];
1203 let table = Table {
1204 bbox: BBox::new(0.0, 0.0, 100.0, 30.0),
1205 cells: cells.clone(),
1206 rows: vec![cells.clone()],
1207 columns: vec![vec![cells[0].clone()], vec![cells[1].clone()]],
1208 };
1209 assert_eq!(table.bbox.x0, 0.0);
1210 assert_eq!(table.bbox.x1, 100.0);
1211 assert_eq!(table.cells.len(), 2);
1212 assert_eq!(table.rows.len(), 1);
1213 assert_eq!(table.rows[0].len(), 2);
1214 assert_eq!(table.columns.len(), 2);
1215 }
1216
1217 #[test]
1218 fn test_table_multi_row() {
1219 let row1 = vec![
1220 Cell {
1221 bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
1222 text: Some("A1".to_string()),
1223 },
1224 Cell {
1225 bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
1226 text: Some("B1".to_string()),
1227 },
1228 ];
1229 let row2 = vec![
1230 Cell {
1231 bbox: BBox::new(0.0, 30.0, 50.0, 60.0),
1232 text: Some("A2".to_string()),
1233 },
1234 Cell {
1235 bbox: BBox::new(50.0, 30.0, 100.0, 60.0),
1236 text: Some("B2".to_string()),
1237 },
1238 ];
1239 let all_cells: Vec<Cell> = row1.iter().chain(row2.iter()).cloned().collect();
1240 let table = Table {
1241 bbox: BBox::new(0.0, 0.0, 100.0, 60.0),
1242 cells: all_cells,
1243 rows: vec![row1, row2],
1244 columns: vec![
1245 vec![
1246 Cell {
1247 bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
1248 text: Some("A1".to_string()),
1249 },
1250 Cell {
1251 bbox: BBox::new(0.0, 30.0, 50.0, 60.0),
1252 text: Some("A2".to_string()),
1253 },
1254 ],
1255 vec![
1256 Cell {
1257 bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
1258 text: Some("B1".to_string()),
1259 },
1260 Cell {
1261 bbox: BBox::new(50.0, 30.0, 100.0, 60.0),
1262 text: Some("B2".to_string()),
1263 },
1264 ],
1265 ],
1266 };
1267 assert_eq!(table.rows.len(), 2);
1268 assert_eq!(table.columns.len(), 2);
1269 assert_eq!(table.cells.len(), 4);
1270 }
1271
1272 #[test]
1275 fn test_table_finder_construction() {
1276 let edges = vec![Edge {
1277 x0: 0.0,
1278 top: 50.0,
1279 x1: 100.0,
1280 bottom: 50.0,
1281 orientation: Orientation::Horizontal,
1282 source: crate::edges::EdgeSource::Line,
1283 }];
1284 let settings = TableSettings::default();
1285 let finder = TableFinder::new(edges.clone(), settings.clone());
1286
1287 assert_eq!(finder.edges().len(), 1);
1288 assert_eq!(finder.settings().strategy, Strategy::Lattice);
1289 }
1290
1291 #[test]
1292 fn test_table_finder_empty_edges() {
1293 let finder = TableFinder::new(Vec::new(), TableSettings::default());
1294 assert!(finder.edges().is_empty());
1295 let tables = finder.find_tables();
1296 assert!(tables.is_empty());
1297 }
1298
1299 #[test]
1300 fn test_table_finder_custom_settings() {
1301 let settings = TableSettings {
1302 strategy: Strategy::LatticeStrict,
1303 snap_tolerance: 5.0,
1304 ..TableSettings::default()
1305 };
1306 let finder = TableFinder::new(Vec::new(), settings);
1307 assert_eq!(finder.settings().strategy, Strategy::LatticeStrict);
1308 assert_eq!(finder.settings().snap_tolerance, 5.0);
1309 }
1310
1311 #[test]
1314 fn test_explicit_lines_construction() {
1315 let lines = ExplicitLines {
1316 horizontal_lines: vec![0.0, 30.0, 60.0],
1317 vertical_lines: vec![0.0, 50.0, 100.0],
1318 };
1319 assert_eq!(lines.horizontal_lines.len(), 3);
1320 assert_eq!(lines.vertical_lines.len(), 3);
1321 assert_eq!(lines.horizontal_lines[1], 30.0);
1322 assert_eq!(lines.vertical_lines[2], 100.0);
1323 }
1324
1325 #[test]
1326 fn test_explicit_lines_empty() {
1327 let lines = ExplicitLines {
1328 horizontal_lines: Vec::new(),
1329 vertical_lines: Vec::new(),
1330 };
1331 assert!(lines.horizontal_lines.is_empty());
1332 assert!(lines.vertical_lines.is_empty());
1333 }
1334
1335 fn make_h_edge(x0: f64, y: f64, x1: f64) -> Edge {
1338 Edge {
1339 x0,
1340 top: y,
1341 x1,
1342 bottom: y,
1343 orientation: Orientation::Horizontal,
1344 source: crate::edges::EdgeSource::Line,
1345 }
1346 }
1347
1348 fn make_v_edge(x: f64, top: f64, bottom: f64) -> Edge {
1349 Edge {
1350 x0: x,
1351 top,
1352 x1: x,
1353 bottom,
1354 orientation: Orientation::Vertical,
1355 source: crate::edges::EdgeSource::Line,
1356 }
1357 }
1358
1359 fn assert_approx(a: f64, b: f64) {
1360 assert!(
1361 (a - b).abs() < 1e-6,
1362 "expected {b}, got {a}, diff={}",
1363 (a - b).abs()
1364 );
1365 }
1366
1367 #[test]
1368 fn test_snap_edges_empty() {
1369 let result = snap_edges(Vec::new(), 3.0, 3.0);
1370 assert!(result.is_empty());
1371 }
1372
1373 #[test]
1374 fn test_snap_nearby_horizontal_lines() {
1375 let edges = vec![make_h_edge(0.0, 50.0, 100.0), make_h_edge(0.0, 51.5, 100.0)];
1378 let result = snap_edges(edges, 3.0, 3.0);
1379
1380 let horizontals: Vec<&Edge> = result
1381 .iter()
1382 .filter(|e| e.orientation == Orientation::Horizontal)
1383 .collect();
1384 assert_eq!(horizontals.len(), 2);
1385 assert_approx(horizontals[0].top, 50.75);
1386 assert_approx(horizontals[0].bottom, 50.75);
1387 assert_approx(horizontals[1].top, 50.75);
1388 assert_approx(horizontals[1].bottom, 50.75);
1389 }
1390
1391 #[test]
1392 fn test_snap_nearby_vertical_lines() {
1393 let edges = vec![
1396 make_v_edge(100.0, 0.0, 200.0),
1397 make_v_edge(101.0, 0.0, 200.0),
1398 ];
1399 let result = snap_edges(edges, 3.0, 3.0);
1400
1401 let verticals: Vec<&Edge> = result
1402 .iter()
1403 .filter(|e| e.orientation == Orientation::Vertical)
1404 .collect();
1405 assert_eq!(verticals.len(), 2);
1406 assert_approx(verticals[0].x0, 100.5);
1407 assert_approx(verticals[0].x1, 100.5);
1408 assert_approx(verticals[1].x0, 100.5);
1409 assert_approx(verticals[1].x1, 100.5);
1410 }
1411
1412 #[test]
1413 fn test_snap_edges_far_apart_remain_unchanged() {
1414 let edges = vec![
1416 make_h_edge(0.0, 50.0, 100.0),
1417 make_h_edge(0.0, 100.0, 100.0),
1418 ];
1419 let result = snap_edges(edges, 3.0, 3.0);
1420
1421 let horizontals: Vec<&Edge> = result
1422 .iter()
1423 .filter(|e| e.orientation == Orientation::Horizontal)
1424 .collect();
1425 assert_eq!(horizontals.len(), 2);
1426 let mut ys: Vec<f64> = horizontals.iter().map(|e| e.top).collect();
1428 ys.sort_by(|a, b| a.partial_cmp(b).unwrap());
1429 assert_approx(ys[0], 50.0);
1430 assert_approx(ys[1], 100.0);
1431 }
1432
1433 #[test]
1434 fn test_snap_edges_separate_x_y_tolerance() {
1435 let edges = vec![make_h_edge(0.0, 50.0, 100.0), make_h_edge(0.0, 52.0, 100.0)];
1438 let result = snap_edges(edges, 3.0, 1.0);
1439
1440 let horizontals: Vec<&Edge> = result
1441 .iter()
1442 .filter(|e| e.orientation == Orientation::Horizontal)
1443 .collect();
1444 let mut ys: Vec<f64> = horizontals.iter().map(|e| e.top).collect();
1445 ys.sort_by(|a, b| a.partial_cmp(b).unwrap());
1446 assert_approx(ys[0], 50.0);
1447 assert_approx(ys[1], 52.0);
1448 }
1449
1450 #[test]
1451 fn test_snap_edges_separate_x_tolerance() {
1452 let edges = vec![
1455 make_v_edge(100.0, 0.0, 200.0),
1456 make_v_edge(102.0, 0.0, 200.0),
1457 ];
1458 let result = snap_edges(edges, 1.0, 3.0);
1459
1460 let verticals: Vec<&Edge> = result
1461 .iter()
1462 .filter(|e| e.orientation == Orientation::Vertical)
1463 .collect();
1464 let mut xs: Vec<f64> = verticals.iter().map(|e| e.x0).collect();
1465 xs.sort_by(|a, b| a.partial_cmp(b).unwrap());
1466 assert_approx(xs[0], 100.0);
1467 assert_approx(xs[1], 102.0);
1468 }
1469
1470 #[test]
1471 fn test_snap_edges_does_not_merge() {
1472 let edges = vec![
1474 make_h_edge(0.0, 50.0, 100.0),
1475 make_h_edge(10.0, 51.0, 90.0),
1476 make_h_edge(20.0, 50.5, 80.0),
1477 ];
1478 let result = snap_edges(edges, 3.0, 3.0);
1479
1480 let horizontals: Vec<&Edge> = result
1481 .iter()
1482 .filter(|e| e.orientation == Orientation::Horizontal)
1483 .collect();
1484 assert_eq!(horizontals.len(), 3);
1486 for h in &horizontals {
1488 assert_approx(h.top, 50.5);
1489 assert_approx(h.bottom, 50.5);
1490 }
1491 }
1492
1493 #[test]
1494 fn test_snap_edges_preserves_along_axis_coords() {
1495 let edges = vec![
1497 make_h_edge(10.0, 50.0, 200.0),
1498 make_h_edge(30.0, 51.0, 180.0),
1499 ];
1500 let result = snap_edges(edges, 3.0, 3.0);
1501
1502 let horizontals: Vec<&Edge> = result
1503 .iter()
1504 .filter(|e| e.orientation == Orientation::Horizontal)
1505 .collect();
1506 let mut found_10 = false;
1508 let mut found_30 = false;
1509 for h in &horizontals {
1510 if (h.x0 - 10.0).abs() < 1e-6 {
1511 assert_approx(h.x1, 200.0);
1512 found_10 = true;
1513 }
1514 if (h.x0 - 30.0).abs() < 1e-6 {
1515 assert_approx(h.x1, 180.0);
1516 found_30 = true;
1517 }
1518 }
1519 assert!(found_10 && found_30, "x-coordinates should be preserved");
1520 }
1521
1522 #[test]
1523 fn test_snap_edges_mixed_orientations() {
1524 let edges = vec![
1526 make_h_edge(0.0, 50.0, 100.0),
1527 make_h_edge(0.0, 51.0, 100.0),
1528 make_v_edge(200.0, 0.0, 100.0),
1529 make_v_edge(201.0, 0.0, 100.0),
1530 ];
1531 let result = snap_edges(edges, 3.0, 3.0);
1532 assert_eq!(result.len(), 4);
1533
1534 let horizontals: Vec<&Edge> = result
1535 .iter()
1536 .filter(|e| e.orientation == Orientation::Horizontal)
1537 .collect();
1538 let verticals: Vec<&Edge> = result
1539 .iter()
1540 .filter(|e| e.orientation == Orientation::Vertical)
1541 .collect();
1542
1543 for h in &horizontals {
1545 assert_approx(h.top, 50.5);
1546 }
1547 for v in &verticals {
1549 assert_approx(v.x0, 200.5);
1550 }
1551 }
1552
1553 #[test]
1554 fn test_snap_edges_multiple_clusters() {
1555 let edges = vec![
1557 make_h_edge(0.0, 10.0, 100.0),
1558 make_h_edge(0.0, 11.0, 100.0),
1559 make_h_edge(0.0, 50.0, 100.0),
1561 make_h_edge(0.0, 51.0, 100.0),
1562 make_h_edge(0.0, 100.0, 100.0),
1564 make_h_edge(0.0, 101.0, 100.0),
1565 ];
1566 let result = snap_edges(edges, 3.0, 3.0);
1567
1568 let horizontals: Vec<&Edge> = result
1569 .iter()
1570 .filter(|e| e.orientation == Orientation::Horizontal)
1571 .collect();
1572 assert_eq!(horizontals.len(), 6);
1573
1574 let mut ys: Vec<f64> = horizontals.iter().map(|e| e.top).collect();
1575 ys.sort_by(|a, b| a.partial_cmp(b).unwrap());
1576 assert_approx(ys[0], 10.5);
1578 assert_approx(ys[1], 10.5);
1579 assert_approx(ys[2], 50.5);
1581 assert_approx(ys[3], 50.5);
1582 assert_approx(ys[4], 100.5);
1584 assert_approx(ys[5], 100.5);
1585 }
1586
1587 #[test]
1588 fn test_snap_edges_single_edge_unchanged() {
1589 let edges = vec![make_h_edge(0.0, 50.0, 100.0)];
1590 let result = snap_edges(edges, 3.0, 3.0);
1591 assert_eq!(result.len(), 1);
1592 assert_approx(result[0].top, 50.0);
1593 assert_approx(result[0].bottom, 50.0);
1594 }
1595
1596 #[test]
1597 fn test_snap_edges_diagonal_passed_through() {
1598 let edges = vec![
1599 Edge {
1600 x0: 0.0,
1601 top: 0.0,
1602 x1: 100.0,
1603 bottom: 100.0,
1604 orientation: Orientation::Diagonal,
1605 source: crate::edges::EdgeSource::Curve,
1606 },
1607 make_h_edge(0.0, 50.0, 100.0),
1608 ];
1609 let result = snap_edges(edges, 3.0, 3.0);
1610 assert_eq!(result.len(), 2);
1611
1612 let diagonals: Vec<&Edge> = result
1613 .iter()
1614 .filter(|e| e.orientation == Orientation::Diagonal)
1615 .collect();
1616 assert_eq!(diagonals.len(), 1);
1617 assert_approx(diagonals[0].x0, 0.0);
1619 assert_approx(diagonals[0].top, 0.0);
1620 assert_approx(diagonals[0].x1, 100.0);
1621 assert_approx(diagonals[0].bottom, 100.0);
1622 }
1623
1624 #[test]
1627 fn test_join_edge_group_empty() {
1628 let result = join_edge_group(Vec::new(), 3.0, 3.0);
1629 assert!(result.is_empty());
1630 }
1631
1632 #[test]
1633 fn test_join_edge_group_single_edge_unchanged() {
1634 let edges = vec![make_h_edge(10.0, 50.0, 80.0)];
1635 let result = join_edge_group(edges, 3.0, 3.0);
1636 assert_eq!(result.len(), 1);
1637 assert_approx(result[0].x0, 10.0);
1638 assert_approx(result[0].x1, 80.0);
1639 }
1640
1641 #[test]
1642 fn test_join_two_overlapping_horizontal_edges() {
1643 let edges = vec![make_h_edge(10.0, 50.0, 60.0), make_h_edge(40.0, 50.0, 90.0)];
1646 let result = join_edge_group(edges, 3.0, 3.0);
1647 assert_eq!(result.len(), 1);
1648 assert_approx(result[0].x0, 10.0);
1649 assert_approx(result[0].x1, 90.0);
1650 assert_approx(result[0].top, 50.0);
1651 }
1652
1653 #[test]
1654 fn test_join_two_adjacent_horizontal_edges_within_tolerance() {
1655 let edges = vec![make_h_edge(10.0, 50.0, 50.0), make_h_edge(52.0, 50.0, 90.0)];
1658 let result = join_edge_group(edges, 3.0, 3.0);
1659 assert_eq!(result.len(), 1);
1660 assert_approx(result[0].x0, 10.0);
1661 assert_approx(result[0].x1, 90.0);
1662 }
1663
1664 #[test]
1665 fn test_join_distant_horizontal_edges_not_merged() {
1666 let edges = vec![make_h_edge(10.0, 50.0, 40.0), make_h_edge(60.0, 50.0, 90.0)];
1669 let result = join_edge_group(edges, 3.0, 3.0);
1670 assert_eq!(result.len(), 2);
1671 }
1672
1673 #[test]
1674 fn test_join_chain_of_three_horizontal_segments() {
1675 let edges = vec![
1678 make_h_edge(10.0, 50.0, 40.0),
1679 make_h_edge(38.0, 50.0, 70.0),
1680 make_h_edge(68.0, 50.0, 100.0),
1681 ];
1682 let result = join_edge_group(edges, 3.0, 3.0);
1683 assert_eq!(result.len(), 1);
1684 assert_approx(result[0].x0, 10.0);
1685 assert_approx(result[0].x1, 100.0);
1686 }
1687
1688 #[test]
1689 fn test_join_two_overlapping_vertical_edges() {
1690 let edges = vec![make_v_edge(50.0, 10.0, 60.0), make_v_edge(50.0, 40.0, 90.0)];
1693 let result = join_edge_group(edges, 3.0, 3.0);
1694 assert_eq!(result.len(), 1);
1695 assert_approx(result[0].top, 10.0);
1696 assert_approx(result[0].bottom, 90.0);
1697 assert_approx(result[0].x0, 50.0);
1698 }
1699
1700 #[test]
1701 fn test_join_adjacent_vertical_edges_within_tolerance() {
1702 let edges = vec![make_v_edge(50.0, 10.0, 50.0), make_v_edge(50.0, 52.0, 90.0)];
1705 let result = join_edge_group(edges, 3.0, 3.0);
1706 assert_eq!(result.len(), 1);
1707 assert_approx(result[0].top, 10.0);
1708 assert_approx(result[0].bottom, 90.0);
1709 }
1710
1711 #[test]
1712 fn test_join_groups_by_collinear_position() {
1713 let edges = vec![
1717 make_h_edge(10.0, 50.0, 50.0),
1718 make_h_edge(48.0, 50.0, 90.0),
1719 make_h_edge(10.0, 100.0, 40.0),
1720 make_h_edge(60.0, 100.0, 90.0),
1721 ];
1722 let result = join_edge_group(edges, 3.0, 3.0);
1723 assert_eq!(result.len(), 3);
1724
1725 let at_50: Vec<&Edge> = result
1726 .iter()
1727 .filter(|e| (e.top - 50.0).abs() < 1e-6)
1728 .collect();
1729 assert_eq!(at_50.len(), 1);
1730 assert_approx(at_50[0].x0, 10.0);
1731 assert_approx(at_50[0].x1, 90.0);
1732
1733 let at_100: Vec<&Edge> = result
1734 .iter()
1735 .filter(|e| (e.top - 100.0).abs() < 1e-6)
1736 .collect();
1737 assert_eq!(at_100.len(), 2);
1738 }
1739
1740 #[test]
1741 fn test_join_mixed_orientations() {
1742 let edges = vec![
1744 make_h_edge(10.0, 50.0, 50.0),
1745 make_h_edge(48.0, 50.0, 90.0),
1746 make_v_edge(200.0, 10.0, 50.0),
1747 make_v_edge(200.0, 48.0, 90.0),
1748 ];
1749 let result = join_edge_group(edges, 3.0, 3.0);
1750 assert_eq!(result.len(), 2);
1751
1752 let horizontals: Vec<&Edge> = result
1753 .iter()
1754 .filter(|e| e.orientation == Orientation::Horizontal)
1755 .collect();
1756 assert_eq!(horizontals.len(), 1);
1757 assert_approx(horizontals[0].x0, 10.0);
1758 assert_approx(horizontals[0].x1, 90.0);
1759
1760 let verticals: Vec<&Edge> = result
1761 .iter()
1762 .filter(|e| e.orientation == Orientation::Vertical)
1763 .collect();
1764 assert_eq!(verticals.len(), 1);
1765 assert_approx(verticals[0].top, 10.0);
1766 assert_approx(verticals[0].bottom, 90.0);
1767 }
1768
1769 #[test]
1770 fn test_join_separate_x_y_tolerance() {
1771 let edges = vec![make_h_edge(10.0, 50.0, 40.0), make_h_edge(44.0, 50.0, 80.0)];
1773 let result = join_edge_group(edges, 3.0, 3.0);
1774 assert_eq!(result.len(), 2);
1775
1776 let edges = vec![make_v_edge(50.0, 10.0, 40.0), make_v_edge(50.0, 44.0, 80.0)];
1778 let result = join_edge_group(edges, 3.0, 5.0);
1779 assert_eq!(result.len(), 1);
1780 }
1781
1782 #[test]
1783 fn test_join_diagonal_edges_pass_through() {
1784 let diag = Edge {
1785 x0: 0.0,
1786 top: 0.0,
1787 x1: 100.0,
1788 bottom: 100.0,
1789 orientation: Orientation::Diagonal,
1790 source: crate::edges::EdgeSource::Curve,
1791 };
1792 let edges = vec![diag.clone(), make_h_edge(10.0, 50.0, 90.0)];
1793 let result = join_edge_group(edges, 3.0, 3.0);
1794 assert_eq!(result.len(), 2);
1795
1796 let diagonals: Vec<&Edge> = result
1797 .iter()
1798 .filter(|e| e.orientation == Orientation::Diagonal)
1799 .collect();
1800 assert_eq!(diagonals.len(), 1);
1801 assert_approx(diagonals[0].x0, 0.0);
1802 assert_approx(diagonals[0].bottom, 100.0);
1803 }
1804
1805 #[test]
1806 fn test_snap_edges_zero_tolerance() {
1807 let edges = vec![
1809 make_h_edge(0.0, 50.0, 100.0),
1810 make_h_edge(0.0, 50.0, 100.0), make_h_edge(0.0, 50.1, 100.0), ];
1813 let result = snap_edges(edges, 0.0, 0.0);
1814
1815 let horizontals: Vec<&Edge> = result
1816 .iter()
1817 .filter(|e| e.orientation == Orientation::Horizontal)
1818 .collect();
1819 assert_eq!(horizontals.len(), 3);
1820 let mut ys: Vec<f64> = horizontals.iter().map(|e| e.top).collect();
1821 ys.sort_by(|a, b| a.partial_cmp(b).unwrap());
1822 assert_approx(ys[0], 50.0);
1823 assert_approx(ys[1], 50.0);
1824 assert_approx(ys[2], 50.1);
1825 }
1826
1827 fn has_intersection(intersections: &[Intersection], x: f64, y: f64) -> bool {
1830 intersections
1831 .iter()
1832 .any(|i| (i.x - x).abs() < 1e-6 && (i.y - y).abs() < 1e-6)
1833 }
1834
1835 #[test]
1836 fn test_intersections_empty_edges() {
1837 let result = edges_to_intersections(&[], 3.0, 3.0);
1838 assert!(result.is_empty());
1839 }
1840
1841 #[test]
1842 fn test_intersections_simple_cross() {
1843 let edges = vec![make_h_edge(0.0, 50.0, 100.0), make_v_edge(50.0, 0.0, 100.0)];
1847 let result = edges_to_intersections(&edges, 3.0, 3.0);
1848 assert_eq!(result.len(), 1);
1849 assert!(has_intersection(&result, 50.0, 50.0));
1850 }
1851
1852 #[test]
1853 fn test_intersections_t_intersection() {
1854 let edges = vec![
1858 make_h_edge(0.0, 50.0, 100.0),
1859 make_v_edge(50.0, 50.0, 100.0),
1860 ];
1861 let result = edges_to_intersections(&edges, 3.0, 3.0);
1862 assert_eq!(result.len(), 1);
1863 assert!(has_intersection(&result, 50.0, 50.0));
1864 }
1865
1866 #[test]
1867 fn test_intersections_l_intersection_corner() {
1868 let edges = vec![make_h_edge(50.0, 50.0, 100.0), make_v_edge(50.0, 0.0, 50.0)];
1872 let result = edges_to_intersections(&edges, 3.0, 3.0);
1873 assert_eq!(result.len(), 1);
1874 assert!(has_intersection(&result, 50.0, 50.0));
1875 }
1876
1877 #[test]
1878 fn test_intersections_no_intersection_parallel() {
1879 let edges = vec![make_h_edge(0.0, 50.0, 100.0), make_h_edge(0.0, 80.0, 100.0)];
1881 let result = edges_to_intersections(&edges, 3.0, 3.0);
1882 assert!(result.is_empty());
1883 }
1884
1885 #[test]
1886 fn test_intersections_no_intersection_non_overlapping() {
1887 let edges = vec![make_h_edge(0.0, 50.0, 40.0), make_v_edge(60.0, 0.0, 100.0)];
1891 let result = edges_to_intersections(&edges, 3.0, 3.0);
1892 assert!(result.is_empty());
1893 }
1894
1895 #[test]
1896 fn test_intersections_tolerance_based() {
1897 let edges = vec![make_h_edge(0.0, 50.0, 48.0), make_v_edge(50.0, 0.0, 100.0)];
1901 let result = edges_to_intersections(&edges, 3.0, 3.0);
1902 assert_eq!(result.len(), 1);
1903 assert!(has_intersection(&result, 50.0, 50.0));
1904 }
1905
1906 #[test]
1907 fn test_intersections_tolerance_y_based() {
1908 let edges = vec![make_h_edge(0.0, 50.0, 100.0), make_v_edge(50.0, 0.0, 48.0)];
1912 let result = edges_to_intersections(&edges, 3.0, 3.0);
1913 assert_eq!(result.len(), 1);
1914 assert!(has_intersection(&result, 50.0, 50.0));
1915 }
1916
1917 #[test]
1918 fn test_intersections_beyond_tolerance_no_match() {
1919 let edges = vec![make_h_edge(0.0, 50.0, 45.0), make_v_edge(50.0, 0.0, 100.0)];
1923 let result = edges_to_intersections(&edges, 3.0, 3.0);
1924 assert!(result.is_empty());
1925 }
1926
1927 #[test]
1928 fn test_intersections_grid_2x2() {
1929 let edges = vec![
1933 make_h_edge(0.0, 0.0, 100.0),
1934 make_h_edge(0.0, 50.0, 100.0),
1935 make_h_edge(0.0, 100.0, 100.0),
1936 make_v_edge(0.0, 0.0, 100.0),
1937 make_v_edge(50.0, 0.0, 100.0),
1938 make_v_edge(100.0, 0.0, 100.0),
1939 ];
1940 let result = edges_to_intersections(&edges, 3.0, 3.0);
1941 assert_eq!(result.len(), 9);
1942 assert!(has_intersection(&result, 0.0, 0.0));
1944 assert!(has_intersection(&result, 100.0, 0.0));
1945 assert!(has_intersection(&result, 0.0, 100.0));
1946 assert!(has_intersection(&result, 100.0, 100.0));
1947 assert!(has_intersection(&result, 50.0, 50.0));
1949 }
1950
1951 #[test]
1952 fn test_intersections_ignores_diagonal_edges() {
1953 let edges = vec![
1955 Edge {
1956 x0: 0.0,
1957 top: 0.0,
1958 x1: 100.0,
1959 bottom: 100.0,
1960 orientation: Orientation::Diagonal,
1961 source: crate::edges::EdgeSource::Curve,
1962 },
1963 make_h_edge(0.0, 50.0, 100.0),
1964 ];
1965 let result = edges_to_intersections(&edges, 3.0, 3.0);
1966 assert!(result.is_empty());
1967 }
1968
1969 #[test]
1970 fn test_intersections_multiple_h_one_v() {
1971 let edges = vec![
1975 make_h_edge(0.0, 10.0, 100.0),
1976 make_h_edge(0.0, 50.0, 100.0),
1977 make_h_edge(0.0, 90.0, 100.0),
1978 make_v_edge(50.0, 0.0, 100.0),
1979 ];
1980 let result = edges_to_intersections(&edges, 3.0, 3.0);
1981 assert_eq!(result.len(), 3);
1982 assert!(has_intersection(&result, 50.0, 10.0));
1983 assert!(has_intersection(&result, 50.0, 50.0));
1984 assert!(has_intersection(&result, 50.0, 90.0));
1985 }
1986
1987 #[test]
1988 fn test_intersections_separate_x_y_tolerance() {
1989 let edges = vec![make_h_edge(0.0, 50.0, 48.0), make_v_edge(50.0, 0.0, 100.0)];
1993 let result = edges_to_intersections(&edges, 1.0, 3.0);
1994 assert!(result.is_empty());
1995
1996 let result = edges_to_intersections(&edges, 3.0, 3.0);
1998 assert_eq!(result.len(), 1);
1999 }
2000
2001 #[test]
2002 fn test_intersections_no_duplicate_points() {
2003 let edges = vec![
2007 make_h_edge(0.0, 50.0, 100.0),
2008 make_h_edge(20.0, 50.0, 80.0),
2009 make_v_edge(50.0, 0.0, 100.0),
2010 ];
2011 let result = edges_to_intersections(&edges, 3.0, 3.0);
2012 assert_eq!(result.len(), 1);
2015 assert!(has_intersection(&result, 50.0, 50.0));
2016 }
2017
2018 fn make_intersection(x: f64, y: f64) -> Intersection {
2021 Intersection { x, y }
2022 }
2023
2024 #[test]
2025 fn test_intersections_to_cells_empty() {
2026 let result = intersections_to_cells(&[]);
2027 assert!(result.is_empty());
2028 }
2029
2030 #[test]
2031 fn test_intersections_to_cells_simple_2x2_grid() {
2032 let intersections = vec![
2036 make_intersection(0.0, 0.0),
2037 make_intersection(100.0, 0.0),
2038 make_intersection(0.0, 50.0),
2039 make_intersection(100.0, 50.0),
2040 ];
2041 let cells = intersections_to_cells(&intersections);
2042 assert_eq!(cells.len(), 1);
2043 assert_approx(cells[0].bbox.x0, 0.0);
2044 assert_approx(cells[0].bbox.top, 0.0);
2045 assert_approx(cells[0].bbox.x1, 100.0);
2046 assert_approx(cells[0].bbox.bottom, 50.0);
2047 assert!(cells[0].text.is_none());
2048 }
2049
2050 #[test]
2051 fn test_intersections_to_cells_3x3_grid() {
2052 let intersections = vec![
2057 make_intersection(0.0, 0.0),
2058 make_intersection(50.0, 0.0),
2059 make_intersection(100.0, 0.0),
2060 make_intersection(0.0, 30.0),
2061 make_intersection(50.0, 30.0),
2062 make_intersection(100.0, 30.0),
2063 make_intersection(0.0, 60.0),
2064 make_intersection(50.0, 60.0),
2065 make_intersection(100.0, 60.0),
2066 ];
2067 let cells = intersections_to_cells(&intersections);
2068 assert_eq!(cells.len(), 4);
2069
2070 assert!(cells.iter().any(|c| (c.bbox.x0 - 0.0).abs() < 1e-6
2072 && (c.bbox.top - 0.0).abs() < 1e-6
2073 && (c.bbox.x1 - 50.0).abs() < 1e-6
2074 && (c.bbox.bottom - 30.0).abs() < 1e-6));
2075 assert!(cells.iter().any(|c| (c.bbox.x0 - 50.0).abs() < 1e-6
2077 && (c.bbox.top - 0.0).abs() < 1e-6
2078 && (c.bbox.x1 - 100.0).abs() < 1e-6
2079 && (c.bbox.bottom - 30.0).abs() < 1e-6));
2080 assert!(cells.iter().any(|c| (c.bbox.x0 - 0.0).abs() < 1e-6
2082 && (c.bbox.top - 30.0).abs() < 1e-6
2083 && (c.bbox.x1 - 50.0).abs() < 1e-6
2084 && (c.bbox.bottom - 60.0).abs() < 1e-6));
2085 assert!(cells.iter().any(|c| (c.bbox.x0 - 50.0).abs() < 1e-6
2087 && (c.bbox.top - 30.0).abs() < 1e-6
2088 && (c.bbox.x1 - 100.0).abs() < 1e-6
2089 && (c.bbox.bottom - 60.0).abs() < 1e-6));
2090 }
2091
2092 #[test]
2093 fn test_intersections_to_cells_missing_corner() {
2094 let intersections = vec![
2098 make_intersection(0.0, 0.0),
2099 make_intersection(100.0, 0.0),
2100 make_intersection(0.0, 50.0),
2101 ];
2102 let cells = intersections_to_cells(&intersections);
2103 assert!(cells.is_empty());
2104 }
2105
2106 #[test]
2107 fn test_intersections_to_cells_irregular_grid() {
2108 let intersections = vec![
2118 make_intersection(0.0, 0.0),
2119 make_intersection(50.0, 0.0),
2120 make_intersection(100.0, 0.0),
2121 make_intersection(0.0, 30.0),
2122 make_intersection(100.0, 30.0),
2124 make_intersection(0.0, 60.0),
2125 make_intersection(50.0, 60.0),
2126 make_intersection(100.0, 60.0),
2127 ];
2128 let cells = intersections_to_cells(&intersections);
2129 assert_eq!(cells.len(), 0);
2147 }
2148
2149 #[test]
2150 fn test_intersections_to_cells_partial_grid_with_valid_cells() {
2151 let intersections = vec![
2157 make_intersection(0.0, 0.0),
2158 make_intersection(50.0, 0.0),
2159 make_intersection(0.0, 30.0),
2160 make_intersection(50.0, 30.0),
2161 make_intersection(100.0, 30.0),
2162 make_intersection(100.0, 60.0),
2163 ];
2164 let cells = intersections_to_cells(&intersections);
2165 assert_eq!(cells.len(), 1);
2166 assert_approx(cells[0].bbox.x0, 0.0);
2167 assert_approx(cells[0].bbox.top, 0.0);
2168 assert_approx(cells[0].bbox.x1, 50.0);
2169 assert_approx(cells[0].bbox.bottom, 30.0);
2170 }
2171
2172 #[test]
2173 fn test_intersections_to_cells_single_point() {
2174 let intersections = vec![make_intersection(50.0, 50.0)];
2176 let cells = intersections_to_cells(&intersections);
2177 assert!(cells.is_empty());
2178 }
2179
2180 #[test]
2181 fn test_intersections_to_cells_collinear_points() {
2182 let intersections = vec![
2184 make_intersection(0.0, 50.0),
2185 make_intersection(50.0, 50.0),
2186 make_intersection(100.0, 50.0),
2187 ];
2188 let cells = intersections_to_cells(&intersections);
2189 assert!(cells.is_empty());
2190 }
2191
2192 #[test]
2193 fn test_intersections_to_cells_4x3_grid() {
2194 let mut intersections = Vec::new();
2196 for &x in &[0.0, 40.0, 80.0, 120.0] {
2197 for &y in &[0.0, 30.0, 60.0] {
2198 intersections.push(make_intersection(x, y));
2199 }
2200 }
2201 let cells = intersections_to_cells(&intersections);
2202 assert_eq!(cells.len(), 6);
2203 }
2204
2205 #[test]
2206 fn test_intersections_to_cells_text_is_none() {
2207 let intersections = vec![
2209 make_intersection(0.0, 0.0),
2210 make_intersection(100.0, 0.0),
2211 make_intersection(0.0, 50.0),
2212 make_intersection(100.0, 50.0),
2213 ];
2214 let cells = intersections_to_cells(&intersections);
2215 for cell in &cells {
2216 assert!(cell.text.is_none());
2217 }
2218 }
2219
2220 fn make_cell(x0: f64, top: f64, x1: f64, bottom: f64) -> Cell {
2223 Cell {
2224 bbox: BBox::new(x0, top, x1, bottom),
2225 text: None,
2226 }
2227 }
2228
2229 #[test]
2230 fn test_cells_to_tables_empty() {
2231 let tables = cells_to_tables(Vec::new());
2232 assert!(tables.is_empty());
2233 }
2234
2235 #[test]
2236 fn test_cells_to_tables_single_cell() {
2237 let cells = vec![make_cell(0.0, 0.0, 50.0, 30.0)];
2239 let tables = cells_to_tables(cells);
2240 assert_eq!(tables.len(), 1);
2241 assert_approx(tables[0].bbox.x0, 0.0);
2242 assert_approx(tables[0].bbox.top, 0.0);
2243 assert_approx(tables[0].bbox.x1, 50.0);
2244 assert_approx(tables[0].bbox.bottom, 30.0);
2245 assert_eq!(tables[0].cells.len(), 1);
2246 assert_eq!(tables[0].rows.len(), 1);
2247 assert_eq!(tables[0].rows[0].len(), 1);
2248 assert_eq!(tables[0].columns.len(), 1);
2249 assert_eq!(tables[0].columns[0].len(), 1);
2250 }
2251
2252 #[test]
2253 fn test_cells_to_tables_single_table_2x2() {
2254 let cells = vec![
2256 make_cell(0.0, 0.0, 50.0, 30.0),
2257 make_cell(50.0, 0.0, 100.0, 30.0),
2258 make_cell(0.0, 30.0, 50.0, 60.0),
2259 make_cell(50.0, 30.0, 100.0, 60.0),
2260 ];
2261 let tables = cells_to_tables(cells);
2262 assert_eq!(tables.len(), 1);
2263 assert_approx(tables[0].bbox.x0, 0.0);
2264 assert_approx(tables[0].bbox.top, 0.0);
2265 assert_approx(tables[0].bbox.x1, 100.0);
2266 assert_approx(tables[0].bbox.bottom, 60.0);
2267 assert_eq!(tables[0].cells.len(), 4);
2268 assert_eq!(tables[0].rows.len(), 2);
2270 assert_eq!(tables[0].rows[0].len(), 2);
2271 assert_eq!(tables[0].rows[1].len(), 2);
2272 assert_eq!(tables[0].columns.len(), 2);
2274 assert_eq!(tables[0].columns[0].len(), 2);
2275 assert_eq!(tables[0].columns[1].len(), 2);
2276 }
2277
2278 #[test]
2279 fn test_cells_to_tables_single_table_rows_ordered() {
2280 let cells = vec![
2282 make_cell(50.0, 30.0, 100.0, 60.0), make_cell(0.0, 0.0, 50.0, 30.0), make_cell(50.0, 0.0, 100.0, 30.0), make_cell(0.0, 30.0, 50.0, 60.0), ];
2287 let tables = cells_to_tables(cells);
2288 assert_eq!(tables.len(), 1);
2289 assert_approx(tables[0].rows[0][0].bbox.x0, 0.0);
2291 assert_approx(tables[0].rows[0][1].bbox.x0, 50.0);
2292 assert_approx(tables[0].rows[1][0].bbox.x0, 0.0);
2294 assert_approx(tables[0].rows[1][1].bbox.x0, 50.0);
2295 }
2296
2297 #[test]
2298 fn test_cells_to_tables_single_table_columns_ordered() {
2299 let cells = vec![
2301 make_cell(0.0, 0.0, 50.0, 30.0),
2302 make_cell(50.0, 0.0, 100.0, 30.0),
2303 make_cell(0.0, 30.0, 50.0, 60.0),
2304 make_cell(50.0, 30.0, 100.0, 60.0),
2305 ];
2306 let tables = cells_to_tables(cells);
2307 assert_eq!(tables.len(), 1);
2308 assert_approx(tables[0].columns[0][0].bbox.top, 0.0);
2310 assert_approx(tables[0].columns[0][1].bbox.top, 30.0);
2311 assert_approx(tables[0].columns[1][0].bbox.top, 0.0);
2313 assert_approx(tables[0].columns[1][1].bbox.top, 30.0);
2314 }
2315
2316 #[test]
2317 fn test_cells_to_tables_two_separate_tables() {
2318 let cells = vec![
2322 make_cell(0.0, 0.0, 50.0, 30.0),
2324 make_cell(50.0, 0.0, 100.0, 30.0),
2325 make_cell(200.0, 200.0, 250.0, 230.0),
2327 make_cell(250.0, 200.0, 300.0, 230.0),
2328 ];
2329 let tables = cells_to_tables(cells);
2330 assert_eq!(tables.len(), 2);
2331
2332 let mut tables = tables;
2334 tables.sort_by(|a, b| a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap());
2335
2336 assert_approx(tables[0].bbox.x0, 0.0);
2338 assert_approx(tables[0].bbox.x1, 100.0);
2339 assert_eq!(tables[0].cells.len(), 2);
2340 assert_eq!(tables[0].rows.len(), 1);
2341 assert_eq!(tables[0].columns.len(), 2);
2342
2343 assert_approx(tables[1].bbox.x0, 200.0);
2345 assert_approx(tables[1].bbox.x1, 300.0);
2346 assert_eq!(tables[1].cells.len(), 2);
2347 assert_eq!(tables[1].rows.len(), 1);
2348 assert_eq!(tables[1].columns.len(), 2);
2349 }
2350
2351 #[test]
2352 fn test_cells_to_tables_3x3_grid() {
2353 let mut cells = Vec::new();
2355 for row in 0..3 {
2356 for col in 0..3 {
2357 let x0 = col as f64 * 40.0;
2358 let top = row as f64 * 30.0;
2359 cells.push(make_cell(x0, top, x0 + 40.0, top + 30.0));
2360 }
2361 }
2362 let tables = cells_to_tables(cells);
2363 assert_eq!(tables.len(), 1);
2364 assert_eq!(tables[0].cells.len(), 9);
2365 assert_eq!(tables[0].rows.len(), 3);
2366 for row in &tables[0].rows {
2367 assert_eq!(row.len(), 3);
2368 }
2369 assert_eq!(tables[0].columns.len(), 3);
2370 for col in &tables[0].columns {
2371 assert_eq!(col.len(), 3);
2372 }
2373 assert_approx(tables[0].bbox.x0, 0.0);
2374 assert_approx(tables[0].bbox.top, 0.0);
2375 assert_approx(tables[0].bbox.x1, 120.0);
2376 assert_approx(tables[0].bbox.bottom, 90.0);
2377 }
2378
2379 #[test]
2380 fn test_cells_to_tables_single_row() {
2381 let cells = vec![
2383 make_cell(0.0, 0.0, 40.0, 30.0),
2384 make_cell(40.0, 0.0, 80.0, 30.0),
2385 make_cell(80.0, 0.0, 120.0, 30.0),
2386 ];
2387 let tables = cells_to_tables(cells);
2388 assert_eq!(tables.len(), 1);
2389 assert_eq!(tables[0].rows.len(), 1);
2390 assert_eq!(tables[0].rows[0].len(), 3);
2391 assert_eq!(tables[0].columns.len(), 3);
2392 for col in &tables[0].columns {
2393 assert_eq!(col.len(), 1);
2394 }
2395 }
2396
2397 #[test]
2398 fn test_cells_to_tables_single_column() {
2399 let cells = vec![
2401 make_cell(0.0, 0.0, 50.0, 30.0),
2402 make_cell(0.0, 30.0, 50.0, 60.0),
2403 make_cell(0.0, 60.0, 50.0, 90.0),
2404 ];
2405 let tables = cells_to_tables(cells);
2406 assert_eq!(tables.len(), 1);
2407 assert_eq!(tables[0].rows.len(), 3);
2408 for row in &tables[0].rows {
2409 assert_eq!(row.len(), 1);
2410 }
2411 assert_eq!(tables[0].columns.len(), 1);
2412 assert_eq!(tables[0].columns[0].len(), 3);
2413 }
2414
2415 fn make_h_edge_src(x0: f64, y: f64, x1: f64, source: crate::edges::EdgeSource) -> Edge {
2418 Edge {
2419 x0,
2420 top: y,
2421 x1,
2422 bottom: y,
2423 orientation: Orientation::Horizontal,
2424 source,
2425 }
2426 }
2427
2428 fn make_v_edge_src(x: f64, top: f64, bottom: f64, source: crate::edges::EdgeSource) -> Edge {
2429 Edge {
2430 x0: x,
2431 top,
2432 x1: x,
2433 bottom,
2434 orientation: Orientation::Vertical,
2435 source,
2436 }
2437 }
2438
2439 #[test]
2440 fn test_lattice_simple_bordered_table() {
2441 let edges = vec![
2446 make_h_edge(0.0, 0.0, 100.0),
2447 make_h_edge(0.0, 30.0, 100.0),
2448 make_h_edge(0.0, 60.0, 100.0),
2449 make_v_edge(0.0, 0.0, 60.0),
2450 make_v_edge(50.0, 0.0, 60.0),
2451 make_v_edge(100.0, 0.0, 60.0),
2452 ];
2453 let settings = TableSettings::default();
2454 let finder = TableFinder::new(edges, settings);
2455 let tables = finder.find_tables();
2456
2457 assert_eq!(tables.len(), 1);
2458 assert_eq!(tables[0].cells.len(), 4);
2459 assert_eq!(tables[0].rows.len(), 2);
2460 assert_eq!(tables[0].rows[0].len(), 2);
2461 assert_eq!(tables[0].rows[1].len(), 2);
2462 assert_approx(tables[0].bbox.x0, 0.0);
2463 assert_approx(tables[0].bbox.top, 0.0);
2464 assert_approx(tables[0].bbox.x1, 100.0);
2465 assert_approx(tables[0].bbox.bottom, 60.0);
2466 }
2467
2468 #[test]
2469 fn test_lattice_with_rect_edges() {
2470 let edges = vec![
2473 make_h_edge_src(0.0, 0.0, 100.0, crate::edges::EdgeSource::RectTop),
2474 make_h_edge_src(0.0, 50.0, 100.0, crate::edges::EdgeSource::RectBottom),
2475 make_v_edge_src(0.0, 0.0, 50.0, crate::edges::EdgeSource::RectLeft),
2476 make_v_edge_src(100.0, 0.0, 50.0, crate::edges::EdgeSource::RectRight),
2477 ];
2478 let settings = TableSettings {
2479 strategy: Strategy::Lattice,
2480 ..TableSettings::default()
2481 };
2482 let finder = TableFinder::new(edges, settings);
2483 let tables = finder.find_tables();
2484
2485 assert_eq!(tables.len(), 1);
2487 assert_eq!(tables[0].cells.len(), 1);
2488 }
2489
2490 #[test]
2491 fn test_lattice_strict_excludes_rect_edges() {
2492 let edges = vec![
2495 make_h_edge_src(0.0, 0.0, 100.0, crate::edges::EdgeSource::RectTop),
2497 make_h_edge_src(0.0, 50.0, 100.0, crate::edges::EdgeSource::RectBottom),
2498 make_v_edge_src(0.0, 0.0, 50.0, crate::edges::EdgeSource::RectLeft),
2499 make_v_edge_src(100.0, 0.0, 50.0, crate::edges::EdgeSource::RectRight),
2500 ];
2501 let settings = TableSettings {
2502 strategy: Strategy::LatticeStrict,
2503 ..TableSettings::default()
2504 };
2505 let finder = TableFinder::new(edges, settings);
2506 let tables = finder.find_tables();
2507
2508 assert!(tables.is_empty());
2510 }
2511
2512 #[test]
2513 fn test_lattice_strict_with_line_edges() {
2514 let edges = vec![
2516 make_h_edge_src(0.0, 0.0, 100.0, crate::edges::EdgeSource::Line),
2517 make_h_edge_src(0.0, 50.0, 100.0, crate::edges::EdgeSource::Line),
2518 make_v_edge_src(0.0, 0.0, 50.0, crate::edges::EdgeSource::Line),
2519 make_v_edge_src(100.0, 0.0, 50.0, crate::edges::EdgeSource::Line),
2520 ];
2521 let settings = TableSettings {
2522 strategy: Strategy::LatticeStrict,
2523 ..TableSettings::default()
2524 };
2525 let finder = TableFinder::new(edges, settings);
2526 let tables = finder.find_tables();
2527
2528 assert_eq!(tables.len(), 1);
2529 assert_eq!(tables[0].cells.len(), 1);
2530 }
2531
2532 #[test]
2533 fn test_lattice_edge_min_length_filtering() {
2534 let edges = vec![
2537 make_h_edge(0.0, 0.0, 100.0), make_h_edge(0.0, 50.0, 100.0), make_v_edge(0.0, 0.0, 50.0), make_v_edge(100.0, 0.0, 50.0), make_h_edge(200.0, 0.0, 201.0), make_v_edge(200.0, 0.0, 2.0), ];
2546 let settings = TableSettings {
2547 edge_min_length: 3.0,
2548 ..TableSettings::default()
2549 };
2550 let finder = TableFinder::new(edges, settings);
2551 let tables = finder.find_tables();
2552
2553 assert_eq!(tables.len(), 1);
2555 assert_eq!(tables[0].cells.len(), 1);
2556 }
2557
2558 #[test]
2559 fn test_lattice_edge_min_length_filters_all() {
2560 let edges = vec![
2562 make_h_edge(0.0, 0.0, 2.0), make_h_edge(0.0, 50.0, 1.5), make_v_edge(0.0, 0.0, 2.5), make_v_edge(100.0, 0.0, 1.0), ];
2567 let settings = TableSettings {
2568 edge_min_length: 3.0,
2569 ..TableSettings::default()
2570 };
2571 let finder = TableFinder::new(edges, settings);
2572 let tables = finder.find_tables();
2573
2574 assert!(tables.is_empty());
2575 }
2576
2577 #[test]
2578 fn test_lattice_full_pipeline_snap_and_join() {
2579 let edges = vec![
2592 make_h_edge(0.0, 0.5, 60.0),
2593 make_h_edge(55.0, -0.3, 100.0),
2594 make_h_edge(0.0, 50.0, 100.0),
2595 make_v_edge(0.0, 0.0, 50.0),
2596 make_v_edge(100.2, 0.0, 25.0),
2597 make_v_edge(99.8, 23.0, 50.0),
2598 ];
2599 let settings = TableSettings::default(); let finder = TableFinder::new(edges, settings);
2601 let tables = finder.find_tables();
2602
2603 assert_eq!(tables.len(), 1);
2605 assert_eq!(tables[0].cells.len(), 1);
2606 }
2607
2608 #[test]
2609 fn test_lattice_empty_edges() {
2610 let finder = TableFinder::new(Vec::new(), TableSettings::default());
2612 let tables = finder.find_tables();
2613 assert!(tables.is_empty());
2614 }
2615
2616 #[test]
2617 fn test_lattice_no_intersections() {
2618 let edges = vec![
2620 make_h_edge(0.0, 0.0, 100.0),
2621 make_h_edge(0.0, 50.0, 100.0),
2622 ];
2624 let finder = TableFinder::new(edges, TableSettings::default());
2625 let tables = finder.find_tables();
2626 assert!(tables.is_empty());
2627 }
2628
2629 #[test]
2630 fn test_lattice_strict_mixed_line_and_rect_edges() {
2631 let edges = vec![
2634 make_h_edge_src(0.0, 0.0, 100.0, crate::edges::EdgeSource::Line),
2636 make_h_edge_src(0.0, 50.0, 100.0, crate::edges::EdgeSource::Line),
2637 make_v_edge_src(0.0, 0.0, 50.0, crate::edges::EdgeSource::Line),
2639 make_v_edge_src(100.0, 0.0, 50.0, crate::edges::EdgeSource::Line),
2640 make_v_edge_src(50.0, 0.0, 50.0, crate::edges::EdgeSource::RectLeft),
2642 ];
2643 let settings = TableSettings {
2644 strategy: Strategy::LatticeStrict,
2645 ..TableSettings::default()
2646 };
2647 let finder = TableFinder::new(edges, settings);
2648 let tables = finder.find_tables();
2649
2650 assert_eq!(tables.len(), 1);
2652 assert_eq!(tables[0].cells.len(), 1);
2653 }
2654
2655 fn make_char(text: &str, x0: f64, top: f64, x1: f64, bottom: f64) -> Char {
2658 Char {
2659 text: text.to_string(),
2660 bbox: BBox::new(x0, top, x1, bottom),
2661 fontname: "TestFont".to_string(),
2662 size: 12.0,
2663 doctop: top,
2664 upright: true,
2665 direction: crate::text::TextDirection::Ltr,
2666 stroking_color: None,
2667 non_stroking_color: None,
2668 ctm: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
2669 char_code: 0,
2670 }
2671 }
2672
2673 #[test]
2674 fn test_extract_text_single_word_in_cell() {
2675 let mut cells = vec![Cell {
2677 bbox: BBox::new(0.0, 0.0, 100.0, 50.0),
2678 text: None,
2679 }];
2680 let chars = vec![
2681 make_char("H", 10.0, 15.0, 20.0, 27.0),
2682 make_char("i", 20.0, 15.0, 28.0, 27.0),
2683 ];
2684 extract_text_for_cells(&mut cells, &chars);
2685 assert_eq!(cells[0].text, Some("Hi".to_string()));
2686 }
2687
2688 #[test]
2689 fn test_extract_text_empty_cell() {
2690 let mut cells = vec![Cell {
2692 bbox: BBox::new(0.0, 0.0, 100.0, 50.0),
2693 text: None,
2694 }];
2695 let chars: Vec<Char> = vec![];
2696 extract_text_for_cells(&mut cells, &chars);
2697 assert_eq!(cells[0].text, None);
2698 }
2699
2700 #[test]
2701 fn test_extract_text_chars_outside_cell() {
2702 let mut cells = vec![Cell {
2704 bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
2705 text: None,
2706 }];
2707 let chars = vec![
2709 make_char("A", 200.0, 10.0, 210.0, 22.0),
2710 make_char("B", 210.0, 10.0, 220.0, 22.0),
2711 ];
2712 extract_text_for_cells(&mut cells, &chars);
2713 assert_eq!(cells[0].text, None);
2714 }
2715
#[test]
fn test_extract_text_center_point_containment() {
    // "X" spans x = 48..60, so its horizontal midpoint (54) lies to the right
    // of the cell's right edge (50) even though the boxes overlap.
    let straddling = vec![make_char("X", 48.0, 10.0, 60.0, 22.0)];
    let mut grid = vec![Cell {
        bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
        text: None,
    }];

    extract_text_for_cells(&mut grid, &straddling);

    // The glyph is expected to be excluded from the cell.
    assert!(grid[0].text.is_none());
}
2729
#[test]
fn test_extract_text_center_inside_cell() {
    // "Y" spans x = 40..52: it pokes past the cell's right edge (50), but its
    // horizontal midpoint (46) is still inside.
    let overhanging = vec![make_char("Y", 40.0, 10.0, 52.0, 22.0)];
    let mut grid = vec![Cell {
        bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
        text: None,
    }];

    extract_text_for_cells(&mut grid, &overhanging);

    // The glyph is expected to be kept.
    assert_eq!(grid[0].text.as_deref(), Some("Y"));
}
2743
#[test]
fn test_extract_text_multiple_words_in_cell() {
    // One baseline (top = 15, bottom = 27) holding "Hi", a space glyph and "Bob",
    // described as (text, x0, x1) triples laid out left to right.
    let layout = [
        ("H", 10.0, 20.0),
        ("i", 20.0, 28.0),
        (" ", 28.0, 33.0),
        ("B", 33.0, 43.0),
        ("o", 43.0, 51.0),
        ("b", 51.0, 59.0),
    ];
    let glyphs: Vec<Char> = layout
        .iter()
        .map(|&(glyph, x0, x1)| make_char(glyph, x0, 15.0, x1, 27.0))
        .collect();
    let mut grid = vec![Cell {
        bbox: BBox::new(0.0, 0.0, 200.0, 50.0),
        text: None,
    }];

    extract_text_for_cells(&mut grid, &glyphs);

    // Both words are expected on one line, separated by a single space.
    assert_eq!(grid[0].text.as_deref(), Some("Hi Bob"));
}
2762
#[test]
fn test_extract_text_multiple_lines_in_cell() {
    // Two rows of glyphs in one tall cell: "AB" at top = 10 and "CD" at top = 40.
    let first_row = vec![
        make_char("A", 10.0, 10.0, 20.0, 22.0),
        make_char("B", 20.0, 10.0, 30.0, 22.0),
    ];
    let second_row = vec![
        make_char("C", 10.0, 40.0, 20.0, 52.0),
        make_char("D", 20.0, 40.0, 30.0, 52.0),
    ];
    let glyphs: Vec<Char> = first_row.into_iter().chain(second_row).collect();
    let mut grid = vec![Cell {
        bbox: BBox::new(0.0, 0.0, 200.0, 80.0),
        text: None,
    }];

    extract_text_for_cells(&mut grid, &glyphs);

    // The rows are expected to be joined with a newline.
    assert_eq!(grid[0].text.as_deref(), Some("AB\nCD"));
}
2781
#[test]
fn test_extract_text_two_cells() {
    // Two side-by-side cells sharing the boundary x = 50, given as (x0, x1) spans.
    let mut grid: Vec<Cell> = [(0.0, 50.0), (50.0, 100.0)]
        .iter()
        .map(|&(x0, x1)| Cell {
            bbox: BBox::new(x0, 0.0, x1, 30.0),
            text: None,
        })
        .collect();
    // One glyph per cell: "A" in the left one, "B" in the right one.
    let glyphs = vec![
        make_char("A", 10.0, 10.0, 20.0, 22.0),
        make_char("B", 60.0, 10.0, 70.0, 22.0),
    ];

    extract_text_for_cells(&mut grid, &glyphs);

    assert_eq!(grid[0].text.as_deref(), Some("A"));
    assert_eq!(grid[1].text.as_deref(), Some("B"));
}
2805
#[test]
fn test_extract_text_no_cells() {
    // Characters without any cells: the call must complete and leave the
    // (empty) cell list empty.
    let glyphs = vec![make_char("A", 10.0, 10.0, 20.0, 22.0)];
    let mut grid: Vec<Cell> = Vec::new();

    extract_text_for_cells(&mut grid, &glyphs);

    assert!(grid.is_empty());
}
2814
#[test]
fn test_extract_text_mixed_empty_and_populated_cells() {
    // Three 50-wide cells in a row, starting at x = 0, 50 and 100.
    let mut grid: Vec<Cell> = [0.0, 50.0, 100.0]
        .iter()
        .map(|&left| Cell {
            bbox: BBox::new(left, 0.0, left + 50.0, 30.0),
            text: None,
        })
        .collect();
    // Glyphs land in the first and third cells only; the middle one gets nothing.
    let glyphs = vec![
        make_char("X", 10.0, 10.0, 20.0, 22.0),
        make_char("Z", 110.0, 10.0, 120.0, 22.0),
    ];

    extract_text_for_cells(&mut grid, &glyphs);

    assert_eq!(grid[0].text.as_deref(), Some("X"));
    assert!(grid[1].text.is_none());
    assert_eq!(grid[2].text.as_deref(), Some("Z"));
}
2842
2843 fn make_word(text: &str, x0: f64, top: f64, x1: f64, bottom: f64) -> Word {
2846 Word {
2847 text: text.to_string(),
2848 bbox: BBox::new(x0, top, x1, bottom),
2849 doctop: top,
2850 direction: crate::text::TextDirection::Ltr,
2851 chars: vec![],
2852 }
2853 }
2854
#[test]
fn test_words_to_edges_stream_empty() {
    // No words in, no edges out.
    let produced = words_to_edges_stream(&[], 3.0, 3.0, 3, 1);

    assert!(produced.is_empty());
}
2860
#[test]
fn test_words_to_edges_stream_vertical_x0_alignment() {
    // Three stacked words share a left edge at x0 = 10; their right edges differ.
    let stacked = vec![
        make_word("A", 10.0, 10.0, 30.0, 22.0),
        make_word("B", 10.0, 30.0, 35.0, 42.0),
        make_word("C", 10.0, 50.0, 40.0, 62.0),
    ];

    let produced = words_to_edges_stream(&stacked, 3.0, 3.0, 3, 1);

    assert!(
        produced
            .iter()
            .any(|e| e.orientation == Orientation::Vertical),
        "Should produce vertical edges from x0 alignment"
    );

    // The vertical edge near x = 10 must run from the first word's top (10)
    // to the last word's bottom (62) and be tagged as a stream edge.
    let left_rule = produced
        .iter()
        .filter(|e| e.orientation == Orientation::Vertical)
        .find(|e| (e.x0 - 10.0).abs() < 1.0)
        .expect("Should have a vertical edge near x=10");
    assert!((left_rule.top - 10.0).abs() < 0.01);
    assert!((left_rule.bottom - 62.0).abs() < 0.01);
    assert_eq!(left_rule.source, EdgeSource::Stream);
}
2891
#[test]
fn test_words_to_edges_stream_vertical_x1_alignment() {
    // Three stacked words share a right edge at x1 = 50; their left edges differ.
    let stacked = vec![
        make_word("A", 10.0, 10.0, 50.0, 22.0),
        make_word("B", 20.0, 30.0, 50.0, 42.0),
        make_word("C", 15.0, 50.0, 50.0, 62.0),
    ];

    let produced = words_to_edges_stream(&stacked, 3.0, 3.0, 3, 1);

    assert!(
        produced
            .iter()
            .any(|e| e.orientation == Orientation::Vertical),
        "Should produce vertical edges from x1 alignment"
    );

    // The vertical edge near x = 50 must cover the full stack, top 10 to bottom 62.
    let right_rule = produced
        .iter()
        .filter(|e| e.orientation == Orientation::Vertical)
        .find(|e| (e.x0 - 50.0).abs() < 1.0)
        .expect("Should have a vertical edge near x=50");
    assert!((right_rule.top - 10.0).abs() < 0.01);
    assert!((right_rule.bottom - 62.0).abs() < 0.01);
}
2918
#[test]
fn test_words_to_edges_stream_horizontal_top_alignment() {
    // Three words on one row, all with top = 10.
    let row = vec![
        make_word("A", 10.0, 10.0, 30.0, 22.0),
        make_word("B", 40.0, 10.0, 60.0, 22.0),
        make_word("C", 70.0, 10.0, 90.0, 22.0),
    ];

    let produced = words_to_edges_stream(&row, 3.0, 3.0, 3, 1);

    assert!(
        produced
            .iter()
            .any(|e| e.orientation == Orientation::Horizontal),
        "Should produce horizontal edges from top alignment"
    );

    // The horizontal edge near y = 10 must stretch from the leftmost x0 (10)
    // to the rightmost x1 (90).
    let top_rule = produced
        .iter()
        .filter(|e| e.orientation == Orientation::Horizontal)
        .find(|e| (e.top - 10.0).abs() < 1.0)
        .expect("Should have a horizontal edge near y=10");
    assert!((top_rule.x0 - 10.0).abs() < 0.01);
    assert!((top_rule.x1 - 90.0).abs() < 0.01);
}
2946
#[test]
fn test_words_to_edges_stream_horizontal_bottom_alignment() {
    // Three words whose tops differ (10, 12, 8) but whose bottoms all sit at 22.
    let row = vec![
        make_word("A", 10.0, 10.0, 30.0, 22.0),
        make_word("B", 40.0, 12.0, 60.0, 22.0),
        make_word("C", 70.0, 8.0, 90.0, 22.0),
    ];

    let produced = words_to_edges_stream(&row, 3.0, 3.0, 3, 1);

    assert!(
        produced
            .iter()
            .any(|e| e.orientation == Orientation::Horizontal),
        "Should produce horizontal edges from bottom alignment"
    );

    // The horizontal edge near y = 22 must stretch from x = 10 to x = 90.
    let bottom_rule = produced
        .iter()
        .filter(|e| e.orientation == Orientation::Horizontal)
        .find(|e| (e.top - 22.0).abs() < 1.0)
        .expect("Should have a horizontal edge near y=22");
    assert!((bottom_rule.x0 - 10.0).abs() < 0.01);
    assert!((bottom_rule.x1 - 90.0).abs() < 0.01);
}
2973
#[test]
fn test_words_to_edges_stream_threshold_filtering_vertical() {
    // Only two words share x0 = 10, while the fourth argument (presumably the
    // vertical word minimum) is 3.
    let pair = vec![
        make_word("A", 10.0, 10.0, 30.0, 22.0),
        make_word("B", 10.0, 30.0, 35.0, 42.0),
    ];

    let produced = words_to_edges_stream(&pair, 3.0, 3.0, 3, 1);

    let has_vertical = produced
        .iter()
        .any(|e| e.orientation == Orientation::Vertical);
    assert!(
        !has_vertical,
        "Should not produce vertical edges below threshold"
    );
}
2992
#[test]
fn test_words_to_edges_stream_threshold_filtering_horizontal() {
    // Only two words share a row, while the last argument (the horizontal
    // word minimum) is raised to 3.
    let pair = vec![
        make_word("A", 10.0, 10.0, 30.0, 22.0),
        make_word("B", 40.0, 10.0, 60.0, 22.0),
    ];

    let produced = words_to_edges_stream(&pair, 3.0, 3.0, 3, 3);

    let has_horizontal = produced
        .iter()
        .any(|e| e.orientation == Orientation::Horizontal);
    assert!(
        !has_horizontal,
        "Should not produce horizontal edges below threshold"
    );
}
3011
#[test]
fn test_words_to_edges_stream_tolerance_grouping() {
    // Left edges at 10, 11.5 and 12 differ by at most 2, inside the 3.0
    // tolerance passed to the call.
    let nearly_aligned = vec![
        make_word("A", 10.0, 10.0, 30.0, 22.0),
        make_word("B", 11.5, 30.0, 35.0, 42.0),
        make_word("C", 12.0, 50.0, 40.0, 62.0),
    ];

    let produced = words_to_edges_stream(&nearly_aligned, 3.0, 3.0, 3, 1);

    let has_vertical = produced
        .iter()
        .any(|e| e.orientation == Orientation::Vertical);
    assert!(
        has_vertical,
        "Should group nearby x0 values within tolerance"
    );
}
3032
#[test]
fn test_words_to_edges_stream_no_grouping_beyond_tolerance() {
    // Words laid out diagonally: x0 values 10, 50, 90 and x1 values 30, 70, 110
    // are each 40 apart, far outside the 3.0 tolerance.
    let scattered = vec![
        make_word("A", 10.0, 10.0, 30.0, 22.0),
        make_word("B", 50.0, 30.0, 70.0, 42.0),
        make_word("C", 90.0, 50.0, 110.0, 62.0),
    ];

    let produced = words_to_edges_stream(&scattered, 3.0, 3.0, 3, 1);

    let has_vertical = produced
        .iter()
        .any(|e| e.orientation == Orientation::Vertical);
    assert!(
        !has_vertical,
        "Should not group x0 values that are far apart"
    );
}
3052
#[test]
fn test_stream_strategy_full_pipeline() {
    // A 3x3 grid of 20x12 words, generated row by row:
    // columns start at x = 10, 50, 90 and rows at top = 10, 30, 50.
    let labels = ["A", "B", "C", "D", "E", "F", "G", "H", "I"];
    let words: Vec<Word> = labels
        .iter()
        .enumerate()
        .map(|(idx, label)| {
            let column = (idx % 3) as f64;
            let row = (idx / 3) as f64;
            let x0 = 10.0 + 40.0 * column;
            let top = 10.0 + 20.0 * row;
            make_word(label, x0, top, x0 + 20.0, top + 12.0)
        })
        .collect();

    // Require three aligned words in both directions.
    let settings = TableSettings {
        strategy: Strategy::Stream,
        min_words_vertical: 3,
        min_words_horizontal: 3,
        ..TableSettings::default()
    };

    // No ruling edges are supplied; the words are the only input.
    let detector = TableFinder::new_with_words(vec![], words, settings);
    let found = detector.find_tables();

    assert!(!found.is_empty(), "Stream strategy should detect a table");

    let first_table = &found[0];
    assert!(
        !first_table.cells.is_empty(),
        "Table should have detected cells"
    );
}
3095
#[test]
fn test_stream_strategy_with_no_words() {
    // Stream strategy with neither edges nor words must find no tables.
    let stream_only = TableSettings {
        strategy: Strategy::Stream,
        ..TableSettings::default()
    };

    let detector = TableFinder::new_with_words(vec![], vec![], stream_only);
    let found = detector.find_tables();

    assert!(found.is_empty());
}
3107
#[test]
fn test_stream_edge_source_is_stream() {
    // Three stacked words sharing x0 = 10: the same input the x0-alignment
    // test uses to prove that at least one edge is produced.
    let words = vec![
        make_word("A", 10.0, 10.0, 30.0, 22.0),
        make_word("B", 10.0, 30.0, 35.0, 42.0),
        make_word("C", 10.0, 50.0, 40.0, 62.0),
    ];
    let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 1);

    // Guard against a vacuous pass: the loop below checks nothing when no
    // edges come back.
    assert!(
        !edges.is_empty(),
        "Expected at least one stream edge to inspect"
    );

    // Every edge derived from words must be tagged as a stream edge.
    for edge in &edges {
        assert_eq!(
            edge.source,
            EdgeSource::Stream,
            "All stream edges should have EdgeSource::Stream"
        );
    }
}
3125
#[test]
fn test_stream_strategy_min_words_horizontal_default() {
    // Three words on one row, with the horizontal minimum (last argument) at 1.
    let row = vec![
        make_word("A", 10.0, 10.0, 30.0, 22.0),
        make_word("B", 50.0, 10.0, 70.0, 22.0),
        make_word("C", 90.0, 10.0, 110.0, 22.0),
    ];

    let produced = words_to_edges_stream(&row, 3.0, 3.0, 3, 1);

    let has_horizontal = produced
        .iter()
        .any(|e| e.orientation == Orientation::Horizontal);
    assert!(
        has_horizontal,
        "min_words_horizontal=1 should produce horizontal edges for 3 aligned words"
    );
}
3146
#[test]
fn test_explicit_lines_to_edges_basic() {
    // Three horizontal lines (y = 10, 30, 50) and three vertical lines
    // (x = 100, 200, 300).
    let spec = ExplicitLines {
        horizontal_lines: vec![10.0, 30.0, 50.0],
        vertical_lines: vec![100.0, 200.0, 300.0],
    };

    let produced = explicit_lines_to_edges(&spec);

    // One edge per line: six in total, three per orientation.
    assert_eq!(produced.len(), 6);

    let horizontals = produced
        .iter()
        .filter(|e| e.orientation == Orientation::Horizontal)
        .collect::<Vec<_>>();
    let verticals = produced
        .iter()
        .filter(|e| e.orientation == Orientation::Vertical)
        .collect::<Vec<_>>();
    assert_eq!(horizontals.len(), 3);
    assert_eq!(verticals.len(), 3);

    // Horizontal edges must span the vertical lines' x-range (100..300) ...
    for edge in horizontals.iter() {
        assert_eq!(edge.x0, 100.0);
        assert_eq!(edge.x1, 300.0);
    }
    // ... and vertical edges the horizontal lines' y-range (10..50).
    for edge in verticals.iter() {
        assert_eq!(edge.top, 10.0);
        assert_eq!(edge.bottom, 50.0);
    }
}
3183
#[test]
fn test_explicit_lines_to_edges_empty_horizontal() {
    // Vertical lines alone (no horizontal ones) are expected to yield no edges.
    let spec = ExplicitLines {
        horizontal_lines: Vec::new(),
        vertical_lines: vec![100.0, 200.0],
    };

    let produced = explicit_lines_to_edges(&spec);

    assert!(produced.is_empty());
}
3194
#[test]
fn test_explicit_lines_to_edges_empty_vertical() {
    // Horizontal lines alone (no vertical ones) are expected to yield no edges.
    let spec = ExplicitLines {
        horizontal_lines: vec![10.0, 20.0],
        vertical_lines: Vec::new(),
    };

    let produced = explicit_lines_to_edges(&spec);

    assert!(produced.is_empty());
}
3205
#[test]
fn test_explicit_lines_to_edges_both_empty() {
    // No lines in either direction: no edges.
    let spec = ExplicitLines {
        horizontal_lines: Vec::new(),
        vertical_lines: Vec::new(),
    };

    let produced = explicit_lines_to_edges(&spec);

    assert!(produced.is_empty());
}
3215
#[test]
fn test_explicit_edge_source_is_explicit() {
    // Two lines in each direction, so the conversion has edges to produce.
    let explicit = ExplicitLines {
        horizontal_lines: vec![10.0, 50.0],
        vertical_lines: vec![100.0, 200.0],
    };
    let edges = explicit_lines_to_edges(&explicit);

    // Guard against a vacuous pass: the loop below checks nothing when no
    // edges come back.
    assert!(
        !edges.is_empty(),
        "Expected at least one explicit edge to inspect"
    );

    // Every edge built from explicit lines must be tagged as explicit.
    for edge in &edges {
        assert_eq!(edge.source, EdgeSource::Explicit);
    }
}
3227
#[test]
fn test_explicit_grid_detection() {
    // Three lines per direction, with no detected edges at all.
    let grid_lines = ExplicitLines {
        horizontal_lines: vec![0.0, 20.0, 40.0],
        vertical_lines: vec![0.0, 50.0, 100.0],
    };
    let settings = TableSettings {
        strategy: Strategy::Explicit,
        explicit_lines: Some(grid_lines),
        ..TableSettings::default()
    };

    let detector = TableFinder::new(vec![], settings);
    let found = detector.find_tables();

    // Expect a single table made of four cells in two rows and two columns.
    assert_eq!(found.len(), 1);
    let table = &found[0];
    assert_eq!(table.cells.len(), 4);
    assert_eq!(table.rows.len(), 2);
    assert_eq!(table.columns.len(), 2);
}
3248
#[test]
fn test_explicit_2x2_grid() {
    // Two lines per direction outline a single rectangle.
    let outline = ExplicitLines {
        horizontal_lines: vec![10.0, 50.0],
        vertical_lines: vec![100.0, 300.0],
    };
    let settings = TableSettings {
        strategy: Strategy::Explicit,
        explicit_lines: Some(outline),
        ..TableSettings::default()
    };

    let detector = TableFinder::new(vec![], settings);
    let found = detector.find_tables();

    assert_eq!(found.len(), 1);
    assert_eq!(found[0].cells.len(), 1);

    // The lone cell's box must coincide with the outline:
    // x from 100 to 300, y from 10 to 50.
    let bounds = &found[0].cells[0].bbox;
    assert_eq!(
        (bounds.x0, bounds.top, bounds.x1, bounds.bottom),
        (100.0, 10.0, 300.0, 50.0)
    );
}
3272
#[test]
fn test_explicit_strategy_no_explicit_lines() {
    // Explicit strategy selected, but no lines and no edges supplied:
    // nothing can be detected.
    let settings = TableSettings {
        explicit_lines: None,
        strategy: Strategy::Explicit,
        ..TableSettings::default()
    };

    let detector = TableFinder::new(vec![], settings);
    let found = detector.find_tables();

    assert!(found.is_empty());
}
3285
#[test]
fn test_explicit_mixing_with_detected_edges() {
    // Vertical sides come from detected edges (x = 0 and x = 100, spanning y = 0..40).
    let left_side = make_v_edge(0.0, 0.0, 40.0);
    let right_side = make_v_edge(100.0, 0.0, 40.0);
    // Horizontal sides come from explicit lines only; no explicit vertical lines.
    let horizontals_only = ExplicitLines {
        horizontal_lines: vec![0.0, 40.0],
        vertical_lines: Vec::new(),
    };
    let settings = TableSettings {
        strategy: Strategy::Explicit,
        explicit_lines: Some(horizontals_only),
        ..TableSettings::default()
    };

    let detector = TableFinder::new(vec![left_side, right_side], settings);
    let found = detector.find_tables();

    // The two edge sources together are expected to enclose a single cell.
    assert_eq!(found.len(), 1);
    assert_eq!(found[0].cells.len(), 1);
}
3308
#[test]
fn test_explicit_single_line_each() {
    // A single line per direction cannot enclose anything, so no table is expected.
    let cross = ExplicitLines {
        horizontal_lines: vec![10.0],
        vertical_lines: vec![100.0],
    };
    let settings = TableSettings {
        strategy: Strategy::Explicit,
        explicit_lines: Some(cross),
        ..TableSettings::default()
    };

    let detector = TableFinder::new(vec![], settings);
    let found = detector.find_tables();

    assert!(found.is_empty());
}
3325
#[test]
fn test_explicit_unsorted_coordinates() {
    // The same three-by-three line set as a regular grid, supplied out of order.
    let shuffled = ExplicitLines {
        horizontal_lines: vec![40.0, 0.0, 20.0],
        vertical_lines: vec![100.0, 0.0, 50.0],
    };
    let settings = TableSettings {
        strategy: Strategy::Explicit,
        explicit_lines: Some(shuffled),
        ..TableSettings::default()
    };

    let detector = TableFinder::new(vec![], settings);
    let found = detector.find_tables();

    // Input order must not matter: one table with four cells is still expected.
    assert_eq!(found.len(), 1);
    assert_eq!(found[0].cells.len(), 4);
}
3344}