Skip to main content

pdfplumber_core/
table.rs

1//! Table detection types and pipeline.
2//!
3//! This module provides the configuration types, data structures, and orchestration
4//! for detecting tables in PDF pages using Lattice, Stream, or Explicit strategies.
5
6use crate::edges::{Edge, EdgeSource};
7use crate::geometry::{BBox, Orientation};
8use crate::text::Char;
9use crate::words::{Word, WordExtractor, WordOptions};
10
11/// Strategy for table detection.
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
13#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
14pub enum Strategy {
15    /// Detect tables using visible lines and rect edges.
16    #[default]
17    Lattice,
18    /// Detect tables using only visible lines (no rect edges).
19    LatticeStrict,
20    /// Detect tables from text alignment patterns (no visible borders needed).
21    Stream,
22    /// Detect tables using user-provided line coordinates.
23    Explicit,
24}
25
26/// Configuration for table detection.
27///
28/// All tolerance values default to 3.0, matching Python pdfplumber defaults.
29#[derive(Debug, Clone, PartialEq)]
30#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
31pub struct TableSettings {
32    /// Table detection strategy.
33    pub strategy: Strategy,
34    /// General snap tolerance for aligning nearby edges.
35    pub snap_tolerance: f64,
36    /// Snap tolerance for horizontal alignment.
37    pub snap_x_tolerance: f64,
38    /// Snap tolerance for vertical alignment.
39    pub snap_y_tolerance: f64,
40    /// General join tolerance for merging collinear edges.
41    pub join_tolerance: f64,
42    /// Join tolerance for horizontal edges.
43    pub join_x_tolerance: f64,
44    /// Join tolerance for vertical edges.
45    pub join_y_tolerance: f64,
46    /// Minimum edge length to consider for table detection.
47    pub edge_min_length: f64,
48    /// Minimum number of words sharing a vertical alignment for Stream strategy.
49    pub min_words_vertical: usize,
50    /// Minimum number of words sharing a horizontal alignment for Stream strategy.
51    pub min_words_horizontal: usize,
52    /// General text tolerance for assigning text to cells.
53    pub text_tolerance: f64,
54    /// Text tolerance along x-axis.
55    pub text_x_tolerance: f64,
56    /// Text tolerance along y-axis.
57    pub text_y_tolerance: f64,
58    /// General intersection tolerance for detecting edge crossings.
59    pub intersection_tolerance: f64,
60    /// Intersection tolerance along x-axis.
61    pub intersection_x_tolerance: f64,
62    /// Intersection tolerance along y-axis.
63    pub intersection_y_tolerance: f64,
64    /// Optional explicit line coordinates for Explicit strategy.
65    pub explicit_lines: Option<ExplicitLines>,
66    /// Minimum accuracy threshold for auto-filtering low-quality tables (0.0 to 1.0).
67    /// Tables with accuracy below this threshold are discarded. Default: None (no filtering).
68    pub min_accuracy: Option<f64>,
69}
70
71impl Default for TableSettings {
72    fn default() -> Self {
73        Self {
74            strategy: Strategy::default(),
75            snap_tolerance: 3.0,
76            snap_x_tolerance: 3.0,
77            snap_y_tolerance: 3.0,
78            join_tolerance: 3.0,
79            join_x_tolerance: 3.0,
80            join_y_tolerance: 3.0,
81            edge_min_length: 3.0,
82            min_words_vertical: 3,
83            min_words_horizontal: 1,
84            text_tolerance: 3.0,
85            text_x_tolerance: 3.0,
86            text_y_tolerance: 3.0,
87            intersection_tolerance: 3.0,
88            intersection_x_tolerance: 3.0,
89            intersection_y_tolerance: 3.0,
90            explicit_lines: None,
91            min_accuracy: None,
92        }
93    }
94}
95
96/// User-provided line coordinates for Explicit strategy.
97#[derive(Debug, Clone, PartialEq)]
98#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
99pub struct ExplicitLines {
100    /// Y-coordinates for horizontal lines.
101    pub horizontal_lines: Vec<f64>,
102    /// X-coordinates for vertical lines.
103    pub vertical_lines: Vec<f64>,
104}
105
106/// A detected table cell.
107#[derive(Debug, Clone, PartialEq)]
108#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
109pub struct Cell {
110    /// Bounding box of the cell.
111    pub bbox: BBox,
112    /// Text content within the cell, if any.
113    pub text: Option<String>,
114}
115
116/// Quality metrics for a detected table.
117#[derive(Debug, Clone, PartialEq)]
118#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
119pub struct TableQuality {
120    /// Percentage of cells with non-empty text (0.0 to 1.0).
121    pub accuracy: f64,
122    /// Average ratio of whitespace in cell text (0.0 to 1.0, lower is better).
123    pub whitespace: f64,
124}
125
126/// A detected table.
127#[derive(Debug, Clone, PartialEq)]
128#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
129pub struct Table {
130    /// Bounding box enclosing the entire table.
131    pub bbox: BBox,
132    /// All cells in the table.
133    pub cells: Vec<Cell>,
134    /// Cells organized into rows (top-to-bottom, left-to-right within each row).
135    pub rows: Vec<Vec<Cell>>,
136    /// Cells organized into columns (left-to-right, top-to-bottom within each column).
137    pub columns: Vec<Vec<Cell>>,
138}
139
140impl Table {
141    /// Percentage of cells with non-empty text (0.0 to 1.0).
142    ///
143    /// Returns 0.0 if the table has no cells.
144    pub fn accuracy(&self) -> f64 {
145        if self.cells.is_empty() {
146            return 0.0;
147        }
148        let filled = self
149            .cells
150            .iter()
151            .filter(|c| c.text.as_ref().is_some_and(|t| !t.trim().is_empty()))
152            .count();
153        filled as f64 / self.cells.len() as f64
154    }
155
156    /// Average ratio of whitespace characters in cell text (0.0 to 1.0, lower is better).
157    ///
158    /// Only considers cells that have text. Returns 0.0 if no cells have text.
159    pub fn whitespace(&self) -> f64 {
160        let ratios: Vec<f64> = self
161            .cells
162            .iter()
163            .filter_map(|c| c.text.as_ref())
164            .filter(|t| !t.is_empty())
165            .map(|t| {
166                let ws = t.chars().filter(|ch| ch.is_whitespace()).count();
167                ws as f64 / t.len() as f64
168            })
169            .collect();
170        if ratios.is_empty() {
171            return 0.0;
172        }
173        ratios.iter().sum::<f64>() / ratios.len() as f64
174    }
175
176    /// Combined quality metrics for the table.
177    pub fn quality(&self) -> TableQuality {
178        TableQuality {
179            accuracy: self.accuracy(),
180            whitespace: self.whitespace(),
181        }
182    }
183}
184
185/// Snap nearby parallel edges to aligned positions.
186///
187/// Groups edges by orientation and clusters them along the perpendicular axis.
188/// For horizontal edges, clusters by y-coordinate within `snap_y_tolerance`.
189/// For vertical edges, clusters by x-coordinate within `snap_x_tolerance`.
190/// Clustered edges have their perpendicular coordinates replaced with the cluster mean.
191/// Diagonal edges pass through unchanged.
192///
193/// This does **not** merge edges — it only aligns their positions.
194pub fn snap_edges(edges: Vec<Edge>, snap_x_tolerance: f64, snap_y_tolerance: f64) -> Vec<Edge> {
195    let mut result = Vec::with_capacity(edges.len());
196    let mut horizontals: Vec<Edge> = Vec::new();
197    let mut verticals: Vec<Edge> = Vec::new();
198
199    for edge in edges {
200        match edge.orientation {
201            Orientation::Horizontal => horizontals.push(edge),
202            Orientation::Vertical => verticals.push(edge),
203            Orientation::Diagonal => result.push(edge),
204        }
205    }
206
207    // Snap horizontal edges: cluster by y-coordinate (top/bottom)
208    snap_group(
209        &mut horizontals,
210        snap_y_tolerance,
211        |e| e.top,
212        |e, v| {
213            e.top = v;
214            e.bottom = v;
215        },
216    );
217    result.extend(horizontals);
218
219    // Snap vertical edges: cluster by x-coordinate (x0/x1)
220    snap_group(
221        &mut verticals,
222        snap_x_tolerance,
223        |e| e.x0,
224        |e, v| {
225            e.x0 = v;
226            e.x1 = v;
227        },
228    );
229    result.extend(verticals);
230
231    result
232}
233
234/// Cluster edges along a single axis and snap each cluster to its mean.
235fn snap_group<F, G>(edges: &mut [Edge], tolerance: f64, key: F, mut set: G)
236where
237    F: Fn(&Edge) -> f64,
238    G: FnMut(&mut Edge, f64),
239{
240    if edges.is_empty() {
241        return;
242    }
243
244    // Sort by the perpendicular coordinate
245    edges.sort_by(|a, b| key(a).partial_cmp(&key(b)).unwrap());
246
247    // Build clusters of consecutive edges within tolerance
248    let mut cluster_start = 0;
249    for i in 1..=edges.len() {
250        let end_of_cluster =
251            i == edges.len() || (key(&edges[i]) - key(&edges[cluster_start])).abs() > tolerance;
252        if end_of_cluster {
253            // Compute mean of the cluster
254            let sum: f64 = (cluster_start..i).map(|j| key(&edges[j])).sum();
255            let mean = sum / (i - cluster_start) as f64;
256            for edge in &mut edges[cluster_start..i] {
257                set(edge, mean);
258            }
259            cluster_start = i;
260        }
261    }
262}
263
264/// Merge overlapping or adjacent collinear edge segments.
265///
266/// Groups edges by orientation and collinear position, then merges segments
267/// within each group when their gap is within the join tolerance.
268/// For horizontal edges, segments on the same y-line merge when the gap along x
269/// is within `join_x_tolerance`. For vertical edges, segments on the same x-line
270/// merge when the gap along y is within `join_y_tolerance`.
271/// Diagonal edges pass through unchanged.
272pub fn join_edge_group(
273    edges: Vec<Edge>,
274    join_x_tolerance: f64,
275    join_y_tolerance: f64,
276) -> Vec<Edge> {
277    let mut result: Vec<Edge> = Vec::new();
278    let mut horizontals: Vec<Edge> = Vec::new();
279    let mut verticals: Vec<Edge> = Vec::new();
280
281    for edge in edges {
282        match edge.orientation {
283            Orientation::Horizontal => horizontals.push(edge),
284            Orientation::Vertical => verticals.push(edge),
285            Orientation::Diagonal => result.push(edge),
286        }
287    }
288
289    // Join horizontal edges: group by y-coordinate, merge along x-axis
290    result.extend(join_collinear(
291        horizontals,
292        |e| e.top,
293        |e| (e.x0, e.x1),
294        |proto, start, end| Edge {
295            x0: start,
296            top: proto.top,
297            x1: end,
298            bottom: proto.bottom,
299            orientation: proto.orientation,
300            source: proto.source,
301        },
302        join_x_tolerance,
303    ));
304
305    // Join vertical edges: group by x-coordinate, merge along y-axis
306    result.extend(join_collinear(
307        verticals,
308        |e| e.x0,
309        |e| (e.top, e.bottom),
310        |proto, start, end| Edge {
311            x0: proto.x0,
312            top: start,
313            x1: proto.x1,
314            bottom: end,
315            orientation: proto.orientation,
316            source: proto.source,
317        },
318        join_y_tolerance,
319    ));
320
321    result
322}
323
324/// Group edges by a collinear key, then merge overlapping/adjacent segments within each group.
325fn join_collinear<K, S, B>(
326    mut edges: Vec<Edge>,
327    key: K,
328    span: S,
329    build: B,
330    tolerance: f64,
331) -> Vec<Edge>
332where
333    K: Fn(&Edge) -> f64,
334    S: Fn(&Edge) -> (f64, f64),
335    B: Fn(&Edge, f64, f64) -> Edge,
336{
337    if edges.is_empty() {
338        return Vec::new();
339    }
340
341    // Sort by collinear key first, then by span start
342    edges.sort_by(|a, b| {
343        key(a)
344            .partial_cmp(&key(b))
345            .unwrap()
346            .then_with(|| span(a).0.partial_cmp(&span(b).0).unwrap())
347    });
348
349    let mut result = Vec::new();
350    let mut i = 0;
351
352    while i < edges.len() {
353        // Collect group of edges on the same collinear line (exact match after snapping)
354        let group_key = key(&edges[i]);
355        let mut j = i + 1;
356        while j < edges.len() && (key(&edges[j]) - group_key).abs() < 1e-9 {
357            j += 1;
358        }
359
360        // Merge segments within this collinear group
361        let (mut cur_start, mut cur_end) = span(&edges[i]);
362        let mut proto_idx = i;
363
364        for k in (i + 1)..j {
365            let (s, e) = span(&edges[k]);
366            if s <= cur_end + tolerance {
367                // Overlapping or within tolerance — extend
368                if e > cur_end {
369                    cur_end = e;
370                }
371            } else {
372                // Gap too large — emit current merged edge, start new one
373                result.push(build(&edges[proto_idx], cur_start, cur_end));
374                cur_start = s;
375                cur_end = e;
376                proto_idx = k;
377            }
378        }
379        result.push(build(&edges[proto_idx], cur_start, cur_end));
380
381        i = j;
382    }
383
384    result
385}
386
387/// An intersection point between horizontal and vertical edges.
388#[derive(Debug, Clone, PartialEq)]
389#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
390pub struct Intersection {
391    /// X coordinate of the intersection point.
392    pub x: f64,
393    /// Y coordinate of the intersection point.
394    pub y: f64,
395}
396
397/// Find all intersection points between horizontal and vertical edges.
398///
399/// An intersection exists when a vertical edge's x-coordinate falls within a
400/// horizontal edge's x-span (within `x_tolerance`) AND the horizontal edge's
401/// y-coordinate falls within the vertical edge's y-span (within `y_tolerance`).
402///
403/// Only considers actual overlapping segments, not infinite line extensions.
404/// Diagonal edges are ignored.
405pub fn edges_to_intersections(
406    edges: &[Edge],
407    x_tolerance: f64,
408    y_tolerance: f64,
409) -> Vec<Intersection> {
410    let horizontals: Vec<&Edge> = edges
411        .iter()
412        .filter(|e| e.orientation == Orientation::Horizontal)
413        .collect();
414    let verticals: Vec<&Edge> = edges
415        .iter()
416        .filter(|e| e.orientation == Orientation::Vertical)
417        .collect();
418
419    let mut intersections = Vec::new();
420
421    for h in &horizontals {
422        let h_y = h.top; // horizontal edge: top == bottom
423        for v in &verticals {
424            let v_x = v.x0; // vertical edge: x0 == x1
425
426            // Check that the vertical's x is within the horizontal's x-span (with tolerance)
427            // and the horizontal's y is within the vertical's y-span (with tolerance)
428            if v_x >= h.x0 - x_tolerance
429                && v_x <= h.x1 + x_tolerance
430                && h_y >= v.top - y_tolerance
431                && h_y <= v.bottom + y_tolerance
432            {
433                intersections.push(Intersection { x: v_x, y: h_y });
434            }
435        }
436    }
437
438    // Sort and deduplicate intersection points at the same location
439    intersections.sort_by(|a, b| {
440        a.x.partial_cmp(&b.x)
441            .unwrap()
442            .then_with(|| a.y.partial_cmp(&b.y).unwrap())
443    });
444    intersections.dedup_by(|a, b| (a.x - b.x).abs() < 1e-9 && (a.y - b.y).abs() < 1e-9);
445
446    intersections
447}
448
449/// Construct rectangular cells from a grid of intersection points.
450///
451/// Groups intersection points into a grid of unique y-rows and x-columns (sorted).
452/// For each pair of adjacent rows and adjacent columns, checks if all 4 corner
453/// intersections exist. If so, creates a [`Cell`] with the corresponding bounding box.
454/// Missing corners are skipped gracefully.
455pub fn intersections_to_cells(intersections: &[Intersection]) -> Vec<Cell> {
456    if intersections.is_empty() {
457        return Vec::new();
458    }
459
460    // Collect unique x and y coordinates (sorted)
461    let mut xs: Vec<f64> = Vec::new();
462    let mut ys: Vec<f64> = Vec::new();
463
464    for pt in intersections {
465        if !xs.iter().any(|&x| (x - pt.x).abs() < 1e-9) {
466            xs.push(pt.x);
467        }
468        if !ys.iter().any(|&y| (y - pt.y).abs() < 1e-9) {
469            ys.push(pt.y);
470        }
471    }
472
473    xs.sort_by(|a, b| a.partial_cmp(b).unwrap());
474    ys.sort_by(|a, b| a.partial_cmp(b).unwrap());
475
476    // Helper to check if an intersection exists at (x, y)
477    let has_point = |x: f64, y: f64| -> bool {
478        intersections
479            .iter()
480            .any(|pt| (pt.x - x).abs() < 1e-9 && (pt.y - y).abs() < 1e-9)
481    };
482
483    let mut cells = Vec::new();
484
485    // For each pair of adjacent rows and columns, check all 4 corners
486    for yi in 0..ys.len().saturating_sub(1) {
487        for xi in 0..xs.len().saturating_sub(1) {
488            let x0 = xs[xi];
489            let x1 = xs[xi + 1];
490            let top = ys[yi];
491            let bottom = ys[yi + 1];
492
493            if has_point(x0, top)
494                && has_point(x1, top)
495                && has_point(x0, bottom)
496                && has_point(x1, bottom)
497            {
498                cells.push(Cell {
499                    bbox: BBox::new(x0, top, x1, bottom),
500                    text: None,
501                });
502            }
503        }
504    }
505
506    cells
507}
508
509/// Group adjacent cells into distinct tables.
510///
511/// Cells that share an edge (same x-boundary or y-boundary) are grouped into
512/// the same table using a union-find algorithm. Each table receives:
513/// - A `bbox` that is the union of all its cells' bounding boxes
514/// - `rows`: cells organized by y-coordinate (top-to-bottom), sorted left-to-right within each row
515/// - `columns`: cells organized by x-coordinate (left-to-right), sorted top-to-bottom within each column
516pub fn cells_to_tables(cells: Vec<Cell>) -> Vec<Table> {
517    if cells.is_empty() {
518        return Vec::new();
519    }
520
521    let n = cells.len();
522
523    // Union-Find to group cells sharing edges
524    let mut parent: Vec<usize> = (0..n).collect();
525
526    fn find(parent: &mut [usize], mut i: usize) -> usize {
527        while parent[i] != i {
528            parent[i] = parent[parent[i]]; // path compression
529            i = parent[i];
530        }
531        i
532    }
533
534    fn union(parent: &mut [usize], a: usize, b: usize) {
535        let ra = find(parent, a);
536        let rb = find(parent, b);
537        if ra != rb {
538            parent[rb] = ra;
539        }
540    }
541
542    // Two cells share an edge if they have a common boundary segment:
543    // - Same x0/x1 boundary AND overlapping y-ranges, or
544    // - Same top/bottom boundary AND overlapping x-ranges
545    for i in 0..n {
546        for j in (i + 1)..n {
547            if cells_share_edge(&cells[i], &cells[j]) {
548                union(&mut parent, i, j);
549            }
550        }
551    }
552
553    // Group cells by their root
554    let mut groups: std::collections::HashMap<usize, Vec<usize>> = std::collections::HashMap::new();
555    for i in 0..n {
556        let root = find(&mut parent, i);
557        groups.entry(root).or_default().push(i);
558    }
559
560    // Build a Table from each group
561    let mut tables: Vec<Table> = groups
562        .into_values()
563        .map(|indices| {
564            let group_cells: Vec<Cell> = indices.iter().map(|&i| cells[i].clone()).collect();
565
566            // Compute union bbox
567            let mut bbox = group_cells[0].bbox;
568            for cell in &group_cells[1..] {
569                bbox = bbox.union(&cell.bbox);
570            }
571
572            // Organize into rows: group by top coordinate, sort left-to-right
573            let mut row_map: std::collections::BTreeMap<i64, Vec<Cell>> =
574                std::collections::BTreeMap::new();
575            for cell in &group_cells {
576                let key = float_key(cell.bbox.top);
577                row_map.entry(key).or_default().push(cell.clone());
578            }
579            let rows: Vec<Vec<Cell>> = row_map
580                .into_values()
581                .map(|mut row| {
582                    row.sort_by(|a, b| a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap());
583                    row
584                })
585                .collect();
586
587            // Organize into columns: group by x0 coordinate, sort top-to-bottom
588            let mut col_map: std::collections::BTreeMap<i64, Vec<Cell>> =
589                std::collections::BTreeMap::new();
590            for cell in &group_cells {
591                let key = float_key(cell.bbox.x0);
592                col_map.entry(key).or_default().push(cell.clone());
593            }
594            let columns: Vec<Vec<Cell>> = col_map
595                .into_values()
596                .map(|mut col| {
597                    col.sort_by(|a, b| a.bbox.top.partial_cmp(&b.bbox.top).unwrap());
598                    col
599                })
600                .collect();
601
602            Table {
603                bbox,
604                cells: group_cells,
605                rows,
606                columns,
607            }
608        })
609        .collect();
610
611    // Sort tables by position for deterministic output (top-to-bottom, left-to-right)
612    tables.sort_by(|a, b| {
613        a.bbox
614            .top
615            .partial_cmp(&b.bbox.top)
616            .unwrap()
617            .then_with(|| a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap())
618    });
619
620    tables
621}
622
623/// Check if two cells share an edge (a common boundary segment).
624fn cells_share_edge(a: &Cell, b: &Cell) -> bool {
625    let eps = 1e-6;
626
627    // Check for shared vertical boundary (one cell's x1 == other's x0 or vice versa)
628    // with overlapping y-ranges
629    let shared_vertical = ((a.bbox.x1 - b.bbox.x0).abs() < eps
630        || (a.bbox.x0 - b.bbox.x1).abs() < eps)
631        && a.bbox.top < b.bbox.bottom + eps
632        && b.bbox.top < a.bbox.bottom + eps;
633
634    // Check for shared horizontal boundary (one cell's bottom == other's top or vice versa)
635    // with overlapping x-ranges
636    let shared_horizontal = ((a.bbox.bottom - b.bbox.top).abs() < eps
637        || (a.bbox.top - b.bbox.bottom).abs() < eps)
638        && a.bbox.x0 < b.bbox.x1 + eps
639        && b.bbox.x0 < a.bbox.x1 + eps;
640
641    shared_vertical || shared_horizontal
642}
643
644/// Convert a float to an integer key for grouping (multiply by 1000 to preserve 3 decimal places).
645fn float_key(v: f64) -> i64 {
646    (v * 1000.0).round() as i64
647}
648
649/// Compute the length of an edge along its primary axis.
650fn edge_length(edge: &Edge) -> f64 {
651    let dx = edge.x1 - edge.x0;
652    let dy = edge.bottom - edge.top;
653    (dx * dx + dy * dy).sqrt()
654}
655
656/// Extract text content for each cell by finding characters within the cell bbox.
657///
658/// For each cell, finds all [`Char`]s whose bbox center point falls within the
659/// cell's bounding box. Characters are grouped into words using [`WordExtractor`],
660/// then joined into text with spaces between words on the same line and newlines
661/// between lines.
662///
663/// Cells with no matching characters get `text = None`.
664pub fn extract_text_for_cells(cells: &mut [Cell], chars: &[Char]) {
665    let options = WordOptions::default();
666
667    for cell in cells.iter_mut() {
668        // Find chars whose bbox center falls within this cell
669        let cell_chars: Vec<Char> = chars
670            .iter()
671            .filter(|ch| {
672                let cx = (ch.bbox.x0 + ch.bbox.x1) / 2.0;
673                let cy = (ch.bbox.top + ch.bbox.bottom) / 2.0;
674                cx >= cell.bbox.x0
675                    && cx <= cell.bbox.x1
676                    && cy >= cell.bbox.top
677                    && cy <= cell.bbox.bottom
678            })
679            .cloned()
680            .collect();
681
682        if cell_chars.is_empty() {
683            cell.text = None;
684            continue;
685        }
686
687        // Group chars into words
688        let words = WordExtractor::extract(&cell_chars, &options);
689        if words.is_empty() {
690            cell.text = None;
691            continue;
692        }
693
694        // Group words into lines by y-coordinate proximity
695        let mut sorted_words: Vec<&crate::words::Word> = words.iter().collect();
696        sorted_words.sort_by(|a, b| {
697            a.bbox
698                .top
699                .partial_cmp(&b.bbox.top)
700                .unwrap()
701                .then_with(|| a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap())
702        });
703
704        let mut lines: Vec<Vec<&crate::words::Word>> = Vec::new();
705        for word in &sorted_words {
706            let added = lines.last_mut().and_then(|line| {
707                let last_top = line[0].bbox.top;
708                if (word.bbox.top - last_top).abs() <= options.y_tolerance {
709                    line.push(word);
710                    Some(())
711                } else {
712                    None
713                }
714            });
715            if added.is_none() {
716                lines.push(vec![word]);
717            }
718        }
719
720        // Join: words within a line separated by spaces, lines separated by newlines
721        let text: String = lines
722            .iter()
723            .map(|line| {
724                line.iter()
725                    .map(|w| w.text.as_str())
726                    .collect::<Vec<_>>()
727                    .join(" ")
728            })
729            .collect::<Vec<_>>()
730            .join("\n");
731
732        cell.text = Some(text);
733    }
734}
735
736/// Generate synthetic edges from text alignment patterns for the Stream strategy.
737///
738/// Analyzes word positions to detect vertical and horizontal text alignments:
739/// - Words sharing similar x0 or x1 coordinates → synthetic vertical edges
740/// - Words sharing similar top or bottom coordinates → synthetic horizontal edges
741///
742/// Groups must meet the minimum word count thresholds (`min_words_vertical` for
743/// vertical edges, `min_words_horizontal` for horizontal edges) to produce an edge.
744///
745/// Each synthetic edge spans the full extent of the aligned words in the
746/// perpendicular direction.
747pub fn words_to_edges_stream(
748    words: &[Word],
749    text_x_tolerance: f64,
750    text_y_tolerance: f64,
751    min_words_vertical: usize,
752    min_words_horizontal: usize,
753) -> Vec<Edge> {
754    if words.is_empty() {
755        return Vec::new();
756    }
757
758    let mut edges = Vec::new();
759
760    // Vertical edges from x0 alignment (left edges of words)
761    edges.extend(cluster_words_to_edges(
762        words,
763        |w| w.bbox.x0,
764        text_x_tolerance,
765        min_words_vertical,
766        EdgeKind::Vertical,
767    ));
768
769    // Vertical edges from x1 alignment (right edges of words)
770    edges.extend(cluster_words_to_edges(
771        words,
772        |w| w.bbox.x1,
773        text_x_tolerance,
774        min_words_vertical,
775        EdgeKind::Vertical,
776    ));
777
778    // Horizontal edges from top alignment
779    edges.extend(cluster_words_to_edges(
780        words,
781        |w| w.bbox.top,
782        text_y_tolerance,
783        min_words_horizontal,
784        EdgeKind::Horizontal,
785    ));
786
787    // Horizontal edges from bottom alignment
788    edges.extend(cluster_words_to_edges(
789        words,
790        |w| w.bbox.bottom,
791        text_y_tolerance,
792        min_words_horizontal,
793        EdgeKind::Horizontal,
794    ));
795
796    edges
797}
798
799/// Internal enum to specify what kind of edge to produce from word clusters.
800enum EdgeKind {
801    Vertical,
802    Horizontal,
803}
804
805/// Cluster words by a coordinate accessor, then produce synthetic edges for qualifying clusters.
806fn cluster_words_to_edges<F>(
807    words: &[Word],
808    key: F,
809    tolerance: f64,
810    min_words: usize,
811    kind: EdgeKind,
812) -> Vec<Edge>
813where
814    F: Fn(&Word) -> f64,
815{
816    if words.is_empty() || min_words == 0 {
817        return Vec::new();
818    }
819
820    // Sort word indices by the key coordinate
821    let mut indices: Vec<usize> = (0..words.len()).collect();
822    indices.sort_by(|&a, &b| key(&words[a]).partial_cmp(&key(&words[b])).unwrap());
823
824    let mut edges = Vec::new();
825    let mut cluster_start = 0;
826
827    for i in 1..=indices.len() {
828        let end_of_cluster = i == indices.len()
829            || (key(&words[indices[i]]) - key(&words[indices[cluster_start]])).abs() > tolerance;
830
831        if end_of_cluster {
832            let cluster_size = i - cluster_start;
833            if cluster_size >= min_words {
834                // Compute the mean position for the cluster
835                let sum: f64 = (cluster_start..i).map(|j| key(&words[indices[j]])).sum();
836                let mean_pos = sum / cluster_size as f64;
837
838                // Compute the span in the perpendicular direction
839                let cluster_words: Vec<&Word> =
840                    (cluster_start..i).map(|j| &words[indices[j]]).collect();
841
842                match kind {
843                    EdgeKind::Vertical => {
844                        let min_top = cluster_words
845                            .iter()
846                            .map(|w| w.bbox.top)
847                            .fold(f64::INFINITY, f64::min);
848                        let max_bottom = cluster_words
849                            .iter()
850                            .map(|w| w.bbox.bottom)
851                            .fold(f64::NEG_INFINITY, f64::max);
852                        edges.push(Edge {
853                            x0: mean_pos,
854                            top: min_top,
855                            x1: mean_pos,
856                            bottom: max_bottom,
857                            orientation: Orientation::Vertical,
858                            source: EdgeSource::Stream,
859                        });
860                    }
861                    EdgeKind::Horizontal => {
862                        let min_x0 = cluster_words
863                            .iter()
864                            .map(|w| w.bbox.x0)
865                            .fold(f64::INFINITY, f64::min);
866                        let max_x1 = cluster_words
867                            .iter()
868                            .map(|w| w.bbox.x1)
869                            .fold(f64::NEG_INFINITY, f64::max);
870                        edges.push(Edge {
871                            x0: min_x0,
872                            top: mean_pos,
873                            x1: max_x1,
874                            bottom: mean_pos,
875                            orientation: Orientation::Horizontal,
876                            source: EdgeSource::Stream,
877                        });
878                    }
879                }
880            }
881            cluster_start = i;
882        }
883    }
884
885    edges
886}
887
888/// Convert user-provided explicit line coordinates into edges.
889///
890/// Horizontal lines (y-coordinates) become horizontal edges spanning the full
891/// x-range of the vertical lines. Vertical lines (x-coordinates) become
892/// vertical edges spanning the full y-range of the horizontal lines.
893///
894/// Returns an empty Vec if either list is empty (a grid requires both).
895pub fn explicit_lines_to_edges(explicit: &ExplicitLines) -> Vec<Edge> {
896    if explicit.horizontal_lines.is_empty() || explicit.vertical_lines.is_empty() {
897        return Vec::new();
898    }
899
900    let min_x = explicit
901        .vertical_lines
902        .iter()
903        .copied()
904        .fold(f64::INFINITY, f64::min);
905    let max_x = explicit
906        .vertical_lines
907        .iter()
908        .copied()
909        .fold(f64::NEG_INFINITY, f64::max);
910    let min_y = explicit
911        .horizontal_lines
912        .iter()
913        .copied()
914        .fold(f64::INFINITY, f64::min);
915    let max_y = explicit
916        .horizontal_lines
917        .iter()
918        .copied()
919        .fold(f64::NEG_INFINITY, f64::max);
920
921    let mut edges = Vec::new();
922
923    // Horizontal edges: each y-coordinate spans from min_x to max_x
924    for &y in &explicit.horizontal_lines {
925        edges.push(Edge {
926            x0: min_x,
927            top: y,
928            x1: max_x,
929            bottom: y,
930            orientation: Orientation::Horizontal,
931            source: EdgeSource::Explicit,
932        });
933    }
934
935    // Vertical edges: each x-coordinate spans from min_y to max_y
936    for &x in &explicit.vertical_lines {
937        edges.push(Edge {
938            x0: x,
939            top: min_y,
940            x1: x,
941            bottom: max_y,
942            orientation: Orientation::Vertical,
943            source: EdgeSource::Explicit,
944        });
945    }
946
947    edges
948}
949
950/// Intermediate results from the table detection pipeline.
951///
952/// Returned by [`TableFinder::find_tables_debug`] to expose every stage of the
953/// pipeline for visual debugging (edges, intersections, cells, tables).
954#[derive(Debug, Clone)]
955pub struct TableFinderDebug {
956    /// Processed edges after filtering, snapping, and joining.
957    pub edges: Vec<Edge>,
958    /// Intersection points between horizontal and vertical edges.
959    pub intersections: Vec<Intersection>,
960    /// Cells constructed from the intersection grid.
961    pub cells: Vec<Cell>,
962    /// Final tables grouped from adjacent cells.
963    pub tables: Vec<Table>,
964}
965
966/// Orchestrator for the table detection pipeline.
967///
968/// Takes edges (and optionally words/chars) and settings, then runs
969/// the appropriate detection strategy to produce tables.
970pub struct TableFinder {
971    /// Edges available for table detection.
972    edges: Vec<Edge>,
973    /// Words for Stream strategy text alignment detection.
974    words: Vec<Word>,
975    /// Configuration settings.
976    settings: TableSettings,
977}
978
979impl TableFinder {
980    /// Create a new TableFinder with the given edges and settings.
981    pub fn new(edges: Vec<Edge>, settings: TableSettings) -> Self {
982        Self {
983            edges,
984            words: Vec::new(),
985            settings,
986        }
987    }
988
989    /// Create a new TableFinder with edges, words, and settings.
990    ///
991    /// The words are used by the Stream strategy to generate synthetic edges
992    /// from text alignment patterns.
993    pub fn new_with_words(edges: Vec<Edge>, words: Vec<Word>, settings: TableSettings) -> Self {
994        Self {
995            edges,
996            words,
997            settings,
998        }
999    }
1000
1001    /// Get a reference to the settings.
1002    pub fn settings(&self) -> &TableSettings {
1003        &self.settings
1004    }
1005
1006    /// Get a reference to the edges.
1007    pub fn edges(&self) -> &[Edge] {
1008        &self.edges
1009    }
1010
1011    /// Run the table detection pipeline and return detected tables.
1012    ///
1013    /// Pipeline: filter edges → snap → join → intersections → cells → tables.
1014    ///
1015    /// For **Lattice** strategy, all edges (lines + rect edges) are used.
1016    /// For **LatticeStrict** strategy, only line-sourced edges are used (no rect edges).
1017    /// For **Stream** strategy, synthetic edges are generated from word alignment patterns.
1018    /// For **Explicit** strategy, edges from user-provided coordinates are used,
1019    /// combined with any detected edges passed to the finder (mixing).
1020    pub fn find_tables(&self) -> Vec<Table> {
1021        // Step 1: Select edges based on strategy
1022        let edges: Vec<Edge> = match self.settings.strategy {
1023            Strategy::LatticeStrict => self
1024                .edges
1025                .iter()
1026                .filter(|e| e.source == EdgeSource::Line)
1027                .cloned()
1028                .collect(),
1029            Strategy::Stream => {
1030                // Generate synthetic edges from word alignment patterns
1031                words_to_edges_stream(
1032                    &self.words,
1033                    self.settings.text_x_tolerance,
1034                    self.settings.text_y_tolerance,
1035                    self.settings.min_words_vertical,
1036                    self.settings.min_words_horizontal,
1037                )
1038            }
1039            Strategy::Explicit => {
1040                // Start with detected edges (for mixing)
1041                let mut edges = self.edges.clone();
1042
1043                if let Some(ref explicit) = self.settings.explicit_lines {
1044                    // Compute the overall bounding range from detected edges + explicit coords
1045                    let mut min_x = f64::INFINITY;
1046                    let mut max_x = f64::NEG_INFINITY;
1047                    let mut min_y = f64::INFINITY;
1048                    let mut max_y = f64::NEG_INFINITY;
1049
1050                    for e in &edges {
1051                        min_x = min_x.min(e.x0);
1052                        max_x = max_x.max(e.x1);
1053                        min_y = min_y.min(e.top);
1054                        max_y = max_y.max(e.bottom);
1055                    }
1056                    for &x in &explicit.vertical_lines {
1057                        min_x = min_x.min(x);
1058                        max_x = max_x.max(x);
1059                    }
1060                    for &y in &explicit.horizontal_lines {
1061                        min_y = min_y.min(y);
1062                        max_y = max_y.max(y);
1063                    }
1064
1065                    if min_x <= max_x && min_y <= max_y {
1066                        for &y in &explicit.horizontal_lines {
1067                            edges.push(Edge {
1068                                x0: min_x,
1069                                top: y,
1070                                x1: max_x,
1071                                bottom: y,
1072                                orientation: Orientation::Horizontal,
1073                                source: EdgeSource::Explicit,
1074                            });
1075                        }
1076                        for &x in &explicit.vertical_lines {
1077                            edges.push(Edge {
1078                                x0: x,
1079                                top: min_y,
1080                                x1: x,
1081                                bottom: max_y,
1082                                orientation: Orientation::Vertical,
1083                                source: EdgeSource::Explicit,
1084                            });
1085                        }
1086                    }
1087                }
1088
1089                edges
1090            }
1091            // Lattice (default): use all edges
1092            Strategy::Lattice => self.edges.clone(),
1093        };
1094
1095        // Step 2: Filter edges by minimum length
1096        let min_len = self.settings.edge_min_length;
1097        let edges: Vec<Edge> = edges
1098            .into_iter()
1099            .filter(|e| edge_length(e) >= min_len)
1100            .collect();
1101
1102        if edges.is_empty() {
1103            return Vec::new();
1104        }
1105
1106        // Step 3: Snap nearby parallel edges
1107        let edges = snap_edges(
1108            edges,
1109            self.settings.snap_x_tolerance,
1110            self.settings.snap_y_tolerance,
1111        );
1112
1113        // Step 4: Join collinear edge segments
1114        let edges = join_edge_group(
1115            edges,
1116            self.settings.join_x_tolerance,
1117            self.settings.join_y_tolerance,
1118        );
1119
1120        // Step 5: Find intersections
1121        let intersections = edges_to_intersections(
1122            &edges,
1123            self.settings.intersection_x_tolerance,
1124            self.settings.intersection_y_tolerance,
1125        );
1126
1127        // Step 6: Build cells from intersections
1128        let cells = intersections_to_cells(&intersections);
1129
1130        // Step 7: Group cells into tables
1131        cells_to_tables(cells)
1132    }
1133
1134    /// Run the table detection pipeline and return intermediate results for debugging.
1135    ///
1136    /// Returns a [`TableFinderDebug`] containing the processed edges, intersections,
1137    /// cells, and tables from each pipeline stage. This is used by the visual
1138    /// debugging system to render the table detection process.
1139    pub fn find_tables_debug(&self) -> TableFinderDebug {
1140        // Step 1: Select edges based on strategy (same as find_tables)
1141        let edges: Vec<Edge> = match self.settings.strategy {
1142            Strategy::LatticeStrict => self
1143                .edges
1144                .iter()
1145                .filter(|e| e.source == EdgeSource::Line)
1146                .cloned()
1147                .collect(),
1148            Strategy::Stream => words_to_edges_stream(
1149                &self.words,
1150                self.settings.text_x_tolerance,
1151                self.settings.text_y_tolerance,
1152                self.settings.min_words_vertical,
1153                self.settings.min_words_horizontal,
1154            ),
1155            Strategy::Explicit => {
1156                let mut edges = self.edges.clone();
1157                if let Some(ref explicit) = self.settings.explicit_lines {
1158                    let mut min_x = f64::INFINITY;
1159                    let mut max_x = f64::NEG_INFINITY;
1160                    let mut min_y = f64::INFINITY;
1161                    let mut max_y = f64::NEG_INFINITY;
1162                    for e in &edges {
1163                        min_x = min_x.min(e.x0);
1164                        max_x = max_x.max(e.x1);
1165                        min_y = min_y.min(e.top);
1166                        max_y = max_y.max(e.bottom);
1167                    }
1168                    for &x in &explicit.vertical_lines {
1169                        min_x = min_x.min(x);
1170                        max_x = max_x.max(x);
1171                    }
1172                    for &y in &explicit.horizontal_lines {
1173                        min_y = min_y.min(y);
1174                        max_y = max_y.max(y);
1175                    }
1176                    if min_x <= max_x && min_y <= max_y {
1177                        for &y in &explicit.horizontal_lines {
1178                            edges.push(Edge {
1179                                x0: min_x,
1180                                top: y,
1181                                x1: max_x,
1182                                bottom: y,
1183                                orientation: Orientation::Horizontal,
1184                                source: EdgeSource::Explicit,
1185                            });
1186                        }
1187                        for &x in &explicit.vertical_lines {
1188                            edges.push(Edge {
1189                                x0: x,
1190                                top: min_y,
1191                                x1: x,
1192                                bottom: max_y,
1193                                orientation: Orientation::Vertical,
1194                                source: EdgeSource::Explicit,
1195                            });
1196                        }
1197                    }
1198                }
1199                edges
1200            }
1201            Strategy::Lattice => self.edges.clone(),
1202        };
1203
1204        // Step 2: Filter by minimum length
1205        let min_len = self.settings.edge_min_length;
1206        let edges: Vec<Edge> = edges
1207            .into_iter()
1208            .filter(|e| edge_length(e) >= min_len)
1209            .collect();
1210
1211        if edges.is_empty() {
1212            return TableFinderDebug {
1213                edges: Vec::new(),
1214                intersections: Vec::new(),
1215                cells: Vec::new(),
1216                tables: Vec::new(),
1217            };
1218        }
1219
1220        // Step 3: Snap
1221        let edges = snap_edges(
1222            edges,
1223            self.settings.snap_x_tolerance,
1224            self.settings.snap_y_tolerance,
1225        );
1226
1227        // Step 4: Join
1228        let edges = join_edge_group(
1229            edges,
1230            self.settings.join_x_tolerance,
1231            self.settings.join_y_tolerance,
1232        );
1233
1234        // Step 5: Intersections
1235        let intersections = edges_to_intersections(
1236            &edges,
1237            self.settings.intersection_x_tolerance,
1238            self.settings.intersection_y_tolerance,
1239        );
1240
1241        // Step 6: Cells
1242        let cells = intersections_to_cells(&intersections);
1243
1244        // Step 7: Tables
1245        let tables = cells_to_tables(cells.clone());
1246
1247        TableFinderDebug {
1248            edges,
1249            intersections,
1250            cells,
1251            tables,
1252        }
1253    }
1254}
1255
1256#[cfg(test)]
1257mod tests {
1258    use super::*;
1259    use crate::geometry::Orientation;
1260
1261    // --- Strategy tests ---
1262
1263    #[test]
1264    fn test_strategy_default_is_lattice() {
1265        assert_eq!(Strategy::default(), Strategy::Lattice);
1266    }
1267
1268    #[test]
1269    fn test_strategy_variants_are_distinct() {
1270        let strategies = [
1271            Strategy::Lattice,
1272            Strategy::LatticeStrict,
1273            Strategy::Stream,
1274            Strategy::Explicit,
1275        ];
1276        for i in 0..strategies.len() {
1277            for j in (i + 1)..strategies.len() {
1278                assert_ne!(strategies[i], strategies[j]);
1279            }
1280        }
1281    }
1282
1283    #[test]
1284    fn test_strategy_copy() {
1285        let s = Strategy::Stream;
1286        let s2 = s;
1287        assert_eq!(s, s2);
1288    }
1289
1290    // --- TableSettings tests ---
1291
1292    #[test]
1293    fn test_table_settings_default_values() {
1294        let settings = TableSettings::default();
1295        assert_eq!(settings.strategy, Strategy::Lattice);
1296        assert_eq!(settings.snap_tolerance, 3.0);
1297        assert_eq!(settings.snap_x_tolerance, 3.0);
1298        assert_eq!(settings.snap_y_tolerance, 3.0);
1299        assert_eq!(settings.join_tolerance, 3.0);
1300        assert_eq!(settings.join_x_tolerance, 3.0);
1301        assert_eq!(settings.join_y_tolerance, 3.0);
1302        assert_eq!(settings.edge_min_length, 3.0);
1303        assert_eq!(settings.min_words_vertical, 3);
1304        assert_eq!(settings.min_words_horizontal, 1);
1305        assert_eq!(settings.text_tolerance, 3.0);
1306        assert_eq!(settings.text_x_tolerance, 3.0);
1307        assert_eq!(settings.text_y_tolerance, 3.0);
1308        assert_eq!(settings.intersection_tolerance, 3.0);
1309        assert_eq!(settings.intersection_x_tolerance, 3.0);
1310        assert_eq!(settings.intersection_y_tolerance, 3.0);
1311        assert!(settings.explicit_lines.is_none());
1312    }
1313
1314    #[test]
1315    fn test_table_settings_custom_construction() {
1316        let settings = TableSettings {
1317            strategy: Strategy::Stream,
1318            snap_tolerance: 5.0,
1319            min_words_vertical: 5,
1320            min_words_horizontal: 2,
1321            ..TableSettings::default()
1322        };
1323        assert_eq!(settings.strategy, Strategy::Stream);
1324        assert_eq!(settings.snap_tolerance, 5.0);
1325        assert_eq!(settings.min_words_vertical, 5);
1326        assert_eq!(settings.min_words_horizontal, 2);
1327        // Other fields should still be defaults
1328        assert_eq!(settings.join_tolerance, 3.0);
1329        assert_eq!(settings.edge_min_length, 3.0);
1330    }
1331
1332    #[test]
1333    fn test_table_settings_with_explicit_lines() {
1334        let settings = TableSettings {
1335            strategy: Strategy::Explicit,
1336            explicit_lines: Some(ExplicitLines {
1337                horizontal_lines: vec![10.0, 50.0, 100.0],
1338                vertical_lines: vec![20.0, 80.0, 140.0],
1339            }),
1340            ..TableSettings::default()
1341        };
1342        assert_eq!(settings.strategy, Strategy::Explicit);
1343        let lines = settings.explicit_lines.as_ref().unwrap();
1344        assert_eq!(lines.horizontal_lines.len(), 3);
1345        assert_eq!(lines.vertical_lines.len(), 3);
1346    }
1347
1348    #[test]
1349    fn test_table_settings_strategy_selection() {
1350        for strategy in [
1351            Strategy::Lattice,
1352            Strategy::LatticeStrict,
1353            Strategy::Stream,
1354            Strategy::Explicit,
1355        ] {
1356            let settings = TableSettings {
1357                strategy,
1358                ..TableSettings::default()
1359            };
1360            assert_eq!(settings.strategy, strategy);
1361        }
1362    }
1363
1364    // --- Cell tests ---
1365
1366    #[test]
1367    fn test_cell_with_text() {
1368        let cell = Cell {
1369            bbox: BBox::new(10.0, 20.0, 100.0, 40.0),
1370            text: Some("Hello".to_string()),
1371        };
1372        assert_eq!(cell.bbox.x0, 10.0);
1373        assert_eq!(cell.text.as_deref(), Some("Hello"));
1374    }
1375
1376    #[test]
1377    fn test_cell_without_text() {
1378        let cell = Cell {
1379            bbox: BBox::new(10.0, 20.0, 100.0, 40.0),
1380            text: None,
1381        };
1382        assert!(cell.text.is_none());
1383    }
1384
1385    // --- Table tests ---
1386
1387    #[test]
1388    fn test_table_construction() {
1389        let cells = vec![
1390            Cell {
1391                bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
1392                text: Some("A".to_string()),
1393            },
1394            Cell {
1395                bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
1396                text: Some("B".to_string()),
1397            },
1398        ];
1399        let table = Table {
1400            bbox: BBox::new(0.0, 0.0, 100.0, 30.0),
1401            cells: cells.clone(),
1402            rows: vec![cells.clone()],
1403            columns: vec![vec![cells[0].clone()], vec![cells[1].clone()]],
1404        };
1405        assert_eq!(table.bbox.x0, 0.0);
1406        assert_eq!(table.bbox.x1, 100.0);
1407        assert_eq!(table.cells.len(), 2);
1408        assert_eq!(table.rows.len(), 1);
1409        assert_eq!(table.rows[0].len(), 2);
1410        assert_eq!(table.columns.len(), 2);
1411    }
1412
1413    #[test]
1414    fn test_table_multi_row() {
1415        let row1 = vec![
1416            Cell {
1417                bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
1418                text: Some("A1".to_string()),
1419            },
1420            Cell {
1421                bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
1422                text: Some("B1".to_string()),
1423            },
1424        ];
1425        let row2 = vec![
1426            Cell {
1427                bbox: BBox::new(0.0, 30.0, 50.0, 60.0),
1428                text: Some("A2".to_string()),
1429            },
1430            Cell {
1431                bbox: BBox::new(50.0, 30.0, 100.0, 60.0),
1432                text: Some("B2".to_string()),
1433            },
1434        ];
1435        let all_cells: Vec<Cell> = row1.iter().chain(row2.iter()).cloned().collect();
1436        let table = Table {
1437            bbox: BBox::new(0.0, 0.0, 100.0, 60.0),
1438            cells: all_cells,
1439            rows: vec![row1, row2],
1440            columns: vec![
1441                vec![
1442                    Cell {
1443                        bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
1444                        text: Some("A1".to_string()),
1445                    },
1446                    Cell {
1447                        bbox: BBox::new(0.0, 30.0, 50.0, 60.0),
1448                        text: Some("A2".to_string()),
1449                    },
1450                ],
1451                vec![
1452                    Cell {
1453                        bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
1454                        text: Some("B1".to_string()),
1455                    },
1456                    Cell {
1457                        bbox: BBox::new(50.0, 30.0, 100.0, 60.0),
1458                        text: Some("B2".to_string()),
1459                    },
1460                ],
1461            ],
1462        };
1463        assert_eq!(table.rows.len(), 2);
1464        assert_eq!(table.columns.len(), 2);
1465        assert_eq!(table.cells.len(), 4);
1466    }
1467
1468    // --- TableFinder tests ---
1469
1470    #[test]
1471    fn test_table_finder_construction() {
1472        let edges = vec![Edge {
1473            x0: 0.0,
1474            top: 50.0,
1475            x1: 100.0,
1476            bottom: 50.0,
1477            orientation: Orientation::Horizontal,
1478            source: crate::edges::EdgeSource::Line,
1479        }];
1480        let settings = TableSettings::default();
1481        let finder = TableFinder::new(edges.clone(), settings.clone());
1482
1483        assert_eq!(finder.edges().len(), 1);
1484        assert_eq!(finder.settings().strategy, Strategy::Lattice);
1485    }
1486
1487    #[test]
1488    fn test_table_finder_empty_edges() {
1489        let finder = TableFinder::new(Vec::new(), TableSettings::default());
1490        assert!(finder.edges().is_empty());
1491        let tables = finder.find_tables();
1492        assert!(tables.is_empty());
1493    }
1494
1495    #[test]
1496    fn test_table_finder_custom_settings() {
1497        let settings = TableSettings {
1498            strategy: Strategy::LatticeStrict,
1499            snap_tolerance: 5.0,
1500            ..TableSettings::default()
1501        };
1502        let finder = TableFinder::new(Vec::new(), settings);
1503        assert_eq!(finder.settings().strategy, Strategy::LatticeStrict);
1504        assert_eq!(finder.settings().snap_tolerance, 5.0);
1505    }
1506
1507    // --- ExplicitLines tests ---
1508
1509    #[test]
1510    fn test_explicit_lines_construction() {
1511        let lines = ExplicitLines {
1512            horizontal_lines: vec![0.0, 30.0, 60.0],
1513            vertical_lines: vec![0.0, 50.0, 100.0],
1514        };
1515        assert_eq!(lines.horizontal_lines.len(), 3);
1516        assert_eq!(lines.vertical_lines.len(), 3);
1517        assert_eq!(lines.horizontal_lines[1], 30.0);
1518        assert_eq!(lines.vertical_lines[2], 100.0);
1519    }
1520
1521    #[test]
1522    fn test_explicit_lines_empty() {
1523        let lines = ExplicitLines {
1524            horizontal_lines: Vec::new(),
1525            vertical_lines: Vec::new(),
1526        };
1527        assert!(lines.horizontal_lines.is_empty());
1528        assert!(lines.vertical_lines.is_empty());
1529    }
1530
1531    // --- snap_edges tests ---
1532
1533    fn make_h_edge(x0: f64, y: f64, x1: f64) -> Edge {
1534        Edge {
1535            x0,
1536            top: y,
1537            x1,
1538            bottom: y,
1539            orientation: Orientation::Horizontal,
1540            source: crate::edges::EdgeSource::Line,
1541        }
1542    }
1543
1544    fn make_v_edge(x: f64, top: f64, bottom: f64) -> Edge {
1545        Edge {
1546            x0: x,
1547            top,
1548            x1: x,
1549            bottom,
1550            orientation: Orientation::Vertical,
1551            source: crate::edges::EdgeSource::Line,
1552        }
1553    }
1554
1555    fn assert_approx(a: f64, b: f64) {
1556        assert!(
1557            (a - b).abs() < 1e-6,
1558            "expected {b}, got {a}, diff={}",
1559            (a - b).abs()
1560        );
1561    }
1562
1563    #[test]
1564    fn test_snap_edges_empty() {
1565        let result = snap_edges(Vec::new(), 3.0, 3.0);
1566        assert!(result.is_empty());
1567    }
1568
1569    #[test]
1570    fn test_snap_nearby_horizontal_lines() {
1571        // Two horizontal edges at y=50.0 and y=51.5 (within tolerance 3.0)
1572        // Should snap to mean = 50.75
1573        let edges = vec![make_h_edge(0.0, 50.0, 100.0), make_h_edge(0.0, 51.5, 100.0)];
1574        let result = snap_edges(edges, 3.0, 3.0);
1575
1576        let horizontals: Vec<&Edge> = result
1577            .iter()
1578            .filter(|e| e.orientation == Orientation::Horizontal)
1579            .collect();
1580        assert_eq!(horizontals.len(), 2);
1581        assert_approx(horizontals[0].top, 50.75);
1582        assert_approx(horizontals[0].bottom, 50.75);
1583        assert_approx(horizontals[1].top, 50.75);
1584        assert_approx(horizontals[1].bottom, 50.75);
1585    }
1586
1587    #[test]
1588    fn test_snap_nearby_vertical_lines() {
1589        // Two vertical edges at x=100.0 and x=101.0 (within tolerance 3.0)
1590        // Should snap to mean = 100.5
1591        let edges = vec![
1592            make_v_edge(100.0, 0.0, 200.0),
1593            make_v_edge(101.0, 0.0, 200.0),
1594        ];
1595        let result = snap_edges(edges, 3.0, 3.0);
1596
1597        let verticals: Vec<&Edge> = result
1598            .iter()
1599            .filter(|e| e.orientation == Orientation::Vertical)
1600            .collect();
1601        assert_eq!(verticals.len(), 2);
1602        assert_approx(verticals[0].x0, 100.5);
1603        assert_approx(verticals[0].x1, 100.5);
1604        assert_approx(verticals[1].x0, 100.5);
1605        assert_approx(verticals[1].x1, 100.5);
1606    }
1607
1608    #[test]
1609    fn test_snap_edges_far_apart_remain_unchanged() {
1610        // Two horizontal edges at y=50.0 and y=100.0 (far apart, beyond tolerance 3.0)
1611        let edges = vec![
1612            make_h_edge(0.0, 50.0, 100.0),
1613            make_h_edge(0.0, 100.0, 100.0),
1614        ];
1615        let result = snap_edges(edges, 3.0, 3.0);
1616
1617        let horizontals: Vec<&Edge> = result
1618            .iter()
1619            .filter(|e| e.orientation == Orientation::Horizontal)
1620            .collect();
1621        assert_eq!(horizontals.len(), 2);
1622        // They should remain at their original positions
1623        let mut ys: Vec<f64> = horizontals.iter().map(|e| e.top).collect();
1624        ys.sort_by(|a, b| a.partial_cmp(b).unwrap());
1625        assert_approx(ys[0], 50.0);
1626        assert_approx(ys[1], 100.0);
1627    }
1628
1629    #[test]
1630    fn test_snap_edges_separate_x_y_tolerance() {
1631        // Horizontal edges within 2.0 of each other, snap_y_tolerance=1.0 (NOT within)
1632        // Should NOT snap
1633        let edges = vec![make_h_edge(0.0, 50.0, 100.0), make_h_edge(0.0, 52.0, 100.0)];
1634        let result = snap_edges(edges, 3.0, 1.0);
1635
1636        let horizontals: Vec<&Edge> = result
1637            .iter()
1638            .filter(|e| e.orientation == Orientation::Horizontal)
1639            .collect();
1640        let mut ys: Vec<f64> = horizontals.iter().map(|e| e.top).collect();
1641        ys.sort_by(|a, b| a.partial_cmp(b).unwrap());
1642        assert_approx(ys[0], 50.0);
1643        assert_approx(ys[1], 52.0);
1644    }
1645
1646    #[test]
1647    fn test_snap_edges_separate_x_tolerance() {
1648        // Vertical edges within 2.0 of each other, snap_x_tolerance=1.0 (NOT within)
1649        // Should NOT snap
1650        let edges = vec![
1651            make_v_edge(100.0, 0.0, 200.0),
1652            make_v_edge(102.0, 0.0, 200.0),
1653        ];
1654        let result = snap_edges(edges, 1.0, 3.0);
1655
1656        let verticals: Vec<&Edge> = result
1657            .iter()
1658            .filter(|e| e.orientation == Orientation::Vertical)
1659            .collect();
1660        let mut xs: Vec<f64> = verticals.iter().map(|e| e.x0).collect();
1661        xs.sort_by(|a, b| a.partial_cmp(b).unwrap());
1662        assert_approx(xs[0], 100.0);
1663        assert_approx(xs[1], 102.0);
1664    }
1665
1666    #[test]
1667    fn test_snap_edges_does_not_merge() {
1668        // Three horizontal edges within tolerance should snap but remain 3 separate edges
1669        let edges = vec![
1670            make_h_edge(0.0, 50.0, 100.0),
1671            make_h_edge(10.0, 51.0, 90.0),
1672            make_h_edge(20.0, 50.5, 80.0),
1673        ];
1674        let result = snap_edges(edges, 3.0, 3.0);
1675
1676        let horizontals: Vec<&Edge> = result
1677            .iter()
1678            .filter(|e| e.orientation == Orientation::Horizontal)
1679            .collect();
1680        // Still 3 edges - snap does not merge
1681        assert_eq!(horizontals.len(), 3);
1682        // All snapped to mean of 50.0, 51.0, 50.5 = 50.5
1683        for h in &horizontals {
1684            assert_approx(h.top, 50.5);
1685            assert_approx(h.bottom, 50.5);
1686        }
1687    }
1688
1689    #[test]
1690    fn test_snap_edges_preserves_along_axis_coords() {
1691        // Snapping horizontal edges should only change y, not x
1692        let edges = vec![
1693            make_h_edge(10.0, 50.0, 200.0),
1694            make_h_edge(30.0, 51.0, 180.0),
1695        ];
1696        let result = snap_edges(edges, 3.0, 3.0);
1697
1698        let horizontals: Vec<&Edge> = result
1699            .iter()
1700            .filter(|e| e.orientation == Orientation::Horizontal)
1701            .collect();
1702        // x-coordinates should be unchanged
1703        let mut found_10 = false;
1704        let mut found_30 = false;
1705        for h in &horizontals {
1706            if (h.x0 - 10.0).abs() < 1e-6 {
1707                assert_approx(h.x1, 200.0);
1708                found_10 = true;
1709            }
1710            if (h.x0 - 30.0).abs() < 1e-6 {
1711                assert_approx(h.x1, 180.0);
1712                found_30 = true;
1713            }
1714        }
1715        assert!(found_10 && found_30, "x-coordinates should be preserved");
1716    }
1717
1718    #[test]
1719    fn test_snap_edges_mixed_orientations() {
1720        // Mix of horizontal and vertical edges, each group snaps independently
1721        let edges = vec![
1722            make_h_edge(0.0, 50.0, 100.0),
1723            make_h_edge(0.0, 51.0, 100.0),
1724            make_v_edge(200.0, 0.0, 100.0),
1725            make_v_edge(201.0, 0.0, 100.0),
1726        ];
1727        let result = snap_edges(edges, 3.0, 3.0);
1728        assert_eq!(result.len(), 4);
1729
1730        let horizontals: Vec<&Edge> = result
1731            .iter()
1732            .filter(|e| e.orientation == Orientation::Horizontal)
1733            .collect();
1734        let verticals: Vec<&Edge> = result
1735            .iter()
1736            .filter(|e| e.orientation == Orientation::Vertical)
1737            .collect();
1738
1739        // Horizontal snapped to mean(50, 51) = 50.5
1740        for h in &horizontals {
1741            assert_approx(h.top, 50.5);
1742        }
1743        // Vertical snapped to mean(200, 201) = 200.5
1744        for v in &verticals {
1745            assert_approx(v.x0, 200.5);
1746        }
1747    }
1748
1749    #[test]
1750    fn test_snap_edges_multiple_clusters() {
1751        // Three groups of horizontal edges, well separated
1752        let edges = vec![
1753            make_h_edge(0.0, 10.0, 100.0),
1754            make_h_edge(0.0, 11.0, 100.0),
1755            // gap
1756            make_h_edge(0.0, 50.0, 100.0),
1757            make_h_edge(0.0, 51.0, 100.0),
1758            // gap
1759            make_h_edge(0.0, 100.0, 100.0),
1760            make_h_edge(0.0, 101.0, 100.0),
1761        ];
1762        let result = snap_edges(edges, 3.0, 3.0);
1763
1764        let horizontals: Vec<&Edge> = result
1765            .iter()
1766            .filter(|e| e.orientation == Orientation::Horizontal)
1767            .collect();
1768        assert_eq!(horizontals.len(), 6);
1769
1770        let mut ys: Vec<f64> = horizontals.iter().map(|e| e.top).collect();
1771        ys.sort_by(|a, b| a.partial_cmp(b).unwrap());
1772        // Cluster 1: mean(10, 11) = 10.5
1773        assert_approx(ys[0], 10.5);
1774        assert_approx(ys[1], 10.5);
1775        // Cluster 2: mean(50, 51) = 50.5
1776        assert_approx(ys[2], 50.5);
1777        assert_approx(ys[3], 50.5);
1778        // Cluster 3: mean(100, 101) = 100.5
1779        assert_approx(ys[4], 100.5);
1780        assert_approx(ys[5], 100.5);
1781    }
1782
1783    #[test]
1784    fn test_snap_edges_single_edge_unchanged() {
1785        let edges = vec![make_h_edge(0.0, 50.0, 100.0)];
1786        let result = snap_edges(edges, 3.0, 3.0);
1787        assert_eq!(result.len(), 1);
1788        assert_approx(result[0].top, 50.0);
1789        assert_approx(result[0].bottom, 50.0);
1790    }
1791
1792    #[test]
1793    fn test_snap_edges_diagonal_passed_through() {
1794        let edges = vec![
1795            Edge {
1796                x0: 0.0,
1797                top: 0.0,
1798                x1: 100.0,
1799                bottom: 100.0,
1800                orientation: Orientation::Diagonal,
1801                source: crate::edges::EdgeSource::Curve,
1802            },
1803            make_h_edge(0.0, 50.0, 100.0),
1804        ];
1805        let result = snap_edges(edges, 3.0, 3.0);
1806        assert_eq!(result.len(), 2);
1807
1808        let diagonals: Vec<&Edge> = result
1809            .iter()
1810            .filter(|e| e.orientation == Orientation::Diagonal)
1811            .collect();
1812        assert_eq!(diagonals.len(), 1);
1813        // Diagonal edge unchanged
1814        assert_approx(diagonals[0].x0, 0.0);
1815        assert_approx(diagonals[0].top, 0.0);
1816        assert_approx(diagonals[0].x1, 100.0);
1817        assert_approx(diagonals[0].bottom, 100.0);
1818    }
1819
1820    // --- join_edge_group tests ---
1821
1822    #[test]
1823    fn test_join_edge_group_empty() {
1824        let result = join_edge_group(Vec::new(), 3.0, 3.0);
1825        assert!(result.is_empty());
1826    }
1827
1828    #[test]
1829    fn test_join_edge_group_single_edge_unchanged() {
1830        let edges = vec![make_h_edge(10.0, 50.0, 80.0)];
1831        let result = join_edge_group(edges, 3.0, 3.0);
1832        assert_eq!(result.len(), 1);
1833        assert_approx(result[0].x0, 10.0);
1834        assert_approx(result[0].x1, 80.0);
1835    }
1836
1837    #[test]
1838    fn test_join_two_overlapping_horizontal_edges() {
1839        // Two horizontal edges at y=50 that overlap: [10..60] and [40..90]
1840        // Should merge into [10..90]
1841        let edges = vec![make_h_edge(10.0, 50.0, 60.0), make_h_edge(40.0, 50.0, 90.0)];
1842        let result = join_edge_group(edges, 3.0, 3.0);
1843        assert_eq!(result.len(), 1);
1844        assert_approx(result[0].x0, 10.0);
1845        assert_approx(result[0].x1, 90.0);
1846        assert_approx(result[0].top, 50.0);
1847    }
1848
1849    #[test]
1850    fn test_join_two_adjacent_horizontal_edges_within_tolerance() {
1851        // Two horizontal edges at y=50: [10..50] and [52..90]
1852        // Gap is 2.0, within join_x_tolerance=3.0 → merge to [10..90]
1853        let edges = vec![make_h_edge(10.0, 50.0, 50.0), make_h_edge(52.0, 50.0, 90.0)];
1854        let result = join_edge_group(edges, 3.0, 3.0);
1855        assert_eq!(result.len(), 1);
1856        assert_approx(result[0].x0, 10.0);
1857        assert_approx(result[0].x1, 90.0);
1858    }
1859
1860    #[test]
1861    fn test_join_distant_horizontal_edges_not_merged() {
1862        // Two horizontal edges at y=50: [10..40] and [60..90]
1863        // Gap is 20.0, beyond tolerance → remain separate
1864        let edges = vec![make_h_edge(10.0, 50.0, 40.0), make_h_edge(60.0, 50.0, 90.0)];
1865        let result = join_edge_group(edges, 3.0, 3.0);
1866        assert_eq!(result.len(), 2);
1867    }
1868
1869    #[test]
1870    fn test_join_chain_of_three_horizontal_segments() {
1871        // Three segments on y=50: [10..40], [38..70], [68..100]
1872        // All overlap pairwise → chain merge to [10..100]
1873        let edges = vec![
1874            make_h_edge(10.0, 50.0, 40.0),
1875            make_h_edge(38.0, 50.0, 70.0),
1876            make_h_edge(68.0, 50.0, 100.0),
1877        ];
1878        let result = join_edge_group(edges, 3.0, 3.0);
1879        assert_eq!(result.len(), 1);
1880        assert_approx(result[0].x0, 10.0);
1881        assert_approx(result[0].x1, 100.0);
1882    }
1883
1884    #[test]
1885    fn test_join_two_overlapping_vertical_edges() {
1886        // Two vertical edges at x=50: [10..60] and [40..90]
1887        // Should merge into [10..90]
1888        let edges = vec![make_v_edge(50.0, 10.0, 60.0), make_v_edge(50.0, 40.0, 90.0)];
1889        let result = join_edge_group(edges, 3.0, 3.0);
1890        assert_eq!(result.len(), 1);
1891        assert_approx(result[0].top, 10.0);
1892        assert_approx(result[0].bottom, 90.0);
1893        assert_approx(result[0].x0, 50.0);
1894    }
1895
1896    #[test]
1897    fn test_join_adjacent_vertical_edges_within_tolerance() {
1898        // Two vertical edges at x=50: [10..50] and [52..90]
1899        // Gap is 2.0, within join_y_tolerance=3.0 → merge
1900        let edges = vec![make_v_edge(50.0, 10.0, 50.0), make_v_edge(50.0, 52.0, 90.0)];
1901        let result = join_edge_group(edges, 3.0, 3.0);
1902        assert_eq!(result.len(), 1);
1903        assert_approx(result[0].top, 10.0);
1904        assert_approx(result[0].bottom, 90.0);
1905    }
1906
1907    #[test]
1908    fn test_join_groups_by_collinear_position() {
1909        // Two groups of horizontal edges at different y positions
1910        // Group 1: y=50, [10..50] and [48..90] → merge to [10..90]
1911        // Group 2: y=100, [10..40] and [60..90] → gap too big, stay separate
1912        let edges = vec![
1913            make_h_edge(10.0, 50.0, 50.0),
1914            make_h_edge(48.0, 50.0, 90.0),
1915            make_h_edge(10.0, 100.0, 40.0),
1916            make_h_edge(60.0, 100.0, 90.0),
1917        ];
1918        let result = join_edge_group(edges, 3.0, 3.0);
1919        assert_eq!(result.len(), 3);
1920
1921        let at_50: Vec<&Edge> = result
1922            .iter()
1923            .filter(|e| (e.top - 50.0).abs() < 1e-6)
1924            .collect();
1925        assert_eq!(at_50.len(), 1);
1926        assert_approx(at_50[0].x0, 10.0);
1927        assert_approx(at_50[0].x1, 90.0);
1928
1929        let at_100: Vec<&Edge> = result
1930            .iter()
1931            .filter(|e| (e.top - 100.0).abs() < 1e-6)
1932            .collect();
1933        assert_eq!(at_100.len(), 2);
1934    }
1935
1936    #[test]
1937    fn test_join_mixed_orientations() {
1938        // Mix of horizontal and vertical edges: each group joins independently
1939        let edges = vec![
1940            make_h_edge(10.0, 50.0, 50.0),
1941            make_h_edge(48.0, 50.0, 90.0),
1942            make_v_edge(200.0, 10.0, 50.0),
1943            make_v_edge(200.0, 48.0, 90.0),
1944        ];
1945        let result = join_edge_group(edges, 3.0, 3.0);
1946        assert_eq!(result.len(), 2);
1947
1948        let horizontals: Vec<&Edge> = result
1949            .iter()
1950            .filter(|e| e.orientation == Orientation::Horizontal)
1951            .collect();
1952        assert_eq!(horizontals.len(), 1);
1953        assert_approx(horizontals[0].x0, 10.0);
1954        assert_approx(horizontals[0].x1, 90.0);
1955
1956        let verticals: Vec<&Edge> = result
1957            .iter()
1958            .filter(|e| e.orientation == Orientation::Vertical)
1959            .collect();
1960        assert_eq!(verticals.len(), 1);
1961        assert_approx(verticals[0].top, 10.0);
1962        assert_approx(verticals[0].bottom, 90.0);
1963    }
1964
1965    #[test]
1966    fn test_join_separate_x_y_tolerance() {
1967        // Horizontal edges: gap=4.0, join_x_tolerance=3.0 → NOT merged
1968        let edges = vec![make_h_edge(10.0, 50.0, 40.0), make_h_edge(44.0, 50.0, 80.0)];
1969        let result = join_edge_group(edges, 3.0, 3.0);
1970        assert_eq!(result.len(), 2);
1971
1972        // Vertical edges: gap=4.0, join_y_tolerance=5.0 → merged
1973        let edges = vec![make_v_edge(50.0, 10.0, 40.0), make_v_edge(50.0, 44.0, 80.0)];
1974        let result = join_edge_group(edges, 3.0, 5.0);
1975        assert_eq!(result.len(), 1);
1976    }
1977
1978    #[test]
1979    fn test_join_diagonal_edges_pass_through() {
1980        let diag = Edge {
1981            x0: 0.0,
1982            top: 0.0,
1983            x1: 100.0,
1984            bottom: 100.0,
1985            orientation: Orientation::Diagonal,
1986            source: crate::edges::EdgeSource::Curve,
1987        };
1988        let edges = vec![diag.clone(), make_h_edge(10.0, 50.0, 90.0)];
1989        let result = join_edge_group(edges, 3.0, 3.0);
1990        assert_eq!(result.len(), 2);
1991
1992        let diagonals: Vec<&Edge> = result
1993            .iter()
1994            .filter(|e| e.orientation == Orientation::Diagonal)
1995            .collect();
1996        assert_eq!(diagonals.len(), 1);
1997        assert_approx(diagonals[0].x0, 0.0);
1998        assert_approx(diagonals[0].bottom, 100.0);
1999    }
2000
2001    #[test]
2002    fn test_snap_edges_zero_tolerance() {
2003        // With zero tolerance, only exact matches snap
2004        let edges = vec![
2005            make_h_edge(0.0, 50.0, 100.0),
2006            make_h_edge(0.0, 50.0, 100.0), // exact same y
2007            make_h_edge(0.0, 50.1, 100.0), // different y
2008        ];
2009        let result = snap_edges(edges, 0.0, 0.0);
2010
2011        let horizontals: Vec<&Edge> = result
2012            .iter()
2013            .filter(|e| e.orientation == Orientation::Horizontal)
2014            .collect();
2015        assert_eq!(horizontals.len(), 3);
2016        let mut ys: Vec<f64> = horizontals.iter().map(|e| e.top).collect();
2017        ys.sort_by(|a, b| a.partial_cmp(b).unwrap());
2018        assert_approx(ys[0], 50.0);
2019        assert_approx(ys[1], 50.0);
2020        assert_approx(ys[2], 50.1);
2021    }
2022
2023    // --- edges_to_intersections tests ---
2024
2025    fn has_intersection(intersections: &[Intersection], x: f64, y: f64) -> bool {
2026        intersections
2027            .iter()
2028            .any(|i| (i.x - x).abs() < 1e-6 && (i.y - y).abs() < 1e-6)
2029    }
2030
2031    #[test]
2032    fn test_intersections_empty_edges() {
2033        let result = edges_to_intersections(&[], 3.0, 3.0);
2034        assert!(result.is_empty());
2035    }
2036
2037    #[test]
2038    fn test_intersections_simple_cross() {
2039        // Horizontal edge at y=50 from x=0 to x=100
2040        // Vertical edge at x=50 from y=0 to y=100
2041        // Should intersect at (50, 50)
2042        let edges = vec![make_h_edge(0.0, 50.0, 100.0), make_v_edge(50.0, 0.0, 100.0)];
2043        let result = edges_to_intersections(&edges, 3.0, 3.0);
2044        assert_eq!(result.len(), 1);
2045        assert!(has_intersection(&result, 50.0, 50.0));
2046    }
2047
2048    #[test]
2049    fn test_intersections_t_intersection() {
2050        // Horizontal edge at y=50 from x=0 to x=100
2051        // Vertical edge at x=50 from y=50 to y=100 (starts at the horizontal edge)
2052        // Should intersect at (50, 50)
2053        let edges = vec![
2054            make_h_edge(0.0, 50.0, 100.0),
2055            make_v_edge(50.0, 50.0, 100.0),
2056        ];
2057        let result = edges_to_intersections(&edges, 3.0, 3.0);
2058        assert_eq!(result.len(), 1);
2059        assert!(has_intersection(&result, 50.0, 50.0));
2060    }
2061
2062    #[test]
2063    fn test_intersections_l_intersection_corner() {
2064        // Horizontal edge at y=50 from x=50 to x=100
2065        // Vertical edge at x=50 from y=0 to y=50
2066        // Corner at (50, 50)
2067        let edges = vec![make_h_edge(50.0, 50.0, 100.0), make_v_edge(50.0, 0.0, 50.0)];
2068        let result = edges_to_intersections(&edges, 3.0, 3.0);
2069        assert_eq!(result.len(), 1);
2070        assert!(has_intersection(&result, 50.0, 50.0));
2071    }
2072
2073    #[test]
2074    fn test_intersections_no_intersection_parallel() {
2075        // Two parallel horizontal edges — no intersections
2076        let edges = vec![make_h_edge(0.0, 50.0, 100.0), make_h_edge(0.0, 80.0, 100.0)];
2077        let result = edges_to_intersections(&edges, 3.0, 3.0);
2078        assert!(result.is_empty());
2079    }
2080
2081    #[test]
2082    fn test_intersections_no_intersection_non_overlapping() {
2083        // Horizontal edge at y=50 from x=0 to x=40
2084        // Vertical edge at x=60 from y=0 to y=100
2085        // They don't overlap in x-range (40 < 60 with tolerance 3 → 40+3=43 < 60)
2086        let edges = vec![make_h_edge(0.0, 50.0, 40.0), make_v_edge(60.0, 0.0, 100.0)];
2087        let result = edges_to_intersections(&edges, 3.0, 3.0);
2088        assert!(result.is_empty());
2089    }
2090
2091    #[test]
2092    fn test_intersections_tolerance_based() {
2093        // Horizontal edge at y=50 from x=0 to x=48
2094        // Vertical edge at x=50 from y=0 to y=100
2095        // Gap in x: 50 - 48 = 2, within tolerance 3 → should intersect
2096        let edges = vec![make_h_edge(0.0, 50.0, 48.0), make_v_edge(50.0, 0.0, 100.0)];
2097        let result = edges_to_intersections(&edges, 3.0, 3.0);
2098        assert_eq!(result.len(), 1);
2099        assert!(has_intersection(&result, 50.0, 50.0));
2100    }
2101
2102    #[test]
2103    fn test_intersections_tolerance_y_based() {
2104        // Horizontal edge at y=50 from x=0 to x=100
2105        // Vertical edge at x=50 from y=0 to y=48
2106        // Gap in y: 50 - 48 = 2, within tolerance 3 → should intersect
2107        let edges = vec![make_h_edge(0.0, 50.0, 100.0), make_v_edge(50.0, 0.0, 48.0)];
2108        let result = edges_to_intersections(&edges, 3.0, 3.0);
2109        assert_eq!(result.len(), 1);
2110        assert!(has_intersection(&result, 50.0, 50.0));
2111    }
2112
2113    #[test]
2114    fn test_intersections_beyond_tolerance_no_match() {
2115        // Horizontal edge at y=50 from x=0 to x=45
2116        // Vertical edge at x=50 from y=0 to y=100
2117        // Gap in x: 50 - 45 = 5, beyond tolerance 3 → no intersection
2118        let edges = vec![make_h_edge(0.0, 50.0, 45.0), make_v_edge(50.0, 0.0, 100.0)];
2119        let result = edges_to_intersections(&edges, 3.0, 3.0);
2120        assert!(result.is_empty());
2121    }
2122
2123    #[test]
2124    fn test_intersections_grid_2x2() {
2125        // 2x2 grid: 3 horizontal edges × 3 vertical edges = 9 intersections
2126        // H: y=0, y=50, y=100 (all from x=0 to x=100)
2127        // V: x=0, x=50, x=100 (all from y=0 to y=100)
2128        let edges = vec![
2129            make_h_edge(0.0, 0.0, 100.0),
2130            make_h_edge(0.0, 50.0, 100.0),
2131            make_h_edge(0.0, 100.0, 100.0),
2132            make_v_edge(0.0, 0.0, 100.0),
2133            make_v_edge(50.0, 0.0, 100.0),
2134            make_v_edge(100.0, 0.0, 100.0),
2135        ];
2136        let result = edges_to_intersections(&edges, 3.0, 3.0);
2137        assert_eq!(result.len(), 9);
2138        // Check corners
2139        assert!(has_intersection(&result, 0.0, 0.0));
2140        assert!(has_intersection(&result, 100.0, 0.0));
2141        assert!(has_intersection(&result, 0.0, 100.0));
2142        assert!(has_intersection(&result, 100.0, 100.0));
2143        // Check center
2144        assert!(has_intersection(&result, 50.0, 50.0));
2145    }
2146
2147    #[test]
2148    fn test_intersections_ignores_diagonal_edges() {
2149        // Diagonal edge should be ignored
2150        let edges = vec![
2151            Edge {
2152                x0: 0.0,
2153                top: 0.0,
2154                x1: 100.0,
2155                bottom: 100.0,
2156                orientation: Orientation::Diagonal,
2157                source: crate::edges::EdgeSource::Curve,
2158            },
2159            make_h_edge(0.0, 50.0, 100.0),
2160        ];
2161        let result = edges_to_intersections(&edges, 3.0, 3.0);
2162        assert!(result.is_empty());
2163    }
2164
2165    #[test]
2166    fn test_intersections_multiple_h_one_v() {
2167        // Three horizontal edges at y=10, y=50, y=90 (x=0..100)
2168        // One vertical edge at x=50 (y=0..100)
2169        // Should yield 3 intersections at (50,10), (50,50), (50,90)
2170        let edges = vec![
2171            make_h_edge(0.0, 10.0, 100.0),
2172            make_h_edge(0.0, 50.0, 100.0),
2173            make_h_edge(0.0, 90.0, 100.0),
2174            make_v_edge(50.0, 0.0, 100.0),
2175        ];
2176        let result = edges_to_intersections(&edges, 3.0, 3.0);
2177        assert_eq!(result.len(), 3);
2178        assert!(has_intersection(&result, 50.0, 10.0));
2179        assert!(has_intersection(&result, 50.0, 50.0));
2180        assert!(has_intersection(&result, 50.0, 90.0));
2181    }
2182
2183    #[test]
2184    fn test_intersections_separate_x_y_tolerance() {
2185        // Horizontal edge at y=50, x=0..48
2186        // Vertical edge at x=50, y=0..100
2187        // Gap in x is 2.0. With x_tolerance=1.0, should NOT intersect
2188        let edges = vec![make_h_edge(0.0, 50.0, 48.0), make_v_edge(50.0, 0.0, 100.0)];
2189        let result = edges_to_intersections(&edges, 1.0, 3.0);
2190        assert!(result.is_empty());
2191
2192        // Same setup but with x_tolerance=3.0, should intersect
2193        let result = edges_to_intersections(&edges, 3.0, 3.0);
2194        assert_eq!(result.len(), 1);
2195    }
2196
2197    #[test]
2198    fn test_intersections_no_duplicate_points() {
2199        // Two horizontal edges at the same y, one vertical edge
2200        // Both horizontals cross the vertical at the same point
2201        // Should produce only one intersection point (deduplicated)
2202        let edges = vec![
2203            make_h_edge(0.0, 50.0, 100.0),
2204            make_h_edge(20.0, 50.0, 80.0),
2205            make_v_edge(50.0, 0.0, 100.0),
2206        ];
2207        let result = edges_to_intersections(&edges, 3.0, 3.0);
2208        // Both horizontals at y=50 cross vertical at x=50 → same point (50, 50)
2209        // Should be deduplicated to 1 intersection
2210        assert_eq!(result.len(), 1);
2211        assert!(has_intersection(&result, 50.0, 50.0));
2212    }
2213
2214    // --- intersections_to_cells tests ---
2215
2216    fn make_intersection(x: f64, y: f64) -> Intersection {
2217        Intersection { x, y }
2218    }
2219
2220    #[test]
2221    fn test_intersections_to_cells_empty() {
2222        let result = intersections_to_cells(&[]);
2223        assert!(result.is_empty());
2224    }
2225
2226    #[test]
2227    fn test_intersections_to_cells_simple_2x2_grid() {
2228        // 2x2 grid of intersections → 1 cell
2229        // (0,0) (100,0)
2230        // (0,50) (100,50)
2231        let intersections = vec![
2232            make_intersection(0.0, 0.0),
2233            make_intersection(100.0, 0.0),
2234            make_intersection(0.0, 50.0),
2235            make_intersection(100.0, 50.0),
2236        ];
2237        let cells = intersections_to_cells(&intersections);
2238        assert_eq!(cells.len(), 1);
2239        assert_approx(cells[0].bbox.x0, 0.0);
2240        assert_approx(cells[0].bbox.top, 0.0);
2241        assert_approx(cells[0].bbox.x1, 100.0);
2242        assert_approx(cells[0].bbox.bottom, 50.0);
2243        assert!(cells[0].text.is_none());
2244    }
2245
2246    #[test]
2247    fn test_intersections_to_cells_3x3_grid() {
2248        // 3x3 grid of intersections → 4 cells
2249        //  (0,0)  (50,0)  (100,0)
2250        //  (0,30) (50,30) (100,30)
2251        //  (0,60) (50,60) (100,60)
2252        let intersections = vec![
2253            make_intersection(0.0, 0.0),
2254            make_intersection(50.0, 0.0),
2255            make_intersection(100.0, 0.0),
2256            make_intersection(0.0, 30.0),
2257            make_intersection(50.0, 30.0),
2258            make_intersection(100.0, 30.0),
2259            make_intersection(0.0, 60.0),
2260            make_intersection(50.0, 60.0),
2261            make_intersection(100.0, 60.0),
2262        ];
2263        let cells = intersections_to_cells(&intersections);
2264        assert_eq!(cells.len(), 4);
2265
2266        // Top-left cell
2267        assert!(cells.iter().any(|c| (c.bbox.x0 - 0.0).abs() < 1e-6
2268            && (c.bbox.top - 0.0).abs() < 1e-6
2269            && (c.bbox.x1 - 50.0).abs() < 1e-6
2270            && (c.bbox.bottom - 30.0).abs() < 1e-6));
2271        // Top-right cell
2272        assert!(cells.iter().any(|c| (c.bbox.x0 - 50.0).abs() < 1e-6
2273            && (c.bbox.top - 0.0).abs() < 1e-6
2274            && (c.bbox.x1 - 100.0).abs() < 1e-6
2275            && (c.bbox.bottom - 30.0).abs() < 1e-6));
2276        // Bottom-left cell
2277        assert!(cells.iter().any(|c| (c.bbox.x0 - 0.0).abs() < 1e-6
2278            && (c.bbox.top - 30.0).abs() < 1e-6
2279            && (c.bbox.x1 - 50.0).abs() < 1e-6
2280            && (c.bbox.bottom - 60.0).abs() < 1e-6));
2281        // Bottom-right cell
2282        assert!(cells.iter().any(|c| (c.bbox.x0 - 50.0).abs() < 1e-6
2283            && (c.bbox.top - 30.0).abs() < 1e-6
2284            && (c.bbox.x1 - 100.0).abs() < 1e-6
2285            && (c.bbox.bottom - 60.0).abs() < 1e-6));
2286    }
2287
2288    #[test]
2289    fn test_intersections_to_cells_missing_corner() {
2290        // 2x2 grid but missing the bottom-right corner → 0 cells
2291        // (0,0) (100,0)
2292        // (0,50) ---missing---
2293        let intersections = vec![
2294            make_intersection(0.0, 0.0),
2295            make_intersection(100.0, 0.0),
2296            make_intersection(0.0, 50.0),
2297        ];
2298        let cells = intersections_to_cells(&intersections);
2299        assert!(cells.is_empty());
2300    }
2301
2302    #[test]
2303    fn test_intersections_to_cells_irregular_grid() {
2304        // 3x3 grid but missing center intersection → only corners that form complete rectangles
2305        // (0,0)  (50,0)  (100,0)
2306        // (0,30) ---X--- (100,30)
2307        // (0,60) (50,60) (100,60)
2308        // Without (50,30): top-left and bottom-left cells lose a corner.
2309        // Only (0,0)-(100,0)-(0,30)-(100,30) is complete → 1 big cell top row
2310        // And (0,30)-(100,30)-(0,60)-(100,60) is complete → 1 big cell bottom row
2311        // Plus (0,60)-(50,60) and (50,60)-(100,60) don't have top corners at 50,30
2312        // So we get: cells that have all 4 corners present
2313        let intersections = vec![
2314            make_intersection(0.0, 0.0),
2315            make_intersection(50.0, 0.0),
2316            make_intersection(100.0, 0.0),
2317            make_intersection(0.0, 30.0),
2318            // (50, 30) missing
2319            make_intersection(100.0, 30.0),
2320            make_intersection(0.0, 60.0),
2321            make_intersection(50.0, 60.0),
2322            make_intersection(100.0, 60.0),
2323        ];
2324        let cells = intersections_to_cells(&intersections);
2325        // Top row: (0,0)-(50,0)-(0,30)-(50,30)? No, (50,30) missing → skip
2326        //          (50,0)-(100,0)-(50,30)-(100,30)? No, (50,30) missing → skip
2327        //          (0,0)-(100,0)-(0,30)-(100,30)? The grid only checks adjacent columns.
2328        //            xs = [0, 50, 100], adjacent pairs are (0,50) and (50,100)
2329        //            So this cell would not be formed from the adjacent pair logic.
2330        // Bottom row: (0,30)-(50,30)? (50,30) missing → skip
2331        //             (50,30)-(100,30)? (50,30) missing → skip
2332        // Bottom row with y=30..60: (0,30)-(50,30) missing → skip; (50,30)-(100,30) missing → skip
2333        //   But (0,30)-(100,30)-(0,60)-(100,60) is NOT adjacent columns
2334        // Result: 0 cells (because the missing center breaks all adjacent cell formations)
2335        // Wait - let me reconsider:
2336        // xs = [0, 50, 100], ys = [0, 30, 60]
2337        // (0,50) x (0,30): corners (0,0),(50,0),(0,30),(50,30) → (50,30) missing → skip
2338        // (50,100) x (0,30): corners (50,0),(100,0),(50,30),(100,30) → (50,30) missing → skip
2339        // (0,50) x (30,60): corners (0,30),(50,30),(0,60),(50,60) → (50,30) missing → skip
2340        // (50,100) x (30,60): corners (50,30),(100,30),(50,60),(100,60) → (50,30) missing → skip
2341        // All cells need (50,30) which is missing → 0 cells
2342        assert_eq!(cells.len(), 0);
2343    }
2344
2345    #[test]
2346    fn test_intersections_to_cells_partial_grid_with_valid_cells() {
2347        // L-shaped grid where some cells are complete
2348        // (0,0) (50,0)
2349        // (0,30) (50,30) (100,30)
2350        //                (100,60)
2351        // Only the top-left cell (0,0)-(50,0)-(0,30)-(50,30) has all 4 corners
2352        let intersections = vec![
2353            make_intersection(0.0, 0.0),
2354            make_intersection(50.0, 0.0),
2355            make_intersection(0.0, 30.0),
2356            make_intersection(50.0, 30.0),
2357            make_intersection(100.0, 30.0),
2358            make_intersection(100.0, 60.0),
2359        ];
2360        let cells = intersections_to_cells(&intersections);
2361        assert_eq!(cells.len(), 1);
2362        assert_approx(cells[0].bbox.x0, 0.0);
2363        assert_approx(cells[0].bbox.top, 0.0);
2364        assert_approx(cells[0].bbox.x1, 50.0);
2365        assert_approx(cells[0].bbox.bottom, 30.0);
2366    }
2367
2368    #[test]
2369    fn test_intersections_to_cells_single_point() {
2370        // Single intersection point → no cells
2371        let intersections = vec![make_intersection(50.0, 50.0)];
2372        let cells = intersections_to_cells(&intersections);
2373        assert!(cells.is_empty());
2374    }
2375
2376    #[test]
2377    fn test_intersections_to_cells_collinear_points() {
2378        // Points along a single line (no area) → no cells
2379        let intersections = vec![
2380            make_intersection(0.0, 50.0),
2381            make_intersection(50.0, 50.0),
2382            make_intersection(100.0, 50.0),
2383        ];
2384        let cells = intersections_to_cells(&intersections);
2385        assert!(cells.is_empty());
2386    }
2387
2388    #[test]
2389    fn test_intersections_to_cells_4x3_grid() {
2390        // 4 columns × 3 rows → 3×2 = 6 cells
2391        let mut intersections = Vec::new();
2392        for &x in &[0.0, 40.0, 80.0, 120.0] {
2393            for &y in &[0.0, 30.0, 60.0] {
2394                intersections.push(make_intersection(x, y));
2395            }
2396        }
2397        let cells = intersections_to_cells(&intersections);
2398        assert_eq!(cells.len(), 6);
2399    }
2400
2401    #[test]
2402    fn test_intersections_to_cells_text_is_none() {
2403        // All cells should have text = None (text extraction is a separate step)
2404        let intersections = vec![
2405            make_intersection(0.0, 0.0),
2406            make_intersection(100.0, 0.0),
2407            make_intersection(0.0, 50.0),
2408            make_intersection(100.0, 50.0),
2409        ];
2410        let cells = intersections_to_cells(&intersections);
2411        for cell in &cells {
2412            assert!(cell.text.is_none());
2413        }
2414    }
2415
2416    // --- cells_to_tables tests ---
2417
2418    fn make_cell(x0: f64, top: f64, x1: f64, bottom: f64) -> Cell {
2419        Cell {
2420            bbox: BBox::new(x0, top, x1, bottom),
2421            text: None,
2422        }
2423    }
2424
2425    #[test]
2426    fn test_cells_to_tables_empty() {
2427        let tables = cells_to_tables(Vec::new());
2428        assert!(tables.is_empty());
2429    }
2430
2431    #[test]
2432    fn test_cells_to_tables_single_cell() {
2433        // A single cell forms a single table
2434        let cells = vec![make_cell(0.0, 0.0, 50.0, 30.0)];
2435        let tables = cells_to_tables(cells);
2436        assert_eq!(tables.len(), 1);
2437        assert_approx(tables[0].bbox.x0, 0.0);
2438        assert_approx(tables[0].bbox.top, 0.0);
2439        assert_approx(tables[0].bbox.x1, 50.0);
2440        assert_approx(tables[0].bbox.bottom, 30.0);
2441        assert_eq!(tables[0].cells.len(), 1);
2442        assert_eq!(tables[0].rows.len(), 1);
2443        assert_eq!(tables[0].rows[0].len(), 1);
2444        assert_eq!(tables[0].columns.len(), 1);
2445        assert_eq!(tables[0].columns[0].len(), 1);
2446    }
2447
2448    #[test]
2449    fn test_cells_to_tables_single_table_2x2() {
2450        // 2x2 grid: 4 cells sharing edges → 1 table
2451        let cells = vec![
2452            make_cell(0.0, 0.0, 50.0, 30.0),
2453            make_cell(50.0, 0.0, 100.0, 30.0),
2454            make_cell(0.0, 30.0, 50.0, 60.0),
2455            make_cell(50.0, 30.0, 100.0, 60.0),
2456        ];
2457        let tables = cells_to_tables(cells);
2458        assert_eq!(tables.len(), 1);
2459        assert_approx(tables[0].bbox.x0, 0.0);
2460        assert_approx(tables[0].bbox.top, 0.0);
2461        assert_approx(tables[0].bbox.x1, 100.0);
2462        assert_approx(tables[0].bbox.bottom, 60.0);
2463        assert_eq!(tables[0].cells.len(), 4);
2464        // 2 rows, each with 2 cells
2465        assert_eq!(tables[0].rows.len(), 2);
2466        assert_eq!(tables[0].rows[0].len(), 2);
2467        assert_eq!(tables[0].rows[1].len(), 2);
2468        // 2 columns, each with 2 cells
2469        assert_eq!(tables[0].columns.len(), 2);
2470        assert_eq!(tables[0].columns[0].len(), 2);
2471        assert_eq!(tables[0].columns[1].len(), 2);
2472    }
2473
2474    #[test]
2475    fn test_cells_to_tables_single_table_rows_ordered() {
2476        // Verify rows are ordered top-to-bottom, left-to-right
2477        let cells = vec![
2478            make_cell(50.0, 30.0, 100.0, 60.0), // bottom-right (given first to test ordering)
2479            make_cell(0.0, 0.0, 50.0, 30.0),    // top-left
2480            make_cell(50.0, 0.0, 100.0, 30.0),  // top-right
2481            make_cell(0.0, 30.0, 50.0, 60.0),   // bottom-left
2482        ];
2483        let tables = cells_to_tables(cells);
2484        assert_eq!(tables.len(), 1);
2485        // Row 0 (top): left then right
2486        assert_approx(tables[0].rows[0][0].bbox.x0, 0.0);
2487        assert_approx(tables[0].rows[0][1].bbox.x0, 50.0);
2488        // Row 1 (bottom): left then right
2489        assert_approx(tables[0].rows[1][0].bbox.x0, 0.0);
2490        assert_approx(tables[0].rows[1][1].bbox.x0, 50.0);
2491    }
2492
2493    #[test]
2494    fn test_cells_to_tables_single_table_columns_ordered() {
2495        // Verify columns are ordered left-to-right, top-to-bottom
2496        let cells = vec![
2497            make_cell(0.0, 0.0, 50.0, 30.0),
2498            make_cell(50.0, 0.0, 100.0, 30.0),
2499            make_cell(0.0, 30.0, 50.0, 60.0),
2500            make_cell(50.0, 30.0, 100.0, 60.0),
2501        ];
2502        let tables = cells_to_tables(cells);
2503        assert_eq!(tables.len(), 1);
2504        // Column 0 (left): top then bottom
2505        assert_approx(tables[0].columns[0][0].bbox.top, 0.0);
2506        assert_approx(tables[0].columns[0][1].bbox.top, 30.0);
2507        // Column 1 (right): top then bottom
2508        assert_approx(tables[0].columns[1][0].bbox.top, 0.0);
2509        assert_approx(tables[0].columns[1][1].bbox.top, 30.0);
2510    }
2511
2512    #[test]
2513    fn test_cells_to_tables_two_separate_tables() {
2514        // Two tables far apart on the same page
2515        // Table 1: top-left area
2516        // Table 2: bottom-right area (no shared edges)
2517        let cells = vec![
2518            // Table 1
2519            make_cell(0.0, 0.0, 50.0, 30.0),
2520            make_cell(50.0, 0.0, 100.0, 30.0),
2521            // Table 2
2522            make_cell(200.0, 200.0, 250.0, 230.0),
2523            make_cell(250.0, 200.0, 300.0, 230.0),
2524        ];
2525        let tables = cells_to_tables(cells);
2526        assert_eq!(tables.len(), 2);
2527
2528        // Sort tables by x0 to get deterministic order
2529        let mut tables = tables;
2530        tables.sort_by(|a, b| a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap());
2531
2532        // Table 1
2533        assert_approx(tables[0].bbox.x0, 0.0);
2534        assert_approx(tables[0].bbox.x1, 100.0);
2535        assert_eq!(tables[0].cells.len(), 2);
2536        assert_eq!(tables[0].rows.len(), 1);
2537        assert_eq!(tables[0].columns.len(), 2);
2538
2539        // Table 2
2540        assert_approx(tables[1].bbox.x0, 200.0);
2541        assert_approx(tables[1].bbox.x1, 300.0);
2542        assert_eq!(tables[1].cells.len(), 2);
2543        assert_eq!(tables[1].rows.len(), 1);
2544        assert_eq!(tables[1].columns.len(), 2);
2545    }
2546
2547    #[test]
2548    fn test_cells_to_tables_3x3_grid() {
2549        // 3 cols × 3 rows = 9 cells, all connected → 1 table
2550        let mut cells = Vec::new();
2551        for row in 0..3 {
2552            for col in 0..3 {
2553                let x0 = col as f64 * 40.0;
2554                let top = row as f64 * 30.0;
2555                cells.push(make_cell(x0, top, x0 + 40.0, top + 30.0));
2556            }
2557        }
2558        let tables = cells_to_tables(cells);
2559        assert_eq!(tables.len(), 1);
2560        assert_eq!(tables[0].cells.len(), 9);
2561        assert_eq!(tables[0].rows.len(), 3);
2562        for row in &tables[0].rows {
2563            assert_eq!(row.len(), 3);
2564        }
2565        assert_eq!(tables[0].columns.len(), 3);
2566        for col in &tables[0].columns {
2567            assert_eq!(col.len(), 3);
2568        }
2569        assert_approx(tables[0].bbox.x0, 0.0);
2570        assert_approx(tables[0].bbox.top, 0.0);
2571        assert_approx(tables[0].bbox.x1, 120.0);
2572        assert_approx(tables[0].bbox.bottom, 90.0);
2573    }
2574
2575    #[test]
2576    fn test_cells_to_tables_single_row() {
2577        // 3 cells in a single row → 1 table with 1 row, 3 columns
2578        let cells = vec![
2579            make_cell(0.0, 0.0, 40.0, 30.0),
2580            make_cell(40.0, 0.0, 80.0, 30.0),
2581            make_cell(80.0, 0.0, 120.0, 30.0),
2582        ];
2583        let tables = cells_to_tables(cells);
2584        assert_eq!(tables.len(), 1);
2585        assert_eq!(tables[0].rows.len(), 1);
2586        assert_eq!(tables[0].rows[0].len(), 3);
2587        assert_eq!(tables[0].columns.len(), 3);
2588        for col in &tables[0].columns {
2589            assert_eq!(col.len(), 1);
2590        }
2591    }
2592
2593    #[test]
2594    fn test_cells_to_tables_single_column() {
2595        // 3 cells in a single column → 1 table with 3 rows, 1 column
2596        let cells = vec![
2597            make_cell(0.0, 0.0, 50.0, 30.0),
2598            make_cell(0.0, 30.0, 50.0, 60.0),
2599            make_cell(0.0, 60.0, 50.0, 90.0),
2600        ];
2601        let tables = cells_to_tables(cells);
2602        assert_eq!(tables.len(), 1);
2603        assert_eq!(tables[0].rows.len(), 3);
2604        for row in &tables[0].rows {
2605            assert_eq!(row.len(), 1);
2606        }
2607        assert_eq!(tables[0].columns.len(), 1);
2608        assert_eq!(tables[0].columns[0].len(), 3);
2609    }
2610
2611    // --- US-035: Lattice strategy - full pipeline tests ---
2612
2613    fn make_h_edge_src(x0: f64, y: f64, x1: f64, source: crate::edges::EdgeSource) -> Edge {
2614        Edge {
2615            x0,
2616            top: y,
2617            x1,
2618            bottom: y,
2619            orientation: Orientation::Horizontal,
2620            source,
2621        }
2622    }
2623
2624    fn make_v_edge_src(x: f64, top: f64, bottom: f64, source: crate::edges::EdgeSource) -> Edge {
2625        Edge {
2626            x0: x,
2627            top,
2628            x1: x,
2629            bottom,
2630            orientation: Orientation::Vertical,
2631            source,
2632        }
2633    }
2634
2635    #[test]
2636    fn test_lattice_simple_bordered_table() {
2637        // Simple 2x2 table from line edges forming a grid:
2638        // 3 horizontal lines at y=0, y=30, y=60 (from x=0 to x=100)
2639        // 3 vertical lines at x=0, x=50, x=100 (from y=0 to y=60)
2640        // Should produce 1 table with 4 cells (2 rows × 2 cols)
2641        let edges = vec![
2642            make_h_edge(0.0, 0.0, 100.0),
2643            make_h_edge(0.0, 30.0, 100.0),
2644            make_h_edge(0.0, 60.0, 100.0),
2645            make_v_edge(0.0, 0.0, 60.0),
2646            make_v_edge(50.0, 0.0, 60.0),
2647            make_v_edge(100.0, 0.0, 60.0),
2648        ];
2649        let settings = TableSettings::default();
2650        let finder = TableFinder::new(edges, settings);
2651        let tables = finder.find_tables();
2652
2653        assert_eq!(tables.len(), 1);
2654        assert_eq!(tables[0].cells.len(), 4);
2655        assert_eq!(tables[0].rows.len(), 2);
2656        assert_eq!(tables[0].rows[0].len(), 2);
2657        assert_eq!(tables[0].rows[1].len(), 2);
2658        assert_approx(tables[0].bbox.x0, 0.0);
2659        assert_approx(tables[0].bbox.top, 0.0);
2660        assert_approx(tables[0].bbox.x1, 100.0);
2661        assert_approx(tables[0].bbox.bottom, 60.0);
2662    }
2663
2664    #[test]
2665    fn test_lattice_with_rect_edges() {
2666        // Lattice strategy includes rect-sourced edges.
2667        // Build edges from rect sources that form a 1-cell table.
2668        let edges = vec![
2669            make_h_edge_src(0.0, 0.0, 100.0, crate::edges::EdgeSource::RectTop),
2670            make_h_edge_src(0.0, 50.0, 100.0, crate::edges::EdgeSource::RectBottom),
2671            make_v_edge_src(0.0, 0.0, 50.0, crate::edges::EdgeSource::RectLeft),
2672            make_v_edge_src(100.0, 0.0, 50.0, crate::edges::EdgeSource::RectRight),
2673        ];
2674        let settings = TableSettings {
2675            strategy: Strategy::Lattice,
2676            ..TableSettings::default()
2677        };
2678        let finder = TableFinder::new(edges, settings);
2679        let tables = finder.find_tables();
2680
2681        // Lattice includes rect edges → should find 1 table with 1 cell
2682        assert_eq!(tables.len(), 1);
2683        assert_eq!(tables[0].cells.len(), 1);
2684    }
2685
2686    #[test]
2687    fn test_lattice_strict_excludes_rect_edges() {
2688        // LatticeStrict should exclude rect-sourced edges.
2689        // Only line-sourced edges should be used.
2690        let edges = vec![
2691            // These rect-sourced edges form a grid by themselves
2692            make_h_edge_src(0.0, 0.0, 100.0, crate::edges::EdgeSource::RectTop),
2693            make_h_edge_src(0.0, 50.0, 100.0, crate::edges::EdgeSource::RectBottom),
2694            make_v_edge_src(0.0, 0.0, 50.0, crate::edges::EdgeSource::RectLeft),
2695            make_v_edge_src(100.0, 0.0, 50.0, crate::edges::EdgeSource::RectRight),
2696        ];
2697        let settings = TableSettings {
2698            strategy: Strategy::LatticeStrict,
2699            ..TableSettings::default()
2700        };
2701        let finder = TableFinder::new(edges, settings);
2702        let tables = finder.find_tables();
2703
2704        // LatticeStrict excludes rect edges → no line edges → no tables
2705        assert!(tables.is_empty());
2706    }
2707
2708    #[test]
2709    fn test_lattice_strict_with_line_edges() {
2710        // LatticeStrict with line-sourced edges should detect tables.
2711        let edges = vec![
2712            make_h_edge_src(0.0, 0.0, 100.0, crate::edges::EdgeSource::Line),
2713            make_h_edge_src(0.0, 50.0, 100.0, crate::edges::EdgeSource::Line),
2714            make_v_edge_src(0.0, 0.0, 50.0, crate::edges::EdgeSource::Line),
2715            make_v_edge_src(100.0, 0.0, 50.0, crate::edges::EdgeSource::Line),
2716        ];
2717        let settings = TableSettings {
2718            strategy: Strategy::LatticeStrict,
2719            ..TableSettings::default()
2720        };
2721        let finder = TableFinder::new(edges, settings);
2722        let tables = finder.find_tables();
2723
2724        assert_eq!(tables.len(), 1);
2725        assert_eq!(tables[0].cells.len(), 1);
2726    }
2727
2728    #[test]
2729    fn test_lattice_edge_min_length_filtering() {
2730        // Edges shorter than edge_min_length should be filtered out.
2731        // Short edges (length 2.0) should be removed with min_length=3.0
2732        let edges = vec![
2733            // These form a valid grid
2734            make_h_edge(0.0, 0.0, 100.0),  // length 100, kept
2735            make_h_edge(0.0, 50.0, 100.0), // length 100, kept
2736            make_v_edge(0.0, 0.0, 50.0),   // length 50, kept
2737            make_v_edge(100.0, 0.0, 50.0), // length 50, kept
2738            // Short edges that should be filtered
2739            make_h_edge(200.0, 0.0, 201.0), // length 1, filtered
2740            make_v_edge(200.0, 0.0, 2.0),   // length 2, filtered
2741        ];
2742        let settings = TableSettings {
2743            edge_min_length: 3.0,
2744            ..TableSettings::default()
2745        };
2746        let finder = TableFinder::new(edges, settings);
2747        let tables = finder.find_tables();
2748
2749        // Only the main grid edges remain → 1 table
2750        assert_eq!(tables.len(), 1);
2751        assert_eq!(tables[0].cells.len(), 1);
2752    }
2753
2754    #[test]
2755    fn test_lattice_edge_min_length_filters_all() {
2756        // If all edges are too short, no tables should be detected.
2757        let edges = vec![
2758            make_h_edge(0.0, 0.0, 2.0),   // length 2
2759            make_h_edge(0.0, 50.0, 1.5),  // length 1.5
2760            make_v_edge(0.0, 0.0, 2.5),   // length 2.5
2761            make_v_edge(100.0, 0.0, 1.0), // length 1
2762        ];
2763        let settings = TableSettings {
2764            edge_min_length: 3.0,
2765            ..TableSettings::default()
2766        };
2767        let finder = TableFinder::new(edges, settings);
2768        let tables = finder.find_tables();
2769
2770        assert!(tables.is_empty());
2771    }
2772
2773    #[test]
2774    fn test_lattice_full_pipeline_snap_and_join() {
2775        // Edges that are slightly misaligned and segmented.
2776        // After snap and join, they should form a valid grid.
2777        //
2778        // Two horizontal edges at y≈0 (slightly off) and y≈50:
2779        //   h1: y=0.5, x=0..60
2780        //   h2: y=-0.3, x=55..100  (same line as h1 after snap, overlapping after join)
2781        //   h3: y=50.0, x=0..100
2782        //
2783        // Two vertical edges at x≈0 and x≈100:
2784        //   v1: x=0.0, y=0..50
2785        //   v2: x=100.2, y=0..25
2786        //   v3: x=99.8, y=23..50  (same line as v2 after snap, overlapping after join)
2787        let edges = vec![
2788            make_h_edge(0.0, 0.5, 60.0),
2789            make_h_edge(55.0, -0.3, 100.0),
2790            make_h_edge(0.0, 50.0, 100.0),
2791            make_v_edge(0.0, 0.0, 50.0),
2792            make_v_edge(100.2, 0.0, 25.0),
2793            make_v_edge(99.8, 23.0, 50.0),
2794        ];
2795        let settings = TableSettings::default(); // snap/join tolerances = 3.0
2796        let finder = TableFinder::new(edges, settings);
2797        let tables = finder.find_tables();
2798
2799        // After snap+join, should form 1 table with 1 cell
2800        assert_eq!(tables.len(), 1);
2801        assert_eq!(tables[0].cells.len(), 1);
2802    }
2803
2804    #[test]
2805    fn test_lattice_empty_edges() {
2806        // No edges → no tables
2807        let finder = TableFinder::new(Vec::new(), TableSettings::default());
2808        let tables = finder.find_tables();
2809        assert!(tables.is_empty());
2810    }
2811
2812    #[test]
2813    fn test_lattice_no_intersections() {
2814        // Parallel edges that don't intersect → no tables
2815        let edges = vec![
2816            make_h_edge(0.0, 0.0, 100.0),
2817            make_h_edge(0.0, 50.0, 100.0),
2818            // No vertical edges → no intersections
2819        ];
2820        let finder = TableFinder::new(edges, TableSettings::default());
2821        let tables = finder.find_tables();
2822        assert!(tables.is_empty());
2823    }
2824
2825    #[test]
2826    fn test_lattice_strict_mixed_line_and_rect_edges() {
2827        // LatticeStrict should use line edges but not rect edges.
2828        // Mix of both: only line edges should be used.
2829        let edges = vec![
2830            // Line edges forming top/bottom
2831            make_h_edge_src(0.0, 0.0, 100.0, crate::edges::EdgeSource::Line),
2832            make_h_edge_src(0.0, 50.0, 100.0, crate::edges::EdgeSource::Line),
2833            // Line edges forming left/right
2834            make_v_edge_src(0.0, 0.0, 50.0, crate::edges::EdgeSource::Line),
2835            make_v_edge_src(100.0, 0.0, 50.0, crate::edges::EdgeSource::Line),
2836            // Rect edge adding a middle vertical line (should be ignored in strict mode)
2837            make_v_edge_src(50.0, 0.0, 50.0, crate::edges::EdgeSource::RectLeft),
2838        ];
2839        let settings = TableSettings {
2840            strategy: Strategy::LatticeStrict,
2841            ..TableSettings::default()
2842        };
2843        let finder = TableFinder::new(edges, settings);
2844        let tables = finder.find_tables();
2845
2846        // Only line edges used → 1 table with 1 cell (not 2 cells)
2847        assert_eq!(tables.len(), 1);
2848        assert_eq!(tables[0].cells.len(), 1);
2849    }
2850
2851    // --- extract_text_for_cells tests (US-036) ---
2852
2853    fn make_char(text: &str, x0: f64, top: f64, x1: f64, bottom: f64) -> Char {
2854        Char {
2855            text: text.to_string(),
2856            bbox: BBox::new(x0, top, x1, bottom),
2857            fontname: "TestFont".to_string(),
2858            size: 12.0,
2859            doctop: top,
2860            upright: true,
2861            direction: crate::text::TextDirection::Ltr,
2862            stroking_color: None,
2863            non_stroking_color: None,
2864            ctm: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
2865            char_code: 0,
2866            mcid: None,
2867            tag: None,
2868        }
2869    }
2870
2871    #[test]
2872    fn test_extract_text_single_word_in_cell() {
2873        // Cell: (0,0)-(100,50), chars spelling "Hi" centered in cell
2874        let mut cells = vec![Cell {
2875            bbox: BBox::new(0.0, 0.0, 100.0, 50.0),
2876            text: None,
2877        }];
2878        let chars = vec![
2879            make_char("H", 10.0, 15.0, 20.0, 27.0),
2880            make_char("i", 20.0, 15.0, 28.0, 27.0),
2881        ];
2882        extract_text_for_cells(&mut cells, &chars);
2883        assert_eq!(cells[0].text, Some("Hi".to_string()));
2884    }
2885
2886    #[test]
2887    fn test_extract_text_empty_cell() {
2888        // Cell with no characters inside → text should remain None
2889        let mut cells = vec![Cell {
2890            bbox: BBox::new(0.0, 0.0, 100.0, 50.0),
2891            text: None,
2892        }];
2893        let chars: Vec<Char> = vec![];
2894        extract_text_for_cells(&mut cells, &chars);
2895        assert_eq!(cells[0].text, None);
2896    }
2897
2898    #[test]
2899    fn test_extract_text_chars_outside_cell() {
2900        // All characters are outside the cell bbox → text should be None
2901        let mut cells = vec![Cell {
2902            bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
2903            text: None,
2904        }];
2905        // Chars at x=200, far outside cell
2906        let chars = vec![
2907            make_char("A", 200.0, 10.0, 210.0, 22.0),
2908            make_char("B", 210.0, 10.0, 220.0, 22.0),
2909        ];
2910        extract_text_for_cells(&mut cells, &chars);
2911        assert_eq!(cells[0].text, None);
2912    }
2913
2914    #[test]
2915    fn test_extract_text_center_point_containment() {
2916        // Char bbox partially overlaps cell but center is outside → not included
2917        // Cell: (0,0)-(50,30)
2918        // Char bbox: (48,10)-(60,22) → center = (54, 16) → outside cell
2919        let mut cells = vec![Cell {
2920            bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
2921            text: None,
2922        }];
2923        let chars = vec![make_char("X", 48.0, 10.0, 60.0, 22.0)];
2924        extract_text_for_cells(&mut cells, &chars);
2925        assert_eq!(cells[0].text, None);
2926    }
2927
2928    #[test]
2929    fn test_extract_text_center_inside_cell() {
2930        // Char bbox extends past cell border but center is inside → included
2931        // Cell: (0,0)-(50,30)
2932        // Char bbox: (40,10)-(52,22) → center = (46, 16) → inside cell
2933        let mut cells = vec![Cell {
2934            bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
2935            text: None,
2936        }];
2937        let chars = vec![make_char("Y", 40.0, 10.0, 52.0, 22.0)];
2938        extract_text_for_cells(&mut cells, &chars);
2939        assert_eq!(cells[0].text, Some("Y".to_string()));
2940    }
2941
2942    #[test]
2943    fn test_extract_text_multiple_words_in_cell() {
2944        // Cell with two words separated by a space char
2945        let mut cells = vec![Cell {
2946            bbox: BBox::new(0.0, 0.0, 200.0, 50.0),
2947            text: None,
2948        }];
2949        let chars = vec![
2950            make_char("H", 10.0, 15.0, 20.0, 27.0),
2951            make_char("i", 20.0, 15.0, 28.0, 27.0),
2952            make_char(" ", 28.0, 15.0, 33.0, 27.0),
2953            make_char("B", 33.0, 15.0, 43.0, 27.0),
2954            make_char("o", 43.0, 15.0, 51.0, 27.0),
2955            make_char("b", 51.0, 15.0, 59.0, 27.0),
2956        ];
2957        extract_text_for_cells(&mut cells, &chars);
2958        assert_eq!(cells[0].text, Some("Hi Bob".to_string()));
2959    }
2960
2961    #[test]
2962    fn test_extract_text_multiple_lines_in_cell() {
2963        // Cell with text on two lines (different y positions)
2964        let mut cells = vec![Cell {
2965            bbox: BBox::new(0.0, 0.0, 200.0, 80.0),
2966            text: None,
2967        }];
2968        let chars = vec![
2969            // Line 1: "AB" at y=10
2970            make_char("A", 10.0, 10.0, 20.0, 22.0),
2971            make_char("B", 20.0, 10.0, 30.0, 22.0),
2972            // Line 2: "CD" at y=40
2973            make_char("C", 10.0, 40.0, 20.0, 52.0),
2974            make_char("D", 20.0, 40.0, 30.0, 52.0),
2975        ];
2976        extract_text_for_cells(&mut cells, &chars);
2977        assert_eq!(cells[0].text, Some("AB\nCD".to_string()));
2978    }
2979
2980    #[test]
2981    fn test_extract_text_two_cells() {
2982        // Two cells, each with different text
2983        let mut cells = vec![
2984            Cell {
2985                bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
2986                text: None,
2987            },
2988            Cell {
2989                bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
2990                text: None,
2991            },
2992        ];
2993        let chars = vec![
2994            // "A" in cell 0 (center at (15, 16))
2995            make_char("A", 10.0, 10.0, 20.0, 22.0),
2996            // "B" in cell 1 (center at (65, 16))
2997            make_char("B", 60.0, 10.0, 70.0, 22.0),
2998        ];
2999        extract_text_for_cells(&mut cells, &chars);
3000        assert_eq!(cells[0].text, Some("A".to_string()));
3001        assert_eq!(cells[1].text, Some("B".to_string()));
3002    }
3003
3004    #[test]
3005    fn test_extract_text_no_cells() {
3006        // Empty cells slice → should not panic
3007        let mut cells: Vec<Cell> = vec![];
3008        let chars = vec![make_char("A", 10.0, 10.0, 20.0, 22.0)];
3009        extract_text_for_cells(&mut cells, &chars);
3010        assert!(cells.is_empty());
3011    }
3012
3013    #[test]
3014    fn test_extract_text_mixed_empty_and_populated_cells() {
3015        // Three cells: first has text, second is empty, third has text
3016        let mut cells = vec![
3017            Cell {
3018                bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
3019                text: None,
3020            },
3021            Cell {
3022                bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
3023                text: None,
3024            },
3025            Cell {
3026                bbox: BBox::new(100.0, 0.0, 150.0, 30.0),
3027                text: None,
3028            },
3029        ];
3030        let chars = vec![
3031            make_char("X", 10.0, 10.0, 20.0, 22.0), // in cell 0
3032            make_char("Z", 110.0, 10.0, 120.0, 22.0), // in cell 2
3033                                                    // No chars in cell 1
3034        ];
3035        extract_text_for_cells(&mut cells, &chars);
3036        assert_eq!(cells[0].text, Some("X".to_string()));
3037        assert_eq!(cells[1].text, None);
3038        assert_eq!(cells[2].text, Some("Z".to_string()));
3039    }
3040
3041    // --- Stream strategy tests (US-037) ---
3042
3043    fn make_word(text: &str, x0: f64, top: f64, x1: f64, bottom: f64) -> Word {
3044        Word {
3045            text: text.to_string(),
3046            bbox: BBox::new(x0, top, x1, bottom),
3047            doctop: top,
3048            direction: crate::text::TextDirection::Ltr,
3049            chars: vec![],
3050        }
3051    }
3052
3053    #[test]
3054    fn test_words_to_edges_stream_empty() {
3055        let edges = words_to_edges_stream(&[], 3.0, 3.0, 3, 1);
3056        assert!(edges.is_empty());
3057    }
3058
3059    #[test]
3060    fn test_words_to_edges_stream_vertical_x0_alignment() {
3061        // 3 words with x0 at ~10.0 (within tolerance 3.0)
3062        // Should produce a vertical edge at ~10.0
3063        let words = vec![
3064            make_word("A", 10.0, 10.0, 30.0, 22.0),
3065            make_word("B", 10.0, 30.0, 35.0, 42.0),
3066            make_word("C", 10.0, 50.0, 40.0, 62.0),
3067        ];
3068        let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 1);
3069
3070        // Should have at least one vertical edge from x0 alignment
3071        let v_edges: Vec<&Edge> = edges
3072            .iter()
3073            .filter(|e| e.orientation == Orientation::Vertical)
3074            .collect();
3075        assert!(
3076            !v_edges.is_empty(),
3077            "Should produce vertical edges from x0 alignment"
3078        );
3079
3080        // The vertical edge at x0≈10.0 should span from top=10.0 to bottom=62.0
3081        let v_edge = v_edges
3082            .iter()
3083            .find(|e| (e.x0 - 10.0).abs() < 1.0)
3084            .expect("Should have a vertical edge near x=10");
3085        assert!((v_edge.top - 10.0).abs() < 0.01);
3086        assert!((v_edge.bottom - 62.0).abs() < 0.01);
3087        assert_eq!(v_edge.source, EdgeSource::Stream);
3088    }
3089
3090    #[test]
3091    fn test_words_to_edges_stream_vertical_x1_alignment() {
3092        // 3 words with x1 at ~50.0
3093        let words = vec![
3094            make_word("A", 10.0, 10.0, 50.0, 22.0),
3095            make_word("B", 20.0, 30.0, 50.0, 42.0),
3096            make_word("C", 15.0, 50.0, 50.0, 62.0),
3097        ];
3098        let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 1);
3099
3100        let v_edges: Vec<&Edge> = edges
3101            .iter()
3102            .filter(|e| e.orientation == Orientation::Vertical)
3103            .collect();
3104        assert!(
3105            !v_edges.is_empty(),
3106            "Should produce vertical edges from x1 alignment"
3107        );
3108
3109        let v_edge = v_edges
3110            .iter()
3111            .find(|e| (e.x0 - 50.0).abs() < 1.0)
3112            .expect("Should have a vertical edge near x=50");
3113        assert!((v_edge.top - 10.0).abs() < 0.01);
3114        assert!((v_edge.bottom - 62.0).abs() < 0.01);
3115    }
3116
3117    #[test]
3118    fn test_words_to_edges_stream_horizontal_top_alignment() {
3119        // 3 words with top at ~10.0 (min_words_horizontal = 1, but 3 words align)
3120        let words = vec![
3121            make_word("A", 10.0, 10.0, 30.0, 22.0),
3122            make_word("B", 40.0, 10.0, 60.0, 22.0),
3123            make_word("C", 70.0, 10.0, 90.0, 22.0),
3124        ];
3125        let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 1);
3126
3127        let h_edges: Vec<&Edge> = edges
3128            .iter()
3129            .filter(|e| e.orientation == Orientation::Horizontal)
3130            .collect();
3131        assert!(
3132            !h_edges.is_empty(),
3133            "Should produce horizontal edges from top alignment"
3134        );
3135
3136        // The horizontal edge at y≈10.0 should span from x0=10.0 to x1=90.0
3137        let h_edge = h_edges
3138            .iter()
3139            .find(|e| (e.top - 10.0).abs() < 1.0)
3140            .expect("Should have a horizontal edge near y=10");
3141        assert!((h_edge.x0 - 10.0).abs() < 0.01);
3142        assert!((h_edge.x1 - 90.0).abs() < 0.01);
3143    }
3144
3145    #[test]
3146    fn test_words_to_edges_stream_horizontal_bottom_alignment() {
3147        // 3 words with bottom at ~22.0
3148        let words = vec![
3149            make_word("A", 10.0, 10.0, 30.0, 22.0),
3150            make_word("B", 40.0, 12.0, 60.0, 22.0),
3151            make_word("C", 70.0, 8.0, 90.0, 22.0),
3152        ];
3153        let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 1);
3154
3155        let h_edges: Vec<&Edge> = edges
3156            .iter()
3157            .filter(|e| e.orientation == Orientation::Horizontal)
3158            .collect();
3159        assert!(
3160            !h_edges.is_empty(),
3161            "Should produce horizontal edges from bottom alignment"
3162        );
3163
3164        let h_edge = h_edges
3165            .iter()
3166            .find(|e| (e.top - 22.0).abs() < 1.0)
3167            .expect("Should have a horizontal edge near y=22");
3168        assert!((h_edge.x0 - 10.0).abs() < 0.01);
3169        assert!((h_edge.x1 - 90.0).abs() < 0.01);
3170    }
3171
3172    #[test]
3173    fn test_words_to_edges_stream_threshold_filtering_vertical() {
3174        // Only 2 words with aligned x0, but min_words_vertical=3 → no vertical edge
3175        let words = vec![
3176            make_word("A", 10.0, 10.0, 30.0, 22.0),
3177            make_word("B", 10.0, 30.0, 35.0, 42.0),
3178        ];
3179        let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 1);
3180
3181        let v_edges: Vec<&Edge> = edges
3182            .iter()
3183            .filter(|e| e.orientation == Orientation::Vertical)
3184            .collect();
3185        assert!(
3186            v_edges.is_empty(),
3187            "Should not produce vertical edges below threshold"
3188        );
3189    }
3190
3191    #[test]
3192    fn test_words_to_edges_stream_threshold_filtering_horizontal() {
3193        // Only 2 words with aligned top, but min_words_horizontal=3 → no horizontal edge
3194        let words = vec![
3195            make_word("A", 10.0, 10.0, 30.0, 22.0),
3196            make_word("B", 40.0, 10.0, 60.0, 22.0),
3197        ];
3198        let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 3);
3199
3200        let h_edges: Vec<&Edge> = edges
3201            .iter()
3202            .filter(|e| e.orientation == Orientation::Horizontal)
3203            .collect();
3204        assert!(
3205            h_edges.is_empty(),
3206            "Should not produce horizontal edges below threshold"
3207        );
3208    }
3209
3210    #[test]
3211    fn test_words_to_edges_stream_tolerance_grouping() {
3212        // Words with x0 slightly different but within tolerance
3213        let words = vec![
3214            make_word("A", 10.0, 10.0, 30.0, 22.0),
3215            make_word("B", 11.5, 30.0, 35.0, 42.0),
3216            make_word("C", 12.0, 50.0, 40.0, 62.0),
3217        ];
3218        let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 1);
3219
3220        let v_edges: Vec<&Edge> = edges
3221            .iter()
3222            .filter(|e| e.orientation == Orientation::Vertical)
3223            .collect();
3224        // Should group x0 values 10.0, 11.5, 12.0 into one cluster (all within 3.0 tolerance)
3225        assert!(
3226            !v_edges.is_empty(),
3227            "Should group nearby x0 values within tolerance"
3228        );
3229    }
3230
3231    #[test]
3232    fn test_words_to_edges_stream_no_grouping_beyond_tolerance() {
3233        // Words with x0 values far apart → no cluster of 3
3234        let words = vec![
3235            make_word("A", 10.0, 10.0, 30.0, 22.0),
3236            make_word("B", 50.0, 30.0, 70.0, 42.0),
3237            make_word("C", 90.0, 50.0, 110.0, 62.0),
3238        ];
3239        let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 1);
3240
3241        let v_edges: Vec<&Edge> = edges
3242            .iter()
3243            .filter(|e| e.orientation == Orientation::Vertical)
3244            .collect();
3245        assert!(
3246            v_edges.is_empty(),
3247            "Should not group x0 values that are far apart"
3248        );
3249    }
3250
3251    #[test]
3252    fn test_stream_strategy_full_pipeline() {
3253        // Simulate a 3-column borderless table with 3 rows:
3254        //   Row 0: "A"  "B"  "C"  (top=10, bottom=22)
3255        //   Row 1: "D"  "E"  "F"  (top=30, bottom=42)
3256        //   Row 2: "G"  "H"  "I"  (top=50, bottom=62)
3257        // Columns: x0=10, x0=50, x0=90 → left edges
3258        //          x1=30, x1=70, x1=110 → right edges
3259        let words = vec![
3260            // Row 0
3261            make_word("A", 10.0, 10.0, 30.0, 22.0),
3262            make_word("B", 50.0, 10.0, 70.0, 22.0),
3263            make_word("C", 90.0, 10.0, 110.0, 22.0),
3264            // Row 1
3265            make_word("D", 10.0, 30.0, 30.0, 42.0),
3266            make_word("E", 50.0, 30.0, 70.0, 42.0),
3267            make_word("F", 90.0, 30.0, 110.0, 42.0),
3268            // Row 2
3269            make_word("G", 10.0, 50.0, 30.0, 62.0),
3270            make_word("H", 50.0, 50.0, 70.0, 62.0),
3271            make_word("I", 90.0, 50.0, 110.0, 62.0),
3272        ];
3273
3274        let settings = TableSettings {
3275            strategy: Strategy::Stream,
3276            min_words_vertical: 3,
3277            min_words_horizontal: 3,
3278            ..TableSettings::default()
3279        };
3280
3281        let finder = TableFinder::new_with_words(vec![], words, settings);
3282        let tables = finder.find_tables();
3283
3284        // Should detect at least one table
3285        assert!(!tables.is_empty(), "Stream strategy should detect a table");
3286
3287        // The table should have cells
3288        assert!(
3289            !tables[0].cells.is_empty(),
3290            "Table should have detected cells"
3291        );
3292    }
3293
3294    #[test]
3295    fn test_stream_strategy_with_no_words() {
3296        // Empty words → no synthetic edges → no tables
3297        let settings = TableSettings {
3298            strategy: Strategy::Stream,
3299            ..TableSettings::default()
3300        };
3301        let finder = TableFinder::new_with_words(vec![], vec![], settings);
3302        let tables = finder.find_tables();
3303        assert!(tables.is_empty());
3304    }
3305
3306    #[test]
3307    fn test_stream_edge_source_is_stream() {
3308        // All synthetic edges from Stream should have EdgeSource::Stream
3309        let words = vec![
3310            make_word("A", 10.0, 10.0, 30.0, 22.0),
3311            make_word("B", 10.0, 30.0, 35.0, 42.0),
3312            make_word("C", 10.0, 50.0, 40.0, 62.0),
3313        ];
3314        let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 1);
3315        for edge in &edges {
3316            assert_eq!(
3317                edge.source,
3318                EdgeSource::Stream,
3319                "All stream edges should have EdgeSource::Stream"
3320            );
3321        }
3322    }
3323
3324    #[test]
3325    fn test_stream_strategy_min_words_horizontal_default() {
3326        // Default min_words_horizontal=1 means even a single row produces horizontal edges
3327        let words = vec![
3328            make_word("A", 10.0, 10.0, 30.0, 22.0),
3329            make_word("B", 50.0, 10.0, 70.0, 22.0),
3330            make_word("C", 90.0, 10.0, 110.0, 22.0),
3331        ];
3332        // min_words_horizontal=1 means each group of 1+ word at same y produces horizontal edges
3333        let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 1);
3334
3335        let h_edges: Vec<&Edge> = edges
3336            .iter()
3337            .filter(|e| e.orientation == Orientation::Horizontal)
3338            .collect();
3339        assert!(
3340            !h_edges.is_empty(),
3341            "min_words_horizontal=1 should produce horizontal edges for 3 aligned words"
3342        );
3343    }
3344
3345    // --- US-038: Explicit strategy tests ---
3346
3347    #[test]
3348    fn test_explicit_lines_to_edges_basic() {
3349        // A 3x3 grid (3 horizontal + 3 vertical lines) should produce edges
3350        let explicit = ExplicitLines {
3351            horizontal_lines: vec![10.0, 30.0, 50.0],
3352            vertical_lines: vec![100.0, 200.0, 300.0],
3353        };
3354        let edges = explicit_lines_to_edges(&explicit);
3355
3356        // 3 horizontal + 3 vertical = 6 edges
3357        assert_eq!(edges.len(), 6);
3358
3359        let h_edges: Vec<&Edge> = edges
3360            .iter()
3361            .filter(|e| e.orientation == Orientation::Horizontal)
3362            .collect();
3363        let v_edges: Vec<&Edge> = edges
3364            .iter()
3365            .filter(|e| e.orientation == Orientation::Vertical)
3366            .collect();
3367        assert_eq!(h_edges.len(), 3);
3368        assert_eq!(v_edges.len(), 3);
3369
3370        // Horizontal edges span from min_x to max_x of vertical lines
3371        for h in &h_edges {
3372            assert_eq!(h.x0, 100.0);
3373            assert_eq!(h.x1, 300.0);
3374        }
3375        // Vertical edges span from min_y to max_y of horizontal lines
3376        for v in &v_edges {
3377            assert_eq!(v.top, 10.0);
3378            assert_eq!(v.bottom, 50.0);
3379        }
3380    }
3381
3382    #[test]
3383    fn test_explicit_lines_to_edges_empty_horizontal() {
3384        let explicit = ExplicitLines {
3385            horizontal_lines: vec![],
3386            vertical_lines: vec![100.0, 200.0],
3387        };
3388        let edges = explicit_lines_to_edges(&explicit);
3389        // No horizontal lines means no span for verticals either → no edges
3390        assert!(edges.is_empty());
3391    }
3392
3393    #[test]
3394    fn test_explicit_lines_to_edges_empty_vertical() {
3395        let explicit = ExplicitLines {
3396            horizontal_lines: vec![10.0, 20.0],
3397            vertical_lines: vec![],
3398        };
3399        let edges = explicit_lines_to_edges(&explicit);
3400        // No vertical lines means no span for horizontals either → no edges
3401        assert!(edges.is_empty());
3402    }
3403
3404    #[test]
3405    fn test_explicit_lines_to_edges_both_empty() {
3406        let explicit = ExplicitLines {
3407            horizontal_lines: vec![],
3408            vertical_lines: vec![],
3409        };
3410        let edges = explicit_lines_to_edges(&explicit);
3411        assert!(edges.is_empty());
3412    }
3413
3414    #[test]
3415    fn test_explicit_edge_source_is_explicit() {
3416        let explicit = ExplicitLines {
3417            horizontal_lines: vec![10.0, 50.0],
3418            vertical_lines: vec![100.0, 200.0],
3419        };
3420        let edges = explicit_lines_to_edges(&explicit);
3421        for edge in &edges {
3422            assert_eq!(edge.source, EdgeSource::Explicit);
3423        }
3424    }
3425
3426    #[test]
3427    fn test_explicit_grid_detection() {
3428        // A 3x3 grid should produce 4 cells
3429        let explicit = ExplicitLines {
3430            horizontal_lines: vec![0.0, 20.0, 40.0],
3431            vertical_lines: vec![0.0, 50.0, 100.0],
3432        };
3433        let settings = TableSettings {
3434            strategy: Strategy::Explicit,
3435            explicit_lines: Some(explicit),
3436            ..TableSettings::default()
3437        };
3438        let finder = TableFinder::new(vec![], settings);
3439        let tables = finder.find_tables();
3440
3441        assert_eq!(tables.len(), 1);
3442        assert_eq!(tables[0].cells.len(), 4);
3443        assert_eq!(tables[0].rows.len(), 2);
3444        assert_eq!(tables[0].columns.len(), 2);
3445    }
3446
3447    #[test]
3448    fn test_explicit_2x2_grid() {
3449        // A 2x2 grid (2 horizontal + 2 vertical) → 1 cell
3450        let explicit = ExplicitLines {
3451            horizontal_lines: vec![10.0, 50.0],
3452            vertical_lines: vec![100.0, 300.0],
3453        };
3454        let settings = TableSettings {
3455            strategy: Strategy::Explicit,
3456            explicit_lines: Some(explicit),
3457            ..TableSettings::default()
3458        };
3459        let finder = TableFinder::new(vec![], settings);
3460        let tables = finder.find_tables();
3461
3462        assert_eq!(tables.len(), 1);
3463        assert_eq!(tables[0].cells.len(), 1);
3464        let cell = &tables[0].cells[0];
3465        assert_eq!(cell.bbox.x0, 100.0);
3466        assert_eq!(cell.bbox.top, 10.0);
3467        assert_eq!(cell.bbox.x1, 300.0);
3468        assert_eq!(cell.bbox.bottom, 50.0);
3469    }
3470
3471    #[test]
3472    fn test_explicit_strategy_no_explicit_lines() {
3473        // Explicit strategy with no explicit_lines should return no tables
3474        let settings = TableSettings {
3475            strategy: Strategy::Explicit,
3476            explicit_lines: None,
3477            ..TableSettings::default()
3478        };
3479        let finder = TableFinder::new(vec![], settings);
3480        let tables = finder.find_tables();
3481        assert!(tables.is_empty());
3482    }
3483
3484    #[test]
3485    fn test_explicit_mixing_with_detected_edges() {
3486        // Detected edges form partial grid; explicit lines complete it
3487        // Detected: two vertical edges at x=0 and x=100
3488        let detected_edges = vec![make_v_edge(0.0, 0.0, 40.0), make_v_edge(100.0, 0.0, 40.0)];
3489        // Explicit: add horizontal lines at y=0 and y=40
3490        let explicit = ExplicitLines {
3491            horizontal_lines: vec![0.0, 40.0],
3492            vertical_lines: vec![], // no explicit verticals
3493        };
3494        let settings = TableSettings {
3495            strategy: Strategy::Explicit,
3496            explicit_lines: Some(explicit),
3497            ..TableSettings::default()
3498        };
3499        let finder = TableFinder::new(detected_edges, settings);
3500        let tables = finder.find_tables();
3501
3502        // The explicit horizontal lines + detected vertical edges form a complete grid → 1 cell
3503        assert_eq!(tables.len(), 1);
3504        assert_eq!(tables[0].cells.len(), 1);
3505    }
3506
3507    #[test]
3508    fn test_explicit_single_line_each() {
3509        // Only 1 horizontal + 1 vertical → no cells (need at least 2×2 grid)
3510        let explicit = ExplicitLines {
3511            horizontal_lines: vec![10.0],
3512            vertical_lines: vec![100.0],
3513        };
3514        let settings = TableSettings {
3515            strategy: Strategy::Explicit,
3516            explicit_lines: Some(explicit),
3517            ..TableSettings::default()
3518        };
3519        let finder = TableFinder::new(vec![], settings);
3520        let tables = finder.find_tables();
3521        assert!(tables.is_empty());
3522    }
3523
3524    #[test]
3525    fn test_explicit_unsorted_coordinates() {
3526        // Coordinates provided in unsorted order should still work
3527        let explicit = ExplicitLines {
3528            horizontal_lines: vec![40.0, 0.0, 20.0],
3529            vertical_lines: vec![100.0, 0.0, 50.0],
3530        };
3531        let settings = TableSettings {
3532            strategy: Strategy::Explicit,
3533            explicit_lines: Some(explicit),
3534            ..TableSettings::default()
3535        };
3536        let finder = TableFinder::new(vec![], settings);
3537        let tables = finder.find_tables();
3538
3539        assert_eq!(tables.len(), 1);
3540        assert_eq!(tables[0].cells.len(), 4); // 3x3 grid → 4 cells
3541    }
3542
3543    // --- US-069 tests: TableFinderDebug ---
3544
3545    #[test]
3546    fn test_find_tables_debug_returns_intermediate_results() {
3547        // Build a simple 2x2 grid
3548        let edges = vec![
3549            // Horizontal edges
3550            Edge {
3551                x0: 0.0,
3552                top: 0.0,
3553                x1: 100.0,
3554                bottom: 0.0,
3555                orientation: Orientation::Horizontal,
3556                source: EdgeSource::Line,
3557            },
3558            Edge {
3559                x0: 0.0,
3560                top: 50.0,
3561                x1: 100.0,
3562                bottom: 50.0,
3563                orientation: Orientation::Horizontal,
3564                source: EdgeSource::Line,
3565            },
3566            Edge {
3567                x0: 0.0,
3568                top: 100.0,
3569                x1: 100.0,
3570                bottom: 100.0,
3571                orientation: Orientation::Horizontal,
3572                source: EdgeSource::Line,
3573            },
3574            // Vertical edges
3575            Edge {
3576                x0: 0.0,
3577                top: 0.0,
3578                x1: 0.0,
3579                bottom: 100.0,
3580                orientation: Orientation::Vertical,
3581                source: EdgeSource::Line,
3582            },
3583            Edge {
3584                x0: 50.0,
3585                top: 0.0,
3586                x1: 50.0,
3587                bottom: 100.0,
3588                orientation: Orientation::Vertical,
3589                source: EdgeSource::Line,
3590            },
3591            Edge {
3592                x0: 100.0,
3593                top: 0.0,
3594                x1: 100.0,
3595                bottom: 100.0,
3596                orientation: Orientation::Vertical,
3597                source: EdgeSource::Line,
3598            },
3599        ];
3600
3601        let finder = TableFinder::new(edges, TableSettings::default());
3602        let debug = finder.find_tables_debug();
3603
3604        // Should have edges from the pipeline
3605        assert!(!debug.edges.is_empty(), "Should have processed edges");
3606        // Should have intersections (6 edges in a grid = 9 intersections)
3607        assert!(!debug.intersections.is_empty(), "Should have intersections");
3608        // Should have cells
3609        assert!(!debug.cells.is_empty(), "Should have cells");
3610        // Should have tables
3611        assert!(!debug.tables.is_empty(), "Should have tables");
3612        // The tables from debug should match find_tables()
3613        let tables = finder.find_tables();
3614        assert_eq!(debug.tables.len(), tables.len());
3615    }
3616
3617    #[test]
3618    fn test_find_tables_debug_no_edges() {
3619        let finder = TableFinder::new(vec![], TableSettings::default());
3620        let debug = finder.find_tables_debug();
3621
3622        assert!(debug.edges.is_empty());
3623        assert!(debug.intersections.is_empty());
3624        assert!(debug.cells.is_empty());
3625        assert!(debug.tables.is_empty());
3626    }
3627
3628    #[test]
3629    fn test_find_tables_debug_struct_fields() {
3630        let edges = vec![
3631            Edge {
3632                x0: 0.0,
3633                top: 0.0,
3634                x1: 100.0,
3635                bottom: 0.0,
3636                orientation: Orientation::Horizontal,
3637                source: EdgeSource::Line,
3638            },
3639            Edge {
3640                x0: 0.0,
3641                top: 0.0,
3642                x1: 0.0,
3643                bottom: 100.0,
3644                orientation: Orientation::Vertical,
3645                source: EdgeSource::Line,
3646            },
3647        ];
3648
3649        let finder = TableFinder::new(edges, TableSettings::default());
3650        let debug = finder.find_tables_debug();
3651
3652        // Should have edges (at least the 2 input edges after processing)
3653        assert!(!debug.edges.is_empty());
3654        // Should have at least one intersection (where the edges meet)
3655        assert!(!debug.intersections.is_empty());
3656        assert_eq!(debug.intersections[0].x, 0.0);
3657        assert_eq!(debug.intersections[0].y, 0.0);
3658    }
3659
3660    // ---- TableQuality / accuracy / whitespace tests ----
3661
3662    #[test]
3663    fn test_accuracy_all_cells_filled() {
3664        let table = Table {
3665            bbox: BBox::new(0.0, 0.0, 100.0, 60.0),
3666            cells: vec![
3667                Cell {
3668                    bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
3669                    text: Some("A".into()),
3670                },
3671                Cell {
3672                    bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
3673                    text: Some("B".into()),
3674                },
3675                Cell {
3676                    bbox: BBox::new(0.0, 30.0, 50.0, 60.0),
3677                    text: Some("C".into()),
3678                },
3679                Cell {
3680                    bbox: BBox::new(50.0, 30.0, 100.0, 60.0),
3681                    text: Some("D".into()),
3682                },
3683            ],
3684            rows: vec![],
3685            columns: vec![],
3686        };
3687        assert!((table.accuracy() - 1.0).abs() < f64::EPSILON);
3688    }
3689
3690    #[test]
3691    fn test_accuracy_half_empty() {
3692        let table = Table {
3693            bbox: BBox::new(0.0, 0.0, 100.0, 60.0),
3694            cells: vec![
3695                Cell {
3696                    bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
3697                    text: Some("A".into()),
3698                },
3699                Cell {
3700                    bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
3701                    text: None,
3702                },
3703                Cell {
3704                    bbox: BBox::new(0.0, 30.0, 50.0, 60.0),
3705                    text: Some("C".into()),
3706                },
3707                Cell {
3708                    bbox: BBox::new(50.0, 30.0, 100.0, 60.0),
3709                    text: None,
3710                },
3711            ],
3712            rows: vec![],
3713            columns: vec![],
3714        };
3715        assert!((table.accuracy() - 0.5).abs() < f64::EPSILON);
3716    }
3717
3718    #[test]
3719    fn test_accuracy_all_empty() {
3720        let table = Table {
3721            bbox: BBox::new(0.0, 0.0, 100.0, 30.0),
3722            cells: vec![
3723                Cell {
3724                    bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
3725                    text: None,
3726                },
3727                Cell {
3728                    bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
3729                    text: None,
3730                },
3731            ],
3732            rows: vec![],
3733            columns: vec![],
3734        };
3735        assert!((table.accuracy()).abs() < f64::EPSILON);
3736    }
3737
3738    #[test]
3739    fn test_accuracy_whitespace_only_treated_as_empty() {
3740        let table = Table {
3741            bbox: BBox::new(0.0, 0.0, 100.0, 30.0),
3742            cells: vec![
3743                Cell {
3744                    bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
3745                    text: Some("A".into()),
3746                },
3747                Cell {
3748                    bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
3749                    text: Some("  ".into()),
3750                },
3751            ],
3752            rows: vec![],
3753            columns: vec![],
3754        };
3755        assert!((table.accuracy() - 0.5).abs() < f64::EPSILON);
3756    }
3757
3758    #[test]
3759    fn test_accuracy_no_cells() {
3760        let table = Table {
3761            bbox: BBox::new(0.0, 0.0, 100.0, 30.0),
3762            cells: vec![],
3763            rows: vec![],
3764            columns: vec![],
3765        };
3766        assert!((table.accuracy()).abs() < f64::EPSILON);
3767    }
3768
3769    #[test]
3770    fn test_whitespace_no_whitespace() {
3771        let table = Table {
3772            bbox: BBox::new(0.0, 0.0, 100.0, 30.0),
3773            cells: vec![
3774                Cell {
3775                    bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
3776                    text: Some("ABC".into()),
3777                },
3778                Cell {
3779                    bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
3780                    text: Some("DEF".into()),
3781                },
3782            ],
3783            rows: vec![],
3784            columns: vec![],
3785        };
3786        assert!((table.whitespace()).abs() < f64::EPSILON);
3787    }
3788
3789    #[test]
3790    fn test_whitespace_all_spaces() {
3791        let table = Table {
3792            bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
3793            cells: vec![Cell {
3794                bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
3795                text: Some("   ".into()),
3796            }],
3797            rows: vec![],
3798            columns: vec![],
3799        };
3800        assert!((table.whitespace() - 1.0).abs() < f64::EPSILON);
3801    }
3802
3803    #[test]
3804    fn test_whitespace_mixed() {
3805        // "A B" = 1 whitespace / 3 chars = 0.333...
3806        // "CD"  = 0 whitespace / 2 chars = 0.0
3807        // average = (0.333... + 0.0) / 2 = 0.1666...
3808        let table = Table {
3809            bbox: BBox::new(0.0, 0.0, 100.0, 30.0),
3810            cells: vec![
3811                Cell {
3812                    bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
3813                    text: Some("A B".into()),
3814                },
3815                Cell {
3816                    bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
3817                    text: Some("CD".into()),
3818                },
3819            ],
3820            rows: vec![],
3821            columns: vec![],
3822        };
3823        let expected = (1.0 / 3.0 + 0.0) / 2.0;
3824        assert!((table.whitespace() - expected).abs() < 1e-10);
3825    }
3826
3827    #[test]
3828    fn test_whitespace_skips_empty_cells() {
3829        // Only cells with text contribute to the whitespace average
3830        let table = Table {
3831            bbox: BBox::new(0.0, 0.0, 100.0, 30.0),
3832            cells: vec![
3833                Cell {
3834                    bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
3835                    text: Some("ABC".into()),
3836                },
3837                Cell {
3838                    bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
3839                    text: None,
3840                },
3841            ],
3842            rows: vec![],
3843            columns: vec![],
3844        };
3845        assert!((table.whitespace()).abs() < f64::EPSILON);
3846    }
3847
3848    #[test]
3849    fn test_whitespace_no_text_cells() {
3850        let table = Table {
3851            bbox: BBox::new(0.0, 0.0, 100.0, 30.0),
3852            cells: vec![
3853                Cell {
3854                    bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
3855                    text: None,
3856                },
3857                Cell {
3858                    bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
3859                    text: None,
3860                },
3861            ],
3862            rows: vec![],
3863            columns: vec![],
3864        };
3865        assert!((table.whitespace()).abs() < f64::EPSILON);
3866    }
3867
3868    #[test]
3869    fn test_quality_combined() {
3870        let table = Table {
3871            bbox: BBox::new(0.0, 0.0, 100.0, 60.0),
3872            cells: vec![
3873                Cell {
3874                    bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
3875                    text: Some("Hello".into()),
3876                },
3877                Cell {
3878                    bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
3879                    text: None,
3880                },
3881                Cell {
3882                    bbox: BBox::new(0.0, 30.0, 50.0, 60.0),
3883                    text: Some("World".into()),
3884                },
3885                Cell {
3886                    bbox: BBox::new(50.0, 30.0, 100.0, 60.0),
3887                    text: Some("Test".into()),
3888                },
3889            ],
3890            rows: vec![],
3891            columns: vec![],
3892        };
3893        let q = table.quality();
3894        assert!((q.accuracy - 0.75).abs() < f64::EPSILON);
3895        assert!((q.whitespace).abs() < f64::EPSILON);
3896    }
3897
3898    #[test]
3899    fn test_min_accuracy_filtering() {
3900        // Test that TableSettings::min_accuracy exists and defaults to None
3901        let settings = TableSettings::default();
3902        assert_eq!(settings.min_accuracy, None);
3903
3904        // Test that min_accuracy can be set
3905        let settings = TableSettings {
3906            min_accuracy: Some(0.5),
3907            ..TableSettings::default()
3908        };
3909        assert_eq!(settings.min_accuracy, Some(0.5));
3910    }
3911}