Skip to main content

pdfplumber_core/
table.rs

1//! Table detection types and pipeline.
2//!
3//! This module provides the configuration types, data structures, and orchestration
4//! for detecting tables in PDF pages using Lattice, Stream, or Explicit strategies.
5
6use crate::edges::{Edge, EdgeSource};
7use crate::geometry::{BBox, Orientation};
8use crate::text::Char;
9use crate::words::{Word, WordExtractor, WordOptions};
10
11/// Strategy for table detection.
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
13#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
14pub enum Strategy {
15    /// Detect tables using visible lines and rect edges.
16    #[default]
17    Lattice,
18    /// Detect tables using only visible lines (no rect edges).
19    LatticeStrict,
20    /// Detect tables from text alignment patterns (no visible borders needed).
21    Stream,
22    /// Detect tables using user-provided line coordinates.
23    Explicit,
24}
25
26/// Configuration for table detection.
27///
28/// All tolerance values default to 3.0, matching Python pdfplumber defaults.
29#[derive(Debug, Clone, PartialEq)]
30#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
31pub struct TableSettings {
32    /// Table detection strategy.
33    pub strategy: Strategy,
34    /// General snap tolerance for aligning nearby edges.
35    pub snap_tolerance: f64,
36    /// Snap tolerance for horizontal alignment.
37    pub snap_x_tolerance: f64,
38    /// Snap tolerance for vertical alignment.
39    pub snap_y_tolerance: f64,
40    /// General join tolerance for merging collinear edges.
41    pub join_tolerance: f64,
42    /// Join tolerance for horizontal edges.
43    pub join_x_tolerance: f64,
44    /// Join tolerance for vertical edges.
45    pub join_y_tolerance: f64,
46    /// Minimum edge length to consider for table detection.
47    pub edge_min_length: f64,
48    /// Minimum number of words sharing a vertical alignment for Stream strategy.
49    pub min_words_vertical: usize,
50    /// Minimum number of words sharing a horizontal alignment for Stream strategy.
51    pub min_words_horizontal: usize,
52    /// General text tolerance for assigning text to cells.
53    pub text_tolerance: f64,
54    /// Text tolerance along x-axis.
55    pub text_x_tolerance: f64,
56    /// Text tolerance along y-axis.
57    pub text_y_tolerance: f64,
58    /// General intersection tolerance for detecting edge crossings.
59    pub intersection_tolerance: f64,
60    /// Intersection tolerance along x-axis.
61    pub intersection_x_tolerance: f64,
62    /// Intersection tolerance along y-axis.
63    pub intersection_y_tolerance: f64,
64    /// Optional explicit line coordinates for Explicit strategy.
65    pub explicit_lines: Option<ExplicitLines>,
66}
67
68impl Default for TableSettings {
69    fn default() -> Self {
70        Self {
71            strategy: Strategy::default(),
72            snap_tolerance: 3.0,
73            snap_x_tolerance: 3.0,
74            snap_y_tolerance: 3.0,
75            join_tolerance: 3.0,
76            join_x_tolerance: 3.0,
77            join_y_tolerance: 3.0,
78            edge_min_length: 3.0,
79            min_words_vertical: 3,
80            min_words_horizontal: 1,
81            text_tolerance: 3.0,
82            text_x_tolerance: 3.0,
83            text_y_tolerance: 3.0,
84            intersection_tolerance: 3.0,
85            intersection_x_tolerance: 3.0,
86            intersection_y_tolerance: 3.0,
87            explicit_lines: None,
88        }
89    }
90}
91
92/// User-provided line coordinates for Explicit strategy.
93#[derive(Debug, Clone, PartialEq)]
94#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
95pub struct ExplicitLines {
96    /// Y-coordinates for horizontal lines.
97    pub horizontal_lines: Vec<f64>,
98    /// X-coordinates for vertical lines.
99    pub vertical_lines: Vec<f64>,
100}
101
102/// A detected table cell.
103#[derive(Debug, Clone, PartialEq)]
104#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
105pub struct Cell {
106    /// Bounding box of the cell.
107    pub bbox: BBox,
108    /// Text content within the cell, if any.
109    pub text: Option<String>,
110}
111
112/// A detected table.
113#[derive(Debug, Clone, PartialEq)]
114#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
115pub struct Table {
116    /// Bounding box enclosing the entire table.
117    pub bbox: BBox,
118    /// All cells in the table.
119    pub cells: Vec<Cell>,
120    /// Cells organized into rows (top-to-bottom, left-to-right within each row).
121    pub rows: Vec<Vec<Cell>>,
122    /// Cells organized into columns (left-to-right, top-to-bottom within each column).
123    pub columns: Vec<Vec<Cell>>,
124}
125
126/// Snap nearby parallel edges to aligned positions.
127///
128/// Groups edges by orientation and clusters them along the perpendicular axis.
129/// For horizontal edges, clusters by y-coordinate within `snap_y_tolerance`.
130/// For vertical edges, clusters by x-coordinate within `snap_x_tolerance`.
131/// Clustered edges have their perpendicular coordinates replaced with the cluster mean.
132/// Diagonal edges pass through unchanged.
133///
134/// This does **not** merge edges — it only aligns their positions.
135pub fn snap_edges(edges: Vec<Edge>, snap_x_tolerance: f64, snap_y_tolerance: f64) -> Vec<Edge> {
136    let mut result = Vec::with_capacity(edges.len());
137    let mut horizontals: Vec<Edge> = Vec::new();
138    let mut verticals: Vec<Edge> = Vec::new();
139
140    for edge in edges {
141        match edge.orientation {
142            Orientation::Horizontal => horizontals.push(edge),
143            Orientation::Vertical => verticals.push(edge),
144            Orientation::Diagonal => result.push(edge),
145        }
146    }
147
148    // Snap horizontal edges: cluster by y-coordinate (top/bottom)
149    snap_group(
150        &mut horizontals,
151        snap_y_tolerance,
152        |e| e.top,
153        |e, v| {
154            e.top = v;
155            e.bottom = v;
156        },
157    );
158    result.extend(horizontals);
159
160    // Snap vertical edges: cluster by x-coordinate (x0/x1)
161    snap_group(
162        &mut verticals,
163        snap_x_tolerance,
164        |e| e.x0,
165        |e, v| {
166            e.x0 = v;
167            e.x1 = v;
168        },
169    );
170    result.extend(verticals);
171
172    result
173}
174
175/// Cluster edges along a single axis and snap each cluster to its mean.
176fn snap_group<F, G>(edges: &mut [Edge], tolerance: f64, key: F, mut set: G)
177where
178    F: Fn(&Edge) -> f64,
179    G: FnMut(&mut Edge, f64),
180{
181    if edges.is_empty() {
182        return;
183    }
184
185    // Sort by the perpendicular coordinate
186    edges.sort_by(|a, b| key(a).partial_cmp(&key(b)).unwrap());
187
188    // Build clusters of consecutive edges within tolerance
189    let mut cluster_start = 0;
190    for i in 1..=edges.len() {
191        let end_of_cluster =
192            i == edges.len() || (key(&edges[i]) - key(&edges[cluster_start])).abs() > tolerance;
193        if end_of_cluster {
194            // Compute mean of the cluster
195            let sum: f64 = (cluster_start..i).map(|j| key(&edges[j])).sum();
196            let mean = sum / (i - cluster_start) as f64;
197            for edge in &mut edges[cluster_start..i] {
198                set(edge, mean);
199            }
200            cluster_start = i;
201        }
202    }
203}
204
205/// Merge overlapping or adjacent collinear edge segments.
206///
207/// Groups edges by orientation and collinear position, then merges segments
208/// within each group when their gap is within the join tolerance.
209/// For horizontal edges, segments on the same y-line merge when the gap along x
210/// is within `join_x_tolerance`. For vertical edges, segments on the same x-line
211/// merge when the gap along y is within `join_y_tolerance`.
212/// Diagonal edges pass through unchanged.
213pub fn join_edge_group(
214    edges: Vec<Edge>,
215    join_x_tolerance: f64,
216    join_y_tolerance: f64,
217) -> Vec<Edge> {
218    let mut result: Vec<Edge> = Vec::new();
219    let mut horizontals: Vec<Edge> = Vec::new();
220    let mut verticals: Vec<Edge> = Vec::new();
221
222    for edge in edges {
223        match edge.orientation {
224            Orientation::Horizontal => horizontals.push(edge),
225            Orientation::Vertical => verticals.push(edge),
226            Orientation::Diagonal => result.push(edge),
227        }
228    }
229
230    // Join horizontal edges: group by y-coordinate, merge along x-axis
231    result.extend(join_collinear(
232        horizontals,
233        |e| e.top,
234        |e| (e.x0, e.x1),
235        |proto, start, end| Edge {
236            x0: start,
237            top: proto.top,
238            x1: end,
239            bottom: proto.bottom,
240            orientation: proto.orientation,
241            source: proto.source,
242        },
243        join_x_tolerance,
244    ));
245
246    // Join vertical edges: group by x-coordinate, merge along y-axis
247    result.extend(join_collinear(
248        verticals,
249        |e| e.x0,
250        |e| (e.top, e.bottom),
251        |proto, start, end| Edge {
252            x0: proto.x0,
253            top: start,
254            x1: proto.x1,
255            bottom: end,
256            orientation: proto.orientation,
257            source: proto.source,
258        },
259        join_y_tolerance,
260    ));
261
262    result
263}
264
265/// Group edges by a collinear key, then merge overlapping/adjacent segments within each group.
266fn join_collinear<K, S, B>(
267    mut edges: Vec<Edge>,
268    key: K,
269    span: S,
270    build: B,
271    tolerance: f64,
272) -> Vec<Edge>
273where
274    K: Fn(&Edge) -> f64,
275    S: Fn(&Edge) -> (f64, f64),
276    B: Fn(&Edge, f64, f64) -> Edge,
277{
278    if edges.is_empty() {
279        return Vec::new();
280    }
281
282    // Sort by collinear key first, then by span start
283    edges.sort_by(|a, b| {
284        key(a)
285            .partial_cmp(&key(b))
286            .unwrap()
287            .then_with(|| span(a).0.partial_cmp(&span(b).0).unwrap())
288    });
289
290    let mut result = Vec::new();
291    let mut i = 0;
292
293    while i < edges.len() {
294        // Collect group of edges on the same collinear line (exact match after snapping)
295        let group_key = key(&edges[i]);
296        let mut j = i + 1;
297        while j < edges.len() && (key(&edges[j]) - group_key).abs() < 1e-9 {
298            j += 1;
299        }
300
301        // Merge segments within this collinear group
302        let (mut cur_start, mut cur_end) = span(&edges[i]);
303        let mut proto_idx = i;
304
305        for k in (i + 1)..j {
306            let (s, e) = span(&edges[k]);
307            if s <= cur_end + tolerance {
308                // Overlapping or within tolerance — extend
309                if e > cur_end {
310                    cur_end = e;
311                }
312            } else {
313                // Gap too large — emit current merged edge, start new one
314                result.push(build(&edges[proto_idx], cur_start, cur_end));
315                cur_start = s;
316                cur_end = e;
317                proto_idx = k;
318            }
319        }
320        result.push(build(&edges[proto_idx], cur_start, cur_end));
321
322        i = j;
323    }
324
325    result
326}
327
328/// An intersection point between horizontal and vertical edges.
329#[derive(Debug, Clone, PartialEq)]
330#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
331pub struct Intersection {
332    /// X coordinate of the intersection point.
333    pub x: f64,
334    /// Y coordinate of the intersection point.
335    pub y: f64,
336}
337
338/// Find all intersection points between horizontal and vertical edges.
339///
340/// An intersection exists when a vertical edge's x-coordinate falls within a
341/// horizontal edge's x-span (within `x_tolerance`) AND the horizontal edge's
342/// y-coordinate falls within the vertical edge's y-span (within `y_tolerance`).
343///
344/// Only considers actual overlapping segments, not infinite line extensions.
345/// Diagonal edges are ignored.
346pub fn edges_to_intersections(
347    edges: &[Edge],
348    x_tolerance: f64,
349    y_tolerance: f64,
350) -> Vec<Intersection> {
351    let horizontals: Vec<&Edge> = edges
352        .iter()
353        .filter(|e| e.orientation == Orientation::Horizontal)
354        .collect();
355    let verticals: Vec<&Edge> = edges
356        .iter()
357        .filter(|e| e.orientation == Orientation::Vertical)
358        .collect();
359
360    let mut intersections = Vec::new();
361
362    for h in &horizontals {
363        let h_y = h.top; // horizontal edge: top == bottom
364        for v in &verticals {
365            let v_x = v.x0; // vertical edge: x0 == x1
366
367            // Check that the vertical's x is within the horizontal's x-span (with tolerance)
368            // and the horizontal's y is within the vertical's y-span (with tolerance)
369            if v_x >= h.x0 - x_tolerance
370                && v_x <= h.x1 + x_tolerance
371                && h_y >= v.top - y_tolerance
372                && h_y <= v.bottom + y_tolerance
373            {
374                intersections.push(Intersection { x: v_x, y: h_y });
375            }
376        }
377    }
378
379    // Sort and deduplicate intersection points at the same location
380    intersections.sort_by(|a, b| {
381        a.x.partial_cmp(&b.x)
382            .unwrap()
383            .then_with(|| a.y.partial_cmp(&b.y).unwrap())
384    });
385    intersections.dedup_by(|a, b| (a.x - b.x).abs() < 1e-9 && (a.y - b.y).abs() < 1e-9);
386
387    intersections
388}
389
390/// Construct rectangular cells from a grid of intersection points.
391///
392/// Groups intersection points into a grid of unique y-rows and x-columns (sorted).
393/// For each pair of adjacent rows and adjacent columns, checks if all 4 corner
394/// intersections exist. If so, creates a [`Cell`] with the corresponding bounding box.
395/// Missing corners are skipped gracefully.
396pub fn intersections_to_cells(intersections: &[Intersection]) -> Vec<Cell> {
397    if intersections.is_empty() {
398        return Vec::new();
399    }
400
401    // Collect unique x and y coordinates (sorted)
402    let mut xs: Vec<f64> = Vec::new();
403    let mut ys: Vec<f64> = Vec::new();
404
405    for pt in intersections {
406        if !xs.iter().any(|&x| (x - pt.x).abs() < 1e-9) {
407            xs.push(pt.x);
408        }
409        if !ys.iter().any(|&y| (y - pt.y).abs() < 1e-9) {
410            ys.push(pt.y);
411        }
412    }
413
414    xs.sort_by(|a, b| a.partial_cmp(b).unwrap());
415    ys.sort_by(|a, b| a.partial_cmp(b).unwrap());
416
417    // Helper to check if an intersection exists at (x, y)
418    let has_point = |x: f64, y: f64| -> bool {
419        intersections
420            .iter()
421            .any(|pt| (pt.x - x).abs() < 1e-9 && (pt.y - y).abs() < 1e-9)
422    };
423
424    let mut cells = Vec::new();
425
426    // For each pair of adjacent rows and columns, check all 4 corners
427    for yi in 0..ys.len().saturating_sub(1) {
428        for xi in 0..xs.len().saturating_sub(1) {
429            let x0 = xs[xi];
430            let x1 = xs[xi + 1];
431            let top = ys[yi];
432            let bottom = ys[yi + 1];
433
434            if has_point(x0, top)
435                && has_point(x1, top)
436                && has_point(x0, bottom)
437                && has_point(x1, bottom)
438            {
439                cells.push(Cell {
440                    bbox: BBox::new(x0, top, x1, bottom),
441                    text: None,
442                });
443            }
444        }
445    }
446
447    cells
448}
449
450/// Group adjacent cells into distinct tables.
451///
452/// Cells that share an edge (same x-boundary or y-boundary) are grouped into
453/// the same table using a union-find algorithm. Each table receives:
454/// - A `bbox` that is the union of all its cells' bounding boxes
455/// - `rows`: cells organized by y-coordinate (top-to-bottom), sorted left-to-right within each row
456/// - `columns`: cells organized by x-coordinate (left-to-right), sorted top-to-bottom within each column
457pub fn cells_to_tables(cells: Vec<Cell>) -> Vec<Table> {
458    if cells.is_empty() {
459        return Vec::new();
460    }
461
462    let n = cells.len();
463
464    // Union-Find to group cells sharing edges
465    let mut parent: Vec<usize> = (0..n).collect();
466
467    fn find(parent: &mut [usize], mut i: usize) -> usize {
468        while parent[i] != i {
469            parent[i] = parent[parent[i]]; // path compression
470            i = parent[i];
471        }
472        i
473    }
474
475    fn union(parent: &mut [usize], a: usize, b: usize) {
476        let ra = find(parent, a);
477        let rb = find(parent, b);
478        if ra != rb {
479            parent[rb] = ra;
480        }
481    }
482
483    // Two cells share an edge if they have a common boundary segment:
484    // - Same x0/x1 boundary AND overlapping y-ranges, or
485    // - Same top/bottom boundary AND overlapping x-ranges
486    for i in 0..n {
487        for j in (i + 1)..n {
488            if cells_share_edge(&cells[i], &cells[j]) {
489                union(&mut parent, i, j);
490            }
491        }
492    }
493
494    // Group cells by their root
495    let mut groups: std::collections::HashMap<usize, Vec<usize>> = std::collections::HashMap::new();
496    for i in 0..n {
497        let root = find(&mut parent, i);
498        groups.entry(root).or_default().push(i);
499    }
500
501    // Build a Table from each group
502    let mut tables: Vec<Table> = groups
503        .into_values()
504        .map(|indices| {
505            let group_cells: Vec<Cell> = indices.iter().map(|&i| cells[i].clone()).collect();
506
507            // Compute union bbox
508            let mut bbox = group_cells[0].bbox;
509            for cell in &group_cells[1..] {
510                bbox = bbox.union(&cell.bbox);
511            }
512
513            // Organize into rows: group by top coordinate, sort left-to-right
514            let mut row_map: std::collections::BTreeMap<i64, Vec<Cell>> =
515                std::collections::BTreeMap::new();
516            for cell in &group_cells {
517                let key = float_key(cell.bbox.top);
518                row_map.entry(key).or_default().push(cell.clone());
519            }
520            let rows: Vec<Vec<Cell>> = row_map
521                .into_values()
522                .map(|mut row| {
523                    row.sort_by(|a, b| a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap());
524                    row
525                })
526                .collect();
527
528            // Organize into columns: group by x0 coordinate, sort top-to-bottom
529            let mut col_map: std::collections::BTreeMap<i64, Vec<Cell>> =
530                std::collections::BTreeMap::new();
531            for cell in &group_cells {
532                let key = float_key(cell.bbox.x0);
533                col_map.entry(key).or_default().push(cell.clone());
534            }
535            let columns: Vec<Vec<Cell>> = col_map
536                .into_values()
537                .map(|mut col| {
538                    col.sort_by(|a, b| a.bbox.top.partial_cmp(&b.bbox.top).unwrap());
539                    col
540                })
541                .collect();
542
543            Table {
544                bbox,
545                cells: group_cells,
546                rows,
547                columns,
548            }
549        })
550        .collect();
551
552    // Sort tables by position for deterministic output (top-to-bottom, left-to-right)
553    tables.sort_by(|a, b| {
554        a.bbox
555            .top
556            .partial_cmp(&b.bbox.top)
557            .unwrap()
558            .then_with(|| a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap())
559    });
560
561    tables
562}
563
564/// Check if two cells share an edge (a common boundary segment).
565fn cells_share_edge(a: &Cell, b: &Cell) -> bool {
566    let eps = 1e-6;
567
568    // Check for shared vertical boundary (one cell's x1 == other's x0 or vice versa)
569    // with overlapping y-ranges
570    let shared_vertical = ((a.bbox.x1 - b.bbox.x0).abs() < eps
571        || (a.bbox.x0 - b.bbox.x1).abs() < eps)
572        && a.bbox.top < b.bbox.bottom + eps
573        && b.bbox.top < a.bbox.bottom + eps;
574
575    // Check for shared horizontal boundary (one cell's bottom == other's top or vice versa)
576    // with overlapping x-ranges
577    let shared_horizontal = ((a.bbox.bottom - b.bbox.top).abs() < eps
578        || (a.bbox.top - b.bbox.bottom).abs() < eps)
579        && a.bbox.x0 < b.bbox.x1 + eps
580        && b.bbox.x0 < a.bbox.x1 + eps;
581
582    shared_vertical || shared_horizontal
583}
584
585/// Convert a float to an integer key for grouping (multiply by 1000 to preserve 3 decimal places).
586fn float_key(v: f64) -> i64 {
587    (v * 1000.0).round() as i64
588}
589
590/// Compute the length of an edge along its primary axis.
591fn edge_length(edge: &Edge) -> f64 {
592    let dx = edge.x1 - edge.x0;
593    let dy = edge.bottom - edge.top;
594    (dx * dx + dy * dy).sqrt()
595}
596
597/// Extract text content for each cell by finding characters within the cell bbox.
598///
599/// For each cell, finds all [`Char`]s whose bbox center point falls within the
600/// cell's bounding box. Characters are grouped into words using [`WordExtractor`],
601/// then joined into text with spaces between words on the same line and newlines
602/// between lines.
603///
604/// Cells with no matching characters get `text = None`.
605pub fn extract_text_for_cells(cells: &mut [Cell], chars: &[Char]) {
606    let options = WordOptions::default();
607
608    for cell in cells.iter_mut() {
609        // Find chars whose bbox center falls within this cell
610        let cell_chars: Vec<Char> = chars
611            .iter()
612            .filter(|ch| {
613                let cx = (ch.bbox.x0 + ch.bbox.x1) / 2.0;
614                let cy = (ch.bbox.top + ch.bbox.bottom) / 2.0;
615                cx >= cell.bbox.x0
616                    && cx <= cell.bbox.x1
617                    && cy >= cell.bbox.top
618                    && cy <= cell.bbox.bottom
619            })
620            .cloned()
621            .collect();
622
623        if cell_chars.is_empty() {
624            cell.text = None;
625            continue;
626        }
627
628        // Group chars into words
629        let words = WordExtractor::extract(&cell_chars, &options);
630        if words.is_empty() {
631            cell.text = None;
632            continue;
633        }
634
635        // Group words into lines by y-coordinate proximity
636        let mut sorted_words: Vec<&crate::words::Word> = words.iter().collect();
637        sorted_words.sort_by(|a, b| {
638            a.bbox
639                .top
640                .partial_cmp(&b.bbox.top)
641                .unwrap()
642                .then_with(|| a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap())
643        });
644
645        let mut lines: Vec<Vec<&crate::words::Word>> = Vec::new();
646        for word in &sorted_words {
647            let added = lines.last_mut().and_then(|line| {
648                let last_top = line[0].bbox.top;
649                if (word.bbox.top - last_top).abs() <= options.y_tolerance {
650                    line.push(word);
651                    Some(())
652                } else {
653                    None
654                }
655            });
656            if added.is_none() {
657                lines.push(vec![word]);
658            }
659        }
660
661        // Join: words within a line separated by spaces, lines separated by newlines
662        let text: String = lines
663            .iter()
664            .map(|line| {
665                line.iter()
666                    .map(|w| w.text.as_str())
667                    .collect::<Vec<_>>()
668                    .join(" ")
669            })
670            .collect::<Vec<_>>()
671            .join("\n");
672
673        cell.text = Some(text);
674    }
675}
676
677/// Generate synthetic edges from text alignment patterns for the Stream strategy.
678///
679/// Analyzes word positions to detect vertical and horizontal text alignments:
680/// - Words sharing similar x0 or x1 coordinates → synthetic vertical edges
681/// - Words sharing similar top or bottom coordinates → synthetic horizontal edges
682///
683/// Groups must meet the minimum word count thresholds (`min_words_vertical` for
684/// vertical edges, `min_words_horizontal` for horizontal edges) to produce an edge.
685///
686/// Each synthetic edge spans the full extent of the aligned words in the
687/// perpendicular direction.
688pub fn words_to_edges_stream(
689    words: &[Word],
690    text_x_tolerance: f64,
691    text_y_tolerance: f64,
692    min_words_vertical: usize,
693    min_words_horizontal: usize,
694) -> Vec<Edge> {
695    if words.is_empty() {
696        return Vec::new();
697    }
698
699    let mut edges = Vec::new();
700
701    // Vertical edges from x0 alignment (left edges of words)
702    edges.extend(cluster_words_to_edges(
703        words,
704        |w| w.bbox.x0,
705        text_x_tolerance,
706        min_words_vertical,
707        EdgeKind::Vertical,
708    ));
709
710    // Vertical edges from x1 alignment (right edges of words)
711    edges.extend(cluster_words_to_edges(
712        words,
713        |w| w.bbox.x1,
714        text_x_tolerance,
715        min_words_vertical,
716        EdgeKind::Vertical,
717    ));
718
719    // Horizontal edges from top alignment
720    edges.extend(cluster_words_to_edges(
721        words,
722        |w| w.bbox.top,
723        text_y_tolerance,
724        min_words_horizontal,
725        EdgeKind::Horizontal,
726    ));
727
728    // Horizontal edges from bottom alignment
729    edges.extend(cluster_words_to_edges(
730        words,
731        |w| w.bbox.bottom,
732        text_y_tolerance,
733        min_words_horizontal,
734        EdgeKind::Horizontal,
735    ));
736
737    edges
738}
739
740/// Internal enum to specify what kind of edge to produce from word clusters.
741enum EdgeKind {
742    Vertical,
743    Horizontal,
744}
745
746/// Cluster words by a coordinate accessor, then produce synthetic edges for qualifying clusters.
747fn cluster_words_to_edges<F>(
748    words: &[Word],
749    key: F,
750    tolerance: f64,
751    min_words: usize,
752    kind: EdgeKind,
753) -> Vec<Edge>
754where
755    F: Fn(&Word) -> f64,
756{
757    if words.is_empty() || min_words == 0 {
758        return Vec::new();
759    }
760
761    // Sort word indices by the key coordinate
762    let mut indices: Vec<usize> = (0..words.len()).collect();
763    indices.sort_by(|&a, &b| key(&words[a]).partial_cmp(&key(&words[b])).unwrap());
764
765    let mut edges = Vec::new();
766    let mut cluster_start = 0;
767
768    for i in 1..=indices.len() {
769        let end_of_cluster = i == indices.len()
770            || (key(&words[indices[i]]) - key(&words[indices[cluster_start]])).abs() > tolerance;
771
772        if end_of_cluster {
773            let cluster_size = i - cluster_start;
774            if cluster_size >= min_words {
775                // Compute the mean position for the cluster
776                let sum: f64 = (cluster_start..i).map(|j| key(&words[indices[j]])).sum();
777                let mean_pos = sum / cluster_size as f64;
778
779                // Compute the span in the perpendicular direction
780                let cluster_words: Vec<&Word> =
781                    (cluster_start..i).map(|j| &words[indices[j]]).collect();
782
783                match kind {
784                    EdgeKind::Vertical => {
785                        let min_top = cluster_words
786                            .iter()
787                            .map(|w| w.bbox.top)
788                            .fold(f64::INFINITY, f64::min);
789                        let max_bottom = cluster_words
790                            .iter()
791                            .map(|w| w.bbox.bottom)
792                            .fold(f64::NEG_INFINITY, f64::max);
793                        edges.push(Edge {
794                            x0: mean_pos,
795                            top: min_top,
796                            x1: mean_pos,
797                            bottom: max_bottom,
798                            orientation: Orientation::Vertical,
799                            source: EdgeSource::Stream,
800                        });
801                    }
802                    EdgeKind::Horizontal => {
803                        let min_x0 = cluster_words
804                            .iter()
805                            .map(|w| w.bbox.x0)
806                            .fold(f64::INFINITY, f64::min);
807                        let max_x1 = cluster_words
808                            .iter()
809                            .map(|w| w.bbox.x1)
810                            .fold(f64::NEG_INFINITY, f64::max);
811                        edges.push(Edge {
812                            x0: min_x0,
813                            top: mean_pos,
814                            x1: max_x1,
815                            bottom: mean_pos,
816                            orientation: Orientation::Horizontal,
817                            source: EdgeSource::Stream,
818                        });
819                    }
820                }
821            }
822            cluster_start = i;
823        }
824    }
825
826    edges
827}
828
829/// Convert user-provided explicit line coordinates into edges.
830///
831/// Horizontal lines (y-coordinates) become horizontal edges spanning the full
832/// x-range of the vertical lines. Vertical lines (x-coordinates) become
833/// vertical edges spanning the full y-range of the horizontal lines.
834///
835/// Returns an empty Vec if either list is empty (a grid requires both).
836pub fn explicit_lines_to_edges(explicit: &ExplicitLines) -> Vec<Edge> {
837    if explicit.horizontal_lines.is_empty() || explicit.vertical_lines.is_empty() {
838        return Vec::new();
839    }
840
841    let min_x = explicit
842        .vertical_lines
843        .iter()
844        .copied()
845        .fold(f64::INFINITY, f64::min);
846    let max_x = explicit
847        .vertical_lines
848        .iter()
849        .copied()
850        .fold(f64::NEG_INFINITY, f64::max);
851    let min_y = explicit
852        .horizontal_lines
853        .iter()
854        .copied()
855        .fold(f64::INFINITY, f64::min);
856    let max_y = explicit
857        .horizontal_lines
858        .iter()
859        .copied()
860        .fold(f64::NEG_INFINITY, f64::max);
861
862    let mut edges = Vec::new();
863
864    // Horizontal edges: each y-coordinate spans from min_x to max_x
865    for &y in &explicit.horizontal_lines {
866        edges.push(Edge {
867            x0: min_x,
868            top: y,
869            x1: max_x,
870            bottom: y,
871            orientation: Orientation::Horizontal,
872            source: EdgeSource::Explicit,
873        });
874    }
875
876    // Vertical edges: each x-coordinate spans from min_y to max_y
877    for &x in &explicit.vertical_lines {
878        edges.push(Edge {
879            x0: x,
880            top: min_y,
881            x1: x,
882            bottom: max_y,
883            orientation: Orientation::Vertical,
884            source: EdgeSource::Explicit,
885        });
886    }
887
888    edges
889}
890
891/// Orchestrator for the table detection pipeline.
892///
893/// Takes edges (and optionally words/chars) and settings, then runs
894/// the appropriate detection strategy to produce tables.
895pub struct TableFinder {
896    /// Edges available for table detection.
897    edges: Vec<Edge>,
898    /// Words for Stream strategy text alignment detection.
899    words: Vec<Word>,
900    /// Configuration settings.
901    settings: TableSettings,
902}
903
904impl TableFinder {
905    /// Create a new TableFinder with the given edges and settings.
906    pub fn new(edges: Vec<Edge>, settings: TableSettings) -> Self {
907        Self {
908            edges,
909            words: Vec::new(),
910            settings,
911        }
912    }
913
914    /// Create a new TableFinder with edges, words, and settings.
915    ///
916    /// The words are used by the Stream strategy to generate synthetic edges
917    /// from text alignment patterns.
918    pub fn new_with_words(edges: Vec<Edge>, words: Vec<Word>, settings: TableSettings) -> Self {
919        Self {
920            edges,
921            words,
922            settings,
923        }
924    }
925
926    /// Get a reference to the settings.
927    pub fn settings(&self) -> &TableSettings {
928        &self.settings
929    }
930
931    /// Get a reference to the edges.
932    pub fn edges(&self) -> &[Edge] {
933        &self.edges
934    }
935
936    /// Run the table detection pipeline and return detected tables.
937    ///
938    /// Pipeline: filter edges → snap → join → intersections → cells → tables.
939    ///
940    /// For **Lattice** strategy, all edges (lines + rect edges) are used.
941    /// For **LatticeStrict** strategy, only line-sourced edges are used (no rect edges).
942    /// For **Stream** strategy, synthetic edges are generated from word alignment patterns.
943    /// For **Explicit** strategy, edges from user-provided coordinates are used,
944    /// combined with any detected edges passed to the finder (mixing).
945    pub fn find_tables(&self) -> Vec<Table> {
946        // Step 1: Select edges based on strategy
947        let edges: Vec<Edge> = match self.settings.strategy {
948            Strategy::LatticeStrict => self
949                .edges
950                .iter()
951                .filter(|e| e.source == EdgeSource::Line)
952                .cloned()
953                .collect(),
954            Strategy::Stream => {
955                // Generate synthetic edges from word alignment patterns
956                words_to_edges_stream(
957                    &self.words,
958                    self.settings.text_x_tolerance,
959                    self.settings.text_y_tolerance,
960                    self.settings.min_words_vertical,
961                    self.settings.min_words_horizontal,
962                )
963            }
964            Strategy::Explicit => {
965                // Start with detected edges (for mixing)
966                let mut edges = self.edges.clone();
967
968                if let Some(ref explicit) = self.settings.explicit_lines {
969                    // Compute the overall bounding range from detected edges + explicit coords
970                    let mut min_x = f64::INFINITY;
971                    let mut max_x = f64::NEG_INFINITY;
972                    let mut min_y = f64::INFINITY;
973                    let mut max_y = f64::NEG_INFINITY;
974
975                    for e in &edges {
976                        min_x = min_x.min(e.x0);
977                        max_x = max_x.max(e.x1);
978                        min_y = min_y.min(e.top);
979                        max_y = max_y.max(e.bottom);
980                    }
981                    for &x in &explicit.vertical_lines {
982                        min_x = min_x.min(x);
983                        max_x = max_x.max(x);
984                    }
985                    for &y in &explicit.horizontal_lines {
986                        min_y = min_y.min(y);
987                        max_y = max_y.max(y);
988                    }
989
990                    if min_x <= max_x && min_y <= max_y {
991                        for &y in &explicit.horizontal_lines {
992                            edges.push(Edge {
993                                x0: min_x,
994                                top: y,
995                                x1: max_x,
996                                bottom: y,
997                                orientation: Orientation::Horizontal,
998                                source: EdgeSource::Explicit,
999                            });
1000                        }
1001                        for &x in &explicit.vertical_lines {
1002                            edges.push(Edge {
1003                                x0: x,
1004                                top: min_y,
1005                                x1: x,
1006                                bottom: max_y,
1007                                orientation: Orientation::Vertical,
1008                                source: EdgeSource::Explicit,
1009                            });
1010                        }
1011                    }
1012                }
1013
1014                edges
1015            }
1016            // Lattice (default): use all edges
1017            Strategy::Lattice => self.edges.clone(),
1018        };
1019
1020        // Step 2: Filter edges by minimum length
1021        let min_len = self.settings.edge_min_length;
1022        let edges: Vec<Edge> = edges
1023            .into_iter()
1024            .filter(|e| edge_length(e) >= min_len)
1025            .collect();
1026
1027        if edges.is_empty() {
1028            return Vec::new();
1029        }
1030
1031        // Step 3: Snap nearby parallel edges
1032        let edges = snap_edges(
1033            edges,
1034            self.settings.snap_x_tolerance,
1035            self.settings.snap_y_tolerance,
1036        );
1037
1038        // Step 4: Join collinear edge segments
1039        let edges = join_edge_group(
1040            edges,
1041            self.settings.join_x_tolerance,
1042            self.settings.join_y_tolerance,
1043        );
1044
1045        // Step 5: Find intersections
1046        let intersections = edges_to_intersections(
1047            &edges,
1048            self.settings.intersection_x_tolerance,
1049            self.settings.intersection_y_tolerance,
1050        );
1051
1052        // Step 6: Build cells from intersections
1053        let cells = intersections_to_cells(&intersections);
1054
1055        // Step 7: Group cells into tables
1056        cells_to_tables(cells)
1057    }
1058}
1059
1060#[cfg(test)]
1061mod tests {
1062    use super::*;
1063    use crate::geometry::Orientation;
1064
1065    // --- Strategy tests ---
1066
1067    #[test]
1068    fn test_strategy_default_is_lattice() {
1069        assert_eq!(Strategy::default(), Strategy::Lattice);
1070    }
1071
1072    #[test]
1073    fn test_strategy_variants_are_distinct() {
1074        let strategies = [
1075            Strategy::Lattice,
1076            Strategy::LatticeStrict,
1077            Strategy::Stream,
1078            Strategy::Explicit,
1079        ];
1080        for i in 0..strategies.len() {
1081            for j in (i + 1)..strategies.len() {
1082                assert_ne!(strategies[i], strategies[j]);
1083            }
1084        }
1085    }
1086
1087    #[test]
1088    fn test_strategy_copy() {
1089        let s = Strategy::Stream;
1090        let s2 = s;
1091        assert_eq!(s, s2);
1092    }
1093
1094    // --- TableSettings tests ---
1095
1096    #[test]
1097    fn test_table_settings_default_values() {
1098        let settings = TableSettings::default();
1099        assert_eq!(settings.strategy, Strategy::Lattice);
1100        assert_eq!(settings.snap_tolerance, 3.0);
1101        assert_eq!(settings.snap_x_tolerance, 3.0);
1102        assert_eq!(settings.snap_y_tolerance, 3.0);
1103        assert_eq!(settings.join_tolerance, 3.0);
1104        assert_eq!(settings.join_x_tolerance, 3.0);
1105        assert_eq!(settings.join_y_tolerance, 3.0);
1106        assert_eq!(settings.edge_min_length, 3.0);
1107        assert_eq!(settings.min_words_vertical, 3);
1108        assert_eq!(settings.min_words_horizontal, 1);
1109        assert_eq!(settings.text_tolerance, 3.0);
1110        assert_eq!(settings.text_x_tolerance, 3.0);
1111        assert_eq!(settings.text_y_tolerance, 3.0);
1112        assert_eq!(settings.intersection_tolerance, 3.0);
1113        assert_eq!(settings.intersection_x_tolerance, 3.0);
1114        assert_eq!(settings.intersection_y_tolerance, 3.0);
1115        assert!(settings.explicit_lines.is_none());
1116    }
1117
1118    #[test]
1119    fn test_table_settings_custom_construction() {
1120        let settings = TableSettings {
1121            strategy: Strategy::Stream,
1122            snap_tolerance: 5.0,
1123            min_words_vertical: 5,
1124            min_words_horizontal: 2,
1125            ..TableSettings::default()
1126        };
1127        assert_eq!(settings.strategy, Strategy::Stream);
1128        assert_eq!(settings.snap_tolerance, 5.0);
1129        assert_eq!(settings.min_words_vertical, 5);
1130        assert_eq!(settings.min_words_horizontal, 2);
1131        // Other fields should still be defaults
1132        assert_eq!(settings.join_tolerance, 3.0);
1133        assert_eq!(settings.edge_min_length, 3.0);
1134    }
1135
1136    #[test]
1137    fn test_table_settings_with_explicit_lines() {
1138        let settings = TableSettings {
1139            strategy: Strategy::Explicit,
1140            explicit_lines: Some(ExplicitLines {
1141                horizontal_lines: vec![10.0, 50.0, 100.0],
1142                vertical_lines: vec![20.0, 80.0, 140.0],
1143            }),
1144            ..TableSettings::default()
1145        };
1146        assert_eq!(settings.strategy, Strategy::Explicit);
1147        let lines = settings.explicit_lines.as_ref().unwrap();
1148        assert_eq!(lines.horizontal_lines.len(), 3);
1149        assert_eq!(lines.vertical_lines.len(), 3);
1150    }
1151
1152    #[test]
1153    fn test_table_settings_strategy_selection() {
1154        for strategy in [
1155            Strategy::Lattice,
1156            Strategy::LatticeStrict,
1157            Strategy::Stream,
1158            Strategy::Explicit,
1159        ] {
1160            let settings = TableSettings {
1161                strategy,
1162                ..TableSettings::default()
1163            };
1164            assert_eq!(settings.strategy, strategy);
1165        }
1166    }
1167
1168    // --- Cell tests ---
1169
1170    #[test]
1171    fn test_cell_with_text() {
1172        let cell = Cell {
1173            bbox: BBox::new(10.0, 20.0, 100.0, 40.0),
1174            text: Some("Hello".to_string()),
1175        };
1176        assert_eq!(cell.bbox.x0, 10.0);
1177        assert_eq!(cell.text.as_deref(), Some("Hello"));
1178    }
1179
1180    #[test]
1181    fn test_cell_without_text() {
1182        let cell = Cell {
1183            bbox: BBox::new(10.0, 20.0, 100.0, 40.0),
1184            text: None,
1185        };
1186        assert!(cell.text.is_none());
1187    }
1188
1189    // --- Table tests ---
1190
1191    #[test]
1192    fn test_table_construction() {
1193        let cells = vec![
1194            Cell {
1195                bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
1196                text: Some("A".to_string()),
1197            },
1198            Cell {
1199                bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
1200                text: Some("B".to_string()),
1201            },
1202        ];
1203        let table = Table {
1204            bbox: BBox::new(0.0, 0.0, 100.0, 30.0),
1205            cells: cells.clone(),
1206            rows: vec![cells.clone()],
1207            columns: vec![vec![cells[0].clone()], vec![cells[1].clone()]],
1208        };
1209        assert_eq!(table.bbox.x0, 0.0);
1210        assert_eq!(table.bbox.x1, 100.0);
1211        assert_eq!(table.cells.len(), 2);
1212        assert_eq!(table.rows.len(), 1);
1213        assert_eq!(table.rows[0].len(), 2);
1214        assert_eq!(table.columns.len(), 2);
1215    }
1216
1217    #[test]
1218    fn test_table_multi_row() {
1219        let row1 = vec![
1220            Cell {
1221                bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
1222                text: Some("A1".to_string()),
1223            },
1224            Cell {
1225                bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
1226                text: Some("B1".to_string()),
1227            },
1228        ];
1229        let row2 = vec![
1230            Cell {
1231                bbox: BBox::new(0.0, 30.0, 50.0, 60.0),
1232                text: Some("A2".to_string()),
1233            },
1234            Cell {
1235                bbox: BBox::new(50.0, 30.0, 100.0, 60.0),
1236                text: Some("B2".to_string()),
1237            },
1238        ];
1239        let all_cells: Vec<Cell> = row1.iter().chain(row2.iter()).cloned().collect();
1240        let table = Table {
1241            bbox: BBox::new(0.0, 0.0, 100.0, 60.0),
1242            cells: all_cells,
1243            rows: vec![row1, row2],
1244            columns: vec![
1245                vec![
1246                    Cell {
1247                        bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
1248                        text: Some("A1".to_string()),
1249                    },
1250                    Cell {
1251                        bbox: BBox::new(0.0, 30.0, 50.0, 60.0),
1252                        text: Some("A2".to_string()),
1253                    },
1254                ],
1255                vec![
1256                    Cell {
1257                        bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
1258                        text: Some("B1".to_string()),
1259                    },
1260                    Cell {
1261                        bbox: BBox::new(50.0, 30.0, 100.0, 60.0),
1262                        text: Some("B2".to_string()),
1263                    },
1264                ],
1265            ],
1266        };
1267        assert_eq!(table.rows.len(), 2);
1268        assert_eq!(table.columns.len(), 2);
1269        assert_eq!(table.cells.len(), 4);
1270    }
1271
1272    // --- TableFinder tests ---
1273
1274    #[test]
1275    fn test_table_finder_construction() {
1276        let edges = vec![Edge {
1277            x0: 0.0,
1278            top: 50.0,
1279            x1: 100.0,
1280            bottom: 50.0,
1281            orientation: Orientation::Horizontal,
1282            source: crate::edges::EdgeSource::Line,
1283        }];
1284        let settings = TableSettings::default();
1285        let finder = TableFinder::new(edges.clone(), settings.clone());
1286
1287        assert_eq!(finder.edges().len(), 1);
1288        assert_eq!(finder.settings().strategy, Strategy::Lattice);
1289    }
1290
1291    #[test]
1292    fn test_table_finder_empty_edges() {
1293        let finder = TableFinder::new(Vec::new(), TableSettings::default());
1294        assert!(finder.edges().is_empty());
1295        let tables = finder.find_tables();
1296        assert!(tables.is_empty());
1297    }
1298
1299    #[test]
1300    fn test_table_finder_custom_settings() {
1301        let settings = TableSettings {
1302            strategy: Strategy::LatticeStrict,
1303            snap_tolerance: 5.0,
1304            ..TableSettings::default()
1305        };
1306        let finder = TableFinder::new(Vec::new(), settings);
1307        assert_eq!(finder.settings().strategy, Strategy::LatticeStrict);
1308        assert_eq!(finder.settings().snap_tolerance, 5.0);
1309    }
1310
1311    // --- ExplicitLines tests ---
1312
1313    #[test]
1314    fn test_explicit_lines_construction() {
1315        let lines = ExplicitLines {
1316            horizontal_lines: vec![0.0, 30.0, 60.0],
1317            vertical_lines: vec![0.0, 50.0, 100.0],
1318        };
1319        assert_eq!(lines.horizontal_lines.len(), 3);
1320        assert_eq!(lines.vertical_lines.len(), 3);
1321        assert_eq!(lines.horizontal_lines[1], 30.0);
1322        assert_eq!(lines.vertical_lines[2], 100.0);
1323    }
1324
1325    #[test]
1326    fn test_explicit_lines_empty() {
1327        let lines = ExplicitLines {
1328            horizontal_lines: Vec::new(),
1329            vertical_lines: Vec::new(),
1330        };
1331        assert!(lines.horizontal_lines.is_empty());
1332        assert!(lines.vertical_lines.is_empty());
1333    }
1334
1335    // --- snap_edges tests ---
1336
1337    fn make_h_edge(x0: f64, y: f64, x1: f64) -> Edge {
1338        Edge {
1339            x0,
1340            top: y,
1341            x1,
1342            bottom: y,
1343            orientation: Orientation::Horizontal,
1344            source: crate::edges::EdgeSource::Line,
1345        }
1346    }
1347
1348    fn make_v_edge(x: f64, top: f64, bottom: f64) -> Edge {
1349        Edge {
1350            x0: x,
1351            top,
1352            x1: x,
1353            bottom,
1354            orientation: Orientation::Vertical,
1355            source: crate::edges::EdgeSource::Line,
1356        }
1357    }
1358
1359    fn assert_approx(a: f64, b: f64) {
1360        assert!(
1361            (a - b).abs() < 1e-6,
1362            "expected {b}, got {a}, diff={}",
1363            (a - b).abs()
1364        );
1365    }
1366
1367    #[test]
1368    fn test_snap_edges_empty() {
1369        let result = snap_edges(Vec::new(), 3.0, 3.0);
1370        assert!(result.is_empty());
1371    }
1372
1373    #[test]
1374    fn test_snap_nearby_horizontal_lines() {
1375        // Two horizontal edges at y=50.0 and y=51.5 (within tolerance 3.0)
1376        // Should snap to mean = 50.75
1377        let edges = vec![make_h_edge(0.0, 50.0, 100.0), make_h_edge(0.0, 51.5, 100.0)];
1378        let result = snap_edges(edges, 3.0, 3.0);
1379
1380        let horizontals: Vec<&Edge> = result
1381            .iter()
1382            .filter(|e| e.orientation == Orientation::Horizontal)
1383            .collect();
1384        assert_eq!(horizontals.len(), 2);
1385        assert_approx(horizontals[0].top, 50.75);
1386        assert_approx(horizontals[0].bottom, 50.75);
1387        assert_approx(horizontals[1].top, 50.75);
1388        assert_approx(horizontals[1].bottom, 50.75);
1389    }
1390
1391    #[test]
1392    fn test_snap_nearby_vertical_lines() {
1393        // Two vertical edges at x=100.0 and x=101.0 (within tolerance 3.0)
1394        // Should snap to mean = 100.5
1395        let edges = vec![
1396            make_v_edge(100.0, 0.0, 200.0),
1397            make_v_edge(101.0, 0.0, 200.0),
1398        ];
1399        let result = snap_edges(edges, 3.0, 3.0);
1400
1401        let verticals: Vec<&Edge> = result
1402            .iter()
1403            .filter(|e| e.orientation == Orientation::Vertical)
1404            .collect();
1405        assert_eq!(verticals.len(), 2);
1406        assert_approx(verticals[0].x0, 100.5);
1407        assert_approx(verticals[0].x1, 100.5);
1408        assert_approx(verticals[1].x0, 100.5);
1409        assert_approx(verticals[1].x1, 100.5);
1410    }
1411
1412    #[test]
1413    fn test_snap_edges_far_apart_remain_unchanged() {
1414        // Two horizontal edges at y=50.0 and y=100.0 (far apart, beyond tolerance 3.0)
1415        let edges = vec![
1416            make_h_edge(0.0, 50.0, 100.0),
1417            make_h_edge(0.0, 100.0, 100.0),
1418        ];
1419        let result = snap_edges(edges, 3.0, 3.0);
1420
1421        let horizontals: Vec<&Edge> = result
1422            .iter()
1423            .filter(|e| e.orientation == Orientation::Horizontal)
1424            .collect();
1425        assert_eq!(horizontals.len(), 2);
1426        // They should remain at their original positions
1427        let mut ys: Vec<f64> = horizontals.iter().map(|e| e.top).collect();
1428        ys.sort_by(|a, b| a.partial_cmp(b).unwrap());
1429        assert_approx(ys[0], 50.0);
1430        assert_approx(ys[1], 100.0);
1431    }
1432
1433    #[test]
1434    fn test_snap_edges_separate_x_y_tolerance() {
1435        // Horizontal edges within 2.0 of each other, snap_y_tolerance=1.0 (NOT within)
1436        // Should NOT snap
1437        let edges = vec![make_h_edge(0.0, 50.0, 100.0), make_h_edge(0.0, 52.0, 100.0)];
1438        let result = snap_edges(edges, 3.0, 1.0);
1439
1440        let horizontals: Vec<&Edge> = result
1441            .iter()
1442            .filter(|e| e.orientation == Orientation::Horizontal)
1443            .collect();
1444        let mut ys: Vec<f64> = horizontals.iter().map(|e| e.top).collect();
1445        ys.sort_by(|a, b| a.partial_cmp(b).unwrap());
1446        assert_approx(ys[0], 50.0);
1447        assert_approx(ys[1], 52.0);
1448    }
1449
1450    #[test]
1451    fn test_snap_edges_separate_x_tolerance() {
1452        // Vertical edges within 2.0 of each other, snap_x_tolerance=1.0 (NOT within)
1453        // Should NOT snap
1454        let edges = vec![
1455            make_v_edge(100.0, 0.0, 200.0),
1456            make_v_edge(102.0, 0.0, 200.0),
1457        ];
1458        let result = snap_edges(edges, 1.0, 3.0);
1459
1460        let verticals: Vec<&Edge> = result
1461            .iter()
1462            .filter(|e| e.orientation == Orientation::Vertical)
1463            .collect();
1464        let mut xs: Vec<f64> = verticals.iter().map(|e| e.x0).collect();
1465        xs.sort_by(|a, b| a.partial_cmp(b).unwrap());
1466        assert_approx(xs[0], 100.0);
1467        assert_approx(xs[1], 102.0);
1468    }
1469
1470    #[test]
1471    fn test_snap_edges_does_not_merge() {
1472        // Three horizontal edges within tolerance should snap but remain 3 separate edges
1473        let edges = vec![
1474            make_h_edge(0.0, 50.0, 100.0),
1475            make_h_edge(10.0, 51.0, 90.0),
1476            make_h_edge(20.0, 50.5, 80.0),
1477        ];
1478        let result = snap_edges(edges, 3.0, 3.0);
1479
1480        let horizontals: Vec<&Edge> = result
1481            .iter()
1482            .filter(|e| e.orientation == Orientation::Horizontal)
1483            .collect();
1484        // Still 3 edges - snap does not merge
1485        assert_eq!(horizontals.len(), 3);
1486        // All snapped to mean of 50.0, 51.0, 50.5 = 50.5
1487        for h in &horizontals {
1488            assert_approx(h.top, 50.5);
1489            assert_approx(h.bottom, 50.5);
1490        }
1491    }
1492
1493    #[test]
1494    fn test_snap_edges_preserves_along_axis_coords() {
1495        // Snapping horizontal edges should only change y, not x
1496        let edges = vec![
1497            make_h_edge(10.0, 50.0, 200.0),
1498            make_h_edge(30.0, 51.0, 180.0),
1499        ];
1500        let result = snap_edges(edges, 3.0, 3.0);
1501
1502        let horizontals: Vec<&Edge> = result
1503            .iter()
1504            .filter(|e| e.orientation == Orientation::Horizontal)
1505            .collect();
1506        // x-coordinates should be unchanged
1507        let mut found_10 = false;
1508        let mut found_30 = false;
1509        for h in &horizontals {
1510            if (h.x0 - 10.0).abs() < 1e-6 {
1511                assert_approx(h.x1, 200.0);
1512                found_10 = true;
1513            }
1514            if (h.x0 - 30.0).abs() < 1e-6 {
1515                assert_approx(h.x1, 180.0);
1516                found_30 = true;
1517            }
1518        }
1519        assert!(found_10 && found_30, "x-coordinates should be preserved");
1520    }
1521
1522    #[test]
1523    fn test_snap_edges_mixed_orientations() {
1524        // Mix of horizontal and vertical edges, each group snaps independently
1525        let edges = vec![
1526            make_h_edge(0.0, 50.0, 100.0),
1527            make_h_edge(0.0, 51.0, 100.0),
1528            make_v_edge(200.0, 0.0, 100.0),
1529            make_v_edge(201.0, 0.0, 100.0),
1530        ];
1531        let result = snap_edges(edges, 3.0, 3.0);
1532        assert_eq!(result.len(), 4);
1533
1534        let horizontals: Vec<&Edge> = result
1535            .iter()
1536            .filter(|e| e.orientation == Orientation::Horizontal)
1537            .collect();
1538        let verticals: Vec<&Edge> = result
1539            .iter()
1540            .filter(|e| e.orientation == Orientation::Vertical)
1541            .collect();
1542
1543        // Horizontal snapped to mean(50, 51) = 50.5
1544        for h in &horizontals {
1545            assert_approx(h.top, 50.5);
1546        }
1547        // Vertical snapped to mean(200, 201) = 200.5
1548        for v in &verticals {
1549            assert_approx(v.x0, 200.5);
1550        }
1551    }
1552
1553    #[test]
1554    fn test_snap_edges_multiple_clusters() {
1555        // Three groups of horizontal edges, well separated
1556        let edges = vec![
1557            make_h_edge(0.0, 10.0, 100.0),
1558            make_h_edge(0.0, 11.0, 100.0),
1559            // gap
1560            make_h_edge(0.0, 50.0, 100.0),
1561            make_h_edge(0.0, 51.0, 100.0),
1562            // gap
1563            make_h_edge(0.0, 100.0, 100.0),
1564            make_h_edge(0.0, 101.0, 100.0),
1565        ];
1566        let result = snap_edges(edges, 3.0, 3.0);
1567
1568        let horizontals: Vec<&Edge> = result
1569            .iter()
1570            .filter(|e| e.orientation == Orientation::Horizontal)
1571            .collect();
1572        assert_eq!(horizontals.len(), 6);
1573
1574        let mut ys: Vec<f64> = horizontals.iter().map(|e| e.top).collect();
1575        ys.sort_by(|a, b| a.partial_cmp(b).unwrap());
1576        // Cluster 1: mean(10, 11) = 10.5
1577        assert_approx(ys[0], 10.5);
1578        assert_approx(ys[1], 10.5);
1579        // Cluster 2: mean(50, 51) = 50.5
1580        assert_approx(ys[2], 50.5);
1581        assert_approx(ys[3], 50.5);
1582        // Cluster 3: mean(100, 101) = 100.5
1583        assert_approx(ys[4], 100.5);
1584        assert_approx(ys[5], 100.5);
1585    }
1586
1587    #[test]
1588    fn test_snap_edges_single_edge_unchanged() {
1589        let edges = vec![make_h_edge(0.0, 50.0, 100.0)];
1590        let result = snap_edges(edges, 3.0, 3.0);
1591        assert_eq!(result.len(), 1);
1592        assert_approx(result[0].top, 50.0);
1593        assert_approx(result[0].bottom, 50.0);
1594    }
1595
1596    #[test]
1597    fn test_snap_edges_diagonal_passed_through() {
1598        let edges = vec![
1599            Edge {
1600                x0: 0.0,
1601                top: 0.0,
1602                x1: 100.0,
1603                bottom: 100.0,
1604                orientation: Orientation::Diagonal,
1605                source: crate::edges::EdgeSource::Curve,
1606            },
1607            make_h_edge(0.0, 50.0, 100.0),
1608        ];
1609        let result = snap_edges(edges, 3.0, 3.0);
1610        assert_eq!(result.len(), 2);
1611
1612        let diagonals: Vec<&Edge> = result
1613            .iter()
1614            .filter(|e| e.orientation == Orientation::Diagonal)
1615            .collect();
1616        assert_eq!(diagonals.len(), 1);
1617        // Diagonal edge unchanged
1618        assert_approx(diagonals[0].x0, 0.0);
1619        assert_approx(diagonals[0].top, 0.0);
1620        assert_approx(diagonals[0].x1, 100.0);
1621        assert_approx(diagonals[0].bottom, 100.0);
1622    }
1623
1624    // --- join_edge_group tests ---
1625
1626    #[test]
1627    fn test_join_edge_group_empty() {
1628        let result = join_edge_group(Vec::new(), 3.0, 3.0);
1629        assert!(result.is_empty());
1630    }
1631
1632    #[test]
1633    fn test_join_edge_group_single_edge_unchanged() {
1634        let edges = vec![make_h_edge(10.0, 50.0, 80.0)];
1635        let result = join_edge_group(edges, 3.0, 3.0);
1636        assert_eq!(result.len(), 1);
1637        assert_approx(result[0].x0, 10.0);
1638        assert_approx(result[0].x1, 80.0);
1639    }
1640
1641    #[test]
1642    fn test_join_two_overlapping_horizontal_edges() {
1643        // Two horizontal edges at y=50 that overlap: [10..60] and [40..90]
1644        // Should merge into [10..90]
1645        let edges = vec![make_h_edge(10.0, 50.0, 60.0), make_h_edge(40.0, 50.0, 90.0)];
1646        let result = join_edge_group(edges, 3.0, 3.0);
1647        assert_eq!(result.len(), 1);
1648        assert_approx(result[0].x0, 10.0);
1649        assert_approx(result[0].x1, 90.0);
1650        assert_approx(result[0].top, 50.0);
1651    }
1652
1653    #[test]
1654    fn test_join_two_adjacent_horizontal_edges_within_tolerance() {
1655        // Two horizontal edges at y=50: [10..50] and [52..90]
1656        // Gap is 2.0, within join_x_tolerance=3.0 → merge to [10..90]
1657        let edges = vec![make_h_edge(10.0, 50.0, 50.0), make_h_edge(52.0, 50.0, 90.0)];
1658        let result = join_edge_group(edges, 3.0, 3.0);
1659        assert_eq!(result.len(), 1);
1660        assert_approx(result[0].x0, 10.0);
1661        assert_approx(result[0].x1, 90.0);
1662    }
1663
1664    #[test]
1665    fn test_join_distant_horizontal_edges_not_merged() {
1666        // Two horizontal edges at y=50: [10..40] and [60..90]
1667        // Gap is 20.0, beyond tolerance → remain separate
1668        let edges = vec![make_h_edge(10.0, 50.0, 40.0), make_h_edge(60.0, 50.0, 90.0)];
1669        let result = join_edge_group(edges, 3.0, 3.0);
1670        assert_eq!(result.len(), 2);
1671    }
1672
1673    #[test]
1674    fn test_join_chain_of_three_horizontal_segments() {
1675        // Three segments on y=50: [10..40], [38..70], [68..100]
1676        // All overlap pairwise → chain merge to [10..100]
1677        let edges = vec![
1678            make_h_edge(10.0, 50.0, 40.0),
1679            make_h_edge(38.0, 50.0, 70.0),
1680            make_h_edge(68.0, 50.0, 100.0),
1681        ];
1682        let result = join_edge_group(edges, 3.0, 3.0);
1683        assert_eq!(result.len(), 1);
1684        assert_approx(result[0].x0, 10.0);
1685        assert_approx(result[0].x1, 100.0);
1686    }
1687
1688    #[test]
1689    fn test_join_two_overlapping_vertical_edges() {
1690        // Two vertical edges at x=50: [10..60] and [40..90]
1691        // Should merge into [10..90]
1692        let edges = vec![make_v_edge(50.0, 10.0, 60.0), make_v_edge(50.0, 40.0, 90.0)];
1693        let result = join_edge_group(edges, 3.0, 3.0);
1694        assert_eq!(result.len(), 1);
1695        assert_approx(result[0].top, 10.0);
1696        assert_approx(result[0].bottom, 90.0);
1697        assert_approx(result[0].x0, 50.0);
1698    }
1699
1700    #[test]
1701    fn test_join_adjacent_vertical_edges_within_tolerance() {
1702        // Two vertical edges at x=50: [10..50] and [52..90]
1703        // Gap is 2.0, within join_y_tolerance=3.0 → merge
1704        let edges = vec![make_v_edge(50.0, 10.0, 50.0), make_v_edge(50.0, 52.0, 90.0)];
1705        let result = join_edge_group(edges, 3.0, 3.0);
1706        assert_eq!(result.len(), 1);
1707        assert_approx(result[0].top, 10.0);
1708        assert_approx(result[0].bottom, 90.0);
1709    }
1710
1711    #[test]
1712    fn test_join_groups_by_collinear_position() {
1713        // Two groups of horizontal edges at different y positions
1714        // Group 1: y=50, [10..50] and [48..90] → merge to [10..90]
1715        // Group 2: y=100, [10..40] and [60..90] → gap too big, stay separate
1716        let edges = vec![
1717            make_h_edge(10.0, 50.0, 50.0),
1718            make_h_edge(48.0, 50.0, 90.0),
1719            make_h_edge(10.0, 100.0, 40.0),
1720            make_h_edge(60.0, 100.0, 90.0),
1721        ];
1722        let result = join_edge_group(edges, 3.0, 3.0);
1723        assert_eq!(result.len(), 3);
1724
1725        let at_50: Vec<&Edge> = result
1726            .iter()
1727            .filter(|e| (e.top - 50.0).abs() < 1e-6)
1728            .collect();
1729        assert_eq!(at_50.len(), 1);
1730        assert_approx(at_50[0].x0, 10.0);
1731        assert_approx(at_50[0].x1, 90.0);
1732
1733        let at_100: Vec<&Edge> = result
1734            .iter()
1735            .filter(|e| (e.top - 100.0).abs() < 1e-6)
1736            .collect();
1737        assert_eq!(at_100.len(), 2);
1738    }
1739
1740    #[test]
1741    fn test_join_mixed_orientations() {
1742        // Mix of horizontal and vertical edges: each group joins independently
1743        let edges = vec![
1744            make_h_edge(10.0, 50.0, 50.0),
1745            make_h_edge(48.0, 50.0, 90.0),
1746            make_v_edge(200.0, 10.0, 50.0),
1747            make_v_edge(200.0, 48.0, 90.0),
1748        ];
1749        let result = join_edge_group(edges, 3.0, 3.0);
1750        assert_eq!(result.len(), 2);
1751
1752        let horizontals: Vec<&Edge> = result
1753            .iter()
1754            .filter(|e| e.orientation == Orientation::Horizontal)
1755            .collect();
1756        assert_eq!(horizontals.len(), 1);
1757        assert_approx(horizontals[0].x0, 10.0);
1758        assert_approx(horizontals[0].x1, 90.0);
1759
1760        let verticals: Vec<&Edge> = result
1761            .iter()
1762            .filter(|e| e.orientation == Orientation::Vertical)
1763            .collect();
1764        assert_eq!(verticals.len(), 1);
1765        assert_approx(verticals[0].top, 10.0);
1766        assert_approx(verticals[0].bottom, 90.0);
1767    }
1768
1769    #[test]
1770    fn test_join_separate_x_y_tolerance() {
1771        // Horizontal edges: gap=4.0, join_x_tolerance=3.0 → NOT merged
1772        let edges = vec![make_h_edge(10.0, 50.0, 40.0), make_h_edge(44.0, 50.0, 80.0)];
1773        let result = join_edge_group(edges, 3.0, 3.0);
1774        assert_eq!(result.len(), 2);
1775
1776        // Vertical edges: gap=4.0, join_y_tolerance=5.0 → merged
1777        let edges = vec![make_v_edge(50.0, 10.0, 40.0), make_v_edge(50.0, 44.0, 80.0)];
1778        let result = join_edge_group(edges, 3.0, 5.0);
1779        assert_eq!(result.len(), 1);
1780    }
1781
1782    #[test]
1783    fn test_join_diagonal_edges_pass_through() {
1784        let diag = Edge {
1785            x0: 0.0,
1786            top: 0.0,
1787            x1: 100.0,
1788            bottom: 100.0,
1789            orientation: Orientation::Diagonal,
1790            source: crate::edges::EdgeSource::Curve,
1791        };
1792        let edges = vec![diag.clone(), make_h_edge(10.0, 50.0, 90.0)];
1793        let result = join_edge_group(edges, 3.0, 3.0);
1794        assert_eq!(result.len(), 2);
1795
1796        let diagonals: Vec<&Edge> = result
1797            .iter()
1798            .filter(|e| e.orientation == Orientation::Diagonal)
1799            .collect();
1800        assert_eq!(diagonals.len(), 1);
1801        assert_approx(diagonals[0].x0, 0.0);
1802        assert_approx(diagonals[0].bottom, 100.0);
1803    }
1804
1805    #[test]
1806    fn test_snap_edges_zero_tolerance() {
1807        // With zero tolerance, only exact matches snap
1808        let edges = vec![
1809            make_h_edge(0.0, 50.0, 100.0),
1810            make_h_edge(0.0, 50.0, 100.0), // exact same y
1811            make_h_edge(0.0, 50.1, 100.0), // different y
1812        ];
1813        let result = snap_edges(edges, 0.0, 0.0);
1814
1815        let horizontals: Vec<&Edge> = result
1816            .iter()
1817            .filter(|e| e.orientation == Orientation::Horizontal)
1818            .collect();
1819        assert_eq!(horizontals.len(), 3);
1820        let mut ys: Vec<f64> = horizontals.iter().map(|e| e.top).collect();
1821        ys.sort_by(|a, b| a.partial_cmp(b).unwrap());
1822        assert_approx(ys[0], 50.0);
1823        assert_approx(ys[1], 50.0);
1824        assert_approx(ys[2], 50.1);
1825    }
1826
1827    // --- edges_to_intersections tests ---
1828
1829    fn has_intersection(intersections: &[Intersection], x: f64, y: f64) -> bool {
1830        intersections
1831            .iter()
1832            .any(|i| (i.x - x).abs() < 1e-6 && (i.y - y).abs() < 1e-6)
1833    }
1834
1835    #[test]
1836    fn test_intersections_empty_edges() {
1837        let result = edges_to_intersections(&[], 3.0, 3.0);
1838        assert!(result.is_empty());
1839    }
1840
1841    #[test]
1842    fn test_intersections_simple_cross() {
1843        // Horizontal edge at y=50 from x=0 to x=100
1844        // Vertical edge at x=50 from y=0 to y=100
1845        // Should intersect at (50, 50)
1846        let edges = vec![make_h_edge(0.0, 50.0, 100.0), make_v_edge(50.0, 0.0, 100.0)];
1847        let result = edges_to_intersections(&edges, 3.0, 3.0);
1848        assert_eq!(result.len(), 1);
1849        assert!(has_intersection(&result, 50.0, 50.0));
1850    }
1851
1852    #[test]
1853    fn test_intersections_t_intersection() {
1854        // Horizontal edge at y=50 from x=0 to x=100
1855        // Vertical edge at x=50 from y=50 to y=100 (starts at the horizontal edge)
1856        // Should intersect at (50, 50)
1857        let edges = vec![
1858            make_h_edge(0.0, 50.0, 100.0),
1859            make_v_edge(50.0, 50.0, 100.0),
1860        ];
1861        let result = edges_to_intersections(&edges, 3.0, 3.0);
1862        assert_eq!(result.len(), 1);
1863        assert!(has_intersection(&result, 50.0, 50.0));
1864    }
1865
1866    #[test]
1867    fn test_intersections_l_intersection_corner() {
1868        // Horizontal edge at y=50 from x=50 to x=100
1869        // Vertical edge at x=50 from y=0 to y=50
1870        // Corner at (50, 50)
1871        let edges = vec![make_h_edge(50.0, 50.0, 100.0), make_v_edge(50.0, 0.0, 50.0)];
1872        let result = edges_to_intersections(&edges, 3.0, 3.0);
1873        assert_eq!(result.len(), 1);
1874        assert!(has_intersection(&result, 50.0, 50.0));
1875    }
1876
1877    #[test]
1878    fn test_intersections_no_intersection_parallel() {
1879        // Two parallel horizontal edges — no intersections
1880        let edges = vec![make_h_edge(0.0, 50.0, 100.0), make_h_edge(0.0, 80.0, 100.0)];
1881        let result = edges_to_intersections(&edges, 3.0, 3.0);
1882        assert!(result.is_empty());
1883    }
1884
1885    #[test]
1886    fn test_intersections_no_intersection_non_overlapping() {
1887        // Horizontal edge at y=50 from x=0 to x=40
1888        // Vertical edge at x=60 from y=0 to y=100
1889        // They don't overlap in x-range (40 < 60 with tolerance 3 → 40+3=43 < 60)
1890        let edges = vec![make_h_edge(0.0, 50.0, 40.0), make_v_edge(60.0, 0.0, 100.0)];
1891        let result = edges_to_intersections(&edges, 3.0, 3.0);
1892        assert!(result.is_empty());
1893    }
1894
1895    #[test]
1896    fn test_intersections_tolerance_based() {
1897        // Horizontal edge at y=50 from x=0 to x=48
1898        // Vertical edge at x=50 from y=0 to y=100
1899        // Gap in x: 50 - 48 = 2, within tolerance 3 → should intersect
1900        let edges = vec![make_h_edge(0.0, 50.0, 48.0), make_v_edge(50.0, 0.0, 100.0)];
1901        let result = edges_to_intersections(&edges, 3.0, 3.0);
1902        assert_eq!(result.len(), 1);
1903        assert!(has_intersection(&result, 50.0, 50.0));
1904    }
1905
1906    #[test]
1907    fn test_intersections_tolerance_y_based() {
1908        // Horizontal edge at y=50 from x=0 to x=100
1909        // Vertical edge at x=50 from y=0 to y=48
1910        // Gap in y: 50 - 48 = 2, within tolerance 3 → should intersect
1911        let edges = vec![make_h_edge(0.0, 50.0, 100.0), make_v_edge(50.0, 0.0, 48.0)];
1912        let result = edges_to_intersections(&edges, 3.0, 3.0);
1913        assert_eq!(result.len(), 1);
1914        assert!(has_intersection(&result, 50.0, 50.0));
1915    }
1916
1917    #[test]
1918    fn test_intersections_beyond_tolerance_no_match() {
1919        // Horizontal edge at y=50 from x=0 to x=45
1920        // Vertical edge at x=50 from y=0 to y=100
1921        // Gap in x: 50 - 45 = 5, beyond tolerance 3 → no intersection
1922        let edges = vec![make_h_edge(0.0, 50.0, 45.0), make_v_edge(50.0, 0.0, 100.0)];
1923        let result = edges_to_intersections(&edges, 3.0, 3.0);
1924        assert!(result.is_empty());
1925    }
1926
1927    #[test]
1928    fn test_intersections_grid_2x2() {
1929        // 2x2 grid: 3 horizontal edges × 3 vertical edges = 9 intersections
1930        // H: y=0, y=50, y=100 (all from x=0 to x=100)
1931        // V: x=0, x=50, x=100 (all from y=0 to y=100)
1932        let edges = vec![
1933            make_h_edge(0.0, 0.0, 100.0),
1934            make_h_edge(0.0, 50.0, 100.0),
1935            make_h_edge(0.0, 100.0, 100.0),
1936            make_v_edge(0.0, 0.0, 100.0),
1937            make_v_edge(50.0, 0.0, 100.0),
1938            make_v_edge(100.0, 0.0, 100.0),
1939        ];
1940        let result = edges_to_intersections(&edges, 3.0, 3.0);
1941        assert_eq!(result.len(), 9);
1942        // Check corners
1943        assert!(has_intersection(&result, 0.0, 0.0));
1944        assert!(has_intersection(&result, 100.0, 0.0));
1945        assert!(has_intersection(&result, 0.0, 100.0));
1946        assert!(has_intersection(&result, 100.0, 100.0));
1947        // Check center
1948        assert!(has_intersection(&result, 50.0, 50.0));
1949    }
1950
1951    #[test]
1952    fn test_intersections_ignores_diagonal_edges() {
1953        // Diagonal edge should be ignored
1954        let edges = vec![
1955            Edge {
1956                x0: 0.0,
1957                top: 0.0,
1958                x1: 100.0,
1959                bottom: 100.0,
1960                orientation: Orientation::Diagonal,
1961                source: crate::edges::EdgeSource::Curve,
1962            },
1963            make_h_edge(0.0, 50.0, 100.0),
1964        ];
1965        let result = edges_to_intersections(&edges, 3.0, 3.0);
1966        assert!(result.is_empty());
1967    }
1968
1969    #[test]
1970    fn test_intersections_multiple_h_one_v() {
1971        // Three horizontal edges at y=10, y=50, y=90 (x=0..100)
1972        // One vertical edge at x=50 (y=0..100)
1973        // Should yield 3 intersections at (50,10), (50,50), (50,90)
1974        let edges = vec![
1975            make_h_edge(0.0, 10.0, 100.0),
1976            make_h_edge(0.0, 50.0, 100.0),
1977            make_h_edge(0.0, 90.0, 100.0),
1978            make_v_edge(50.0, 0.0, 100.0),
1979        ];
1980        let result = edges_to_intersections(&edges, 3.0, 3.0);
1981        assert_eq!(result.len(), 3);
1982        assert!(has_intersection(&result, 50.0, 10.0));
1983        assert!(has_intersection(&result, 50.0, 50.0));
1984        assert!(has_intersection(&result, 50.0, 90.0));
1985    }
1986
1987    #[test]
1988    fn test_intersections_separate_x_y_tolerance() {
1989        // Horizontal edge at y=50, x=0..48
1990        // Vertical edge at x=50, y=0..100
1991        // Gap in x is 2.0. With x_tolerance=1.0, should NOT intersect
1992        let edges = vec![make_h_edge(0.0, 50.0, 48.0), make_v_edge(50.0, 0.0, 100.0)];
1993        let result = edges_to_intersections(&edges, 1.0, 3.0);
1994        assert!(result.is_empty());
1995
1996        // Same setup but with x_tolerance=3.0, should intersect
1997        let result = edges_to_intersections(&edges, 3.0, 3.0);
1998        assert_eq!(result.len(), 1);
1999    }
2000
2001    #[test]
2002    fn test_intersections_no_duplicate_points() {
2003        // Two horizontal edges at the same y, one vertical edge
2004        // Both horizontals cross the vertical at the same point
2005        // Should produce only one intersection point (deduplicated)
2006        let edges = vec![
2007            make_h_edge(0.0, 50.0, 100.0),
2008            make_h_edge(20.0, 50.0, 80.0),
2009            make_v_edge(50.0, 0.0, 100.0),
2010        ];
2011        let result = edges_to_intersections(&edges, 3.0, 3.0);
2012        // Both horizontals at y=50 cross vertical at x=50 → same point (50, 50)
2013        // Should be deduplicated to 1 intersection
2014        assert_eq!(result.len(), 1);
2015        assert!(has_intersection(&result, 50.0, 50.0));
2016    }
2017
2018    // --- intersections_to_cells tests ---
2019
2020    fn make_intersection(x: f64, y: f64) -> Intersection {
2021        Intersection { x, y }
2022    }
2023
2024    #[test]
2025    fn test_intersections_to_cells_empty() {
2026        let result = intersections_to_cells(&[]);
2027        assert!(result.is_empty());
2028    }
2029
2030    #[test]
2031    fn test_intersections_to_cells_simple_2x2_grid() {
2032        // 2x2 grid of intersections → 1 cell
2033        // (0,0) (100,0)
2034        // (0,50) (100,50)
2035        let intersections = vec![
2036            make_intersection(0.0, 0.0),
2037            make_intersection(100.0, 0.0),
2038            make_intersection(0.0, 50.0),
2039            make_intersection(100.0, 50.0),
2040        ];
2041        let cells = intersections_to_cells(&intersections);
2042        assert_eq!(cells.len(), 1);
2043        assert_approx(cells[0].bbox.x0, 0.0);
2044        assert_approx(cells[0].bbox.top, 0.0);
2045        assert_approx(cells[0].bbox.x1, 100.0);
2046        assert_approx(cells[0].bbox.bottom, 50.0);
2047        assert!(cells[0].text.is_none());
2048    }
2049
2050    #[test]
2051    fn test_intersections_to_cells_3x3_grid() {
2052        // 3x3 grid of intersections → 4 cells
2053        //  (0,0)  (50,0)  (100,0)
2054        //  (0,30) (50,30) (100,30)
2055        //  (0,60) (50,60) (100,60)
2056        let intersections = vec![
2057            make_intersection(0.0, 0.0),
2058            make_intersection(50.0, 0.0),
2059            make_intersection(100.0, 0.0),
2060            make_intersection(0.0, 30.0),
2061            make_intersection(50.0, 30.0),
2062            make_intersection(100.0, 30.0),
2063            make_intersection(0.0, 60.0),
2064            make_intersection(50.0, 60.0),
2065            make_intersection(100.0, 60.0),
2066        ];
2067        let cells = intersections_to_cells(&intersections);
2068        assert_eq!(cells.len(), 4);
2069
2070        // Top-left cell
2071        assert!(cells.iter().any(|c| (c.bbox.x0 - 0.0).abs() < 1e-6
2072            && (c.bbox.top - 0.0).abs() < 1e-6
2073            && (c.bbox.x1 - 50.0).abs() < 1e-6
2074            && (c.bbox.bottom - 30.0).abs() < 1e-6));
2075        // Top-right cell
2076        assert!(cells.iter().any(|c| (c.bbox.x0 - 50.0).abs() < 1e-6
2077            && (c.bbox.top - 0.0).abs() < 1e-6
2078            && (c.bbox.x1 - 100.0).abs() < 1e-6
2079            && (c.bbox.bottom - 30.0).abs() < 1e-6));
2080        // Bottom-left cell
2081        assert!(cells.iter().any(|c| (c.bbox.x0 - 0.0).abs() < 1e-6
2082            && (c.bbox.top - 30.0).abs() < 1e-6
2083            && (c.bbox.x1 - 50.0).abs() < 1e-6
2084            && (c.bbox.bottom - 60.0).abs() < 1e-6));
2085        // Bottom-right cell
2086        assert!(cells.iter().any(|c| (c.bbox.x0 - 50.0).abs() < 1e-6
2087            && (c.bbox.top - 30.0).abs() < 1e-6
2088            && (c.bbox.x1 - 100.0).abs() < 1e-6
2089            && (c.bbox.bottom - 60.0).abs() < 1e-6));
2090    }
2091
2092    #[test]
2093    fn test_intersections_to_cells_missing_corner() {
2094        // 2x2 grid but missing the bottom-right corner → 0 cells
2095        // (0,0) (100,0)
2096        // (0,50) ---missing---
2097        let intersections = vec![
2098            make_intersection(0.0, 0.0),
2099            make_intersection(100.0, 0.0),
2100            make_intersection(0.0, 50.0),
2101        ];
2102        let cells = intersections_to_cells(&intersections);
2103        assert!(cells.is_empty());
2104    }
2105
2106    #[test]
2107    fn test_intersections_to_cells_irregular_grid() {
2108        // 3x3 grid but missing center intersection → only corners that form complete rectangles
2109        // (0,0)  (50,0)  (100,0)
2110        // (0,30) ---X--- (100,30)
2111        // (0,60) (50,60) (100,60)
2112        // Without (50,30): top-left and bottom-left cells lose a corner.
2113        // Only (0,0)-(100,0)-(0,30)-(100,30) is complete → 1 big cell top row
2114        // And (0,30)-(100,30)-(0,60)-(100,60) is complete → 1 big cell bottom row
2115        // Plus (0,60)-(50,60) and (50,60)-(100,60) don't have top corners at 50,30
2116        // So we get: cells that have all 4 corners present
2117        let intersections = vec![
2118            make_intersection(0.0, 0.0),
2119            make_intersection(50.0, 0.0),
2120            make_intersection(100.0, 0.0),
2121            make_intersection(0.0, 30.0),
2122            // (50, 30) missing
2123            make_intersection(100.0, 30.0),
2124            make_intersection(0.0, 60.0),
2125            make_intersection(50.0, 60.0),
2126            make_intersection(100.0, 60.0),
2127        ];
2128        let cells = intersections_to_cells(&intersections);
2129        // Top row: (0,0)-(50,0)-(0,30)-(50,30)? No, (50,30) missing → skip
2130        //          (50,0)-(100,0)-(50,30)-(100,30)? No, (50,30) missing → skip
2131        //          (0,0)-(100,0)-(0,30)-(100,30)? The grid only checks adjacent columns.
2132        //            xs = [0, 50, 100], adjacent pairs are (0,50) and (50,100)
2133        //            So this cell would not be formed from the adjacent pair logic.
2134        // Bottom row: (0,30)-(50,30)? (50,30) missing → skip
2135        //             (50,30)-(100,30)? (50,30) missing → skip
2136        // Bottom row with y=30..60: (0,30)-(50,30) missing → skip; (50,30)-(100,30) missing → skip
2137        //   But (0,30)-(100,30)-(0,60)-(100,60) is NOT adjacent columns
2138        // Result: 0 cells (because the missing center breaks all adjacent cell formations)
2139        // Wait - let me reconsider:
2140        // xs = [0, 50, 100], ys = [0, 30, 60]
2141        // (0,50) x (0,30): corners (0,0),(50,0),(0,30),(50,30) → (50,30) missing → skip
2142        // (50,100) x (0,30): corners (50,0),(100,0),(50,30),(100,30) → (50,30) missing → skip
2143        // (0,50) x (30,60): corners (0,30),(50,30),(0,60),(50,60) → (50,30) missing → skip
2144        // (50,100) x (30,60): corners (50,30),(100,30),(50,60),(100,60) → (50,30) missing → skip
2145        // All cells need (50,30) which is missing → 0 cells
2146        assert_eq!(cells.len(), 0);
2147    }
2148
2149    #[test]
2150    fn test_intersections_to_cells_partial_grid_with_valid_cells() {
2151        // L-shaped grid where some cells are complete
2152        // (0,0) (50,0)
2153        // (0,30) (50,30) (100,30)
2154        //                (100,60)
2155        // Only the top-left cell (0,0)-(50,0)-(0,30)-(50,30) has all 4 corners
2156        let intersections = vec![
2157            make_intersection(0.0, 0.0),
2158            make_intersection(50.0, 0.0),
2159            make_intersection(0.0, 30.0),
2160            make_intersection(50.0, 30.0),
2161            make_intersection(100.0, 30.0),
2162            make_intersection(100.0, 60.0),
2163        ];
2164        let cells = intersections_to_cells(&intersections);
2165        assert_eq!(cells.len(), 1);
2166        assert_approx(cells[0].bbox.x0, 0.0);
2167        assert_approx(cells[0].bbox.top, 0.0);
2168        assert_approx(cells[0].bbox.x1, 50.0);
2169        assert_approx(cells[0].bbox.bottom, 30.0);
2170    }
2171
2172    #[test]
2173    fn test_intersections_to_cells_single_point() {
2174        // Single intersection point → no cells
2175        let intersections = vec![make_intersection(50.0, 50.0)];
2176        let cells = intersections_to_cells(&intersections);
2177        assert!(cells.is_empty());
2178    }
2179
2180    #[test]
2181    fn test_intersections_to_cells_collinear_points() {
2182        // Points along a single line (no area) → no cells
2183        let intersections = vec![
2184            make_intersection(0.0, 50.0),
2185            make_intersection(50.0, 50.0),
2186            make_intersection(100.0, 50.0),
2187        ];
2188        let cells = intersections_to_cells(&intersections);
2189        assert!(cells.is_empty());
2190    }
2191
2192    #[test]
2193    fn test_intersections_to_cells_4x3_grid() {
2194        // 4 columns × 3 rows → 3×2 = 6 cells
2195        let mut intersections = Vec::new();
2196        for &x in &[0.0, 40.0, 80.0, 120.0] {
2197            for &y in &[0.0, 30.0, 60.0] {
2198                intersections.push(make_intersection(x, y));
2199            }
2200        }
2201        let cells = intersections_to_cells(&intersections);
2202        assert_eq!(cells.len(), 6);
2203    }
2204
2205    #[test]
2206    fn test_intersections_to_cells_text_is_none() {
2207        // All cells should have text = None (text extraction is a separate step)
2208        let intersections = vec![
2209            make_intersection(0.0, 0.0),
2210            make_intersection(100.0, 0.0),
2211            make_intersection(0.0, 50.0),
2212            make_intersection(100.0, 50.0),
2213        ];
2214        let cells = intersections_to_cells(&intersections);
2215        for cell in &cells {
2216            assert!(cell.text.is_none());
2217        }
2218    }
2219
2220    // --- cells_to_tables tests ---
2221
2222    fn make_cell(x0: f64, top: f64, x1: f64, bottom: f64) -> Cell {
2223        Cell {
2224            bbox: BBox::new(x0, top, x1, bottom),
2225            text: None,
2226        }
2227    }
2228
2229    #[test]
2230    fn test_cells_to_tables_empty() {
2231        let tables = cells_to_tables(Vec::new());
2232        assert!(tables.is_empty());
2233    }
2234
2235    #[test]
2236    fn test_cells_to_tables_single_cell() {
2237        // A single cell forms a single table
2238        let cells = vec![make_cell(0.0, 0.0, 50.0, 30.0)];
2239        let tables = cells_to_tables(cells);
2240        assert_eq!(tables.len(), 1);
2241        assert_approx(tables[0].bbox.x0, 0.0);
2242        assert_approx(tables[0].bbox.top, 0.0);
2243        assert_approx(tables[0].bbox.x1, 50.0);
2244        assert_approx(tables[0].bbox.bottom, 30.0);
2245        assert_eq!(tables[0].cells.len(), 1);
2246        assert_eq!(tables[0].rows.len(), 1);
2247        assert_eq!(tables[0].rows[0].len(), 1);
2248        assert_eq!(tables[0].columns.len(), 1);
2249        assert_eq!(tables[0].columns[0].len(), 1);
2250    }
2251
2252    #[test]
2253    fn test_cells_to_tables_single_table_2x2() {
2254        // 2x2 grid: 4 cells sharing edges → 1 table
2255        let cells = vec![
2256            make_cell(0.0, 0.0, 50.0, 30.0),
2257            make_cell(50.0, 0.0, 100.0, 30.0),
2258            make_cell(0.0, 30.0, 50.0, 60.0),
2259            make_cell(50.0, 30.0, 100.0, 60.0),
2260        ];
2261        let tables = cells_to_tables(cells);
2262        assert_eq!(tables.len(), 1);
2263        assert_approx(tables[0].bbox.x0, 0.0);
2264        assert_approx(tables[0].bbox.top, 0.0);
2265        assert_approx(tables[0].bbox.x1, 100.0);
2266        assert_approx(tables[0].bbox.bottom, 60.0);
2267        assert_eq!(tables[0].cells.len(), 4);
2268        // 2 rows, each with 2 cells
2269        assert_eq!(tables[0].rows.len(), 2);
2270        assert_eq!(tables[0].rows[0].len(), 2);
2271        assert_eq!(tables[0].rows[1].len(), 2);
2272        // 2 columns, each with 2 cells
2273        assert_eq!(tables[0].columns.len(), 2);
2274        assert_eq!(tables[0].columns[0].len(), 2);
2275        assert_eq!(tables[0].columns[1].len(), 2);
2276    }
2277
2278    #[test]
2279    fn test_cells_to_tables_single_table_rows_ordered() {
2280        // Verify rows are ordered top-to-bottom, left-to-right
2281        let cells = vec![
2282            make_cell(50.0, 30.0, 100.0, 60.0), // bottom-right (given first to test ordering)
2283            make_cell(0.0, 0.0, 50.0, 30.0),    // top-left
2284            make_cell(50.0, 0.0, 100.0, 30.0),  // top-right
2285            make_cell(0.0, 30.0, 50.0, 60.0),   // bottom-left
2286        ];
2287        let tables = cells_to_tables(cells);
2288        assert_eq!(tables.len(), 1);
2289        // Row 0 (top): left then right
2290        assert_approx(tables[0].rows[0][0].bbox.x0, 0.0);
2291        assert_approx(tables[0].rows[0][1].bbox.x0, 50.0);
2292        // Row 1 (bottom): left then right
2293        assert_approx(tables[0].rows[1][0].bbox.x0, 0.0);
2294        assert_approx(tables[0].rows[1][1].bbox.x0, 50.0);
2295    }
2296
2297    #[test]
2298    fn test_cells_to_tables_single_table_columns_ordered() {
2299        // Verify columns are ordered left-to-right, top-to-bottom
2300        let cells = vec![
2301            make_cell(0.0, 0.0, 50.0, 30.0),
2302            make_cell(50.0, 0.0, 100.0, 30.0),
2303            make_cell(0.0, 30.0, 50.0, 60.0),
2304            make_cell(50.0, 30.0, 100.0, 60.0),
2305        ];
2306        let tables = cells_to_tables(cells);
2307        assert_eq!(tables.len(), 1);
2308        // Column 0 (left): top then bottom
2309        assert_approx(tables[0].columns[0][0].bbox.top, 0.0);
2310        assert_approx(tables[0].columns[0][1].bbox.top, 30.0);
2311        // Column 1 (right): top then bottom
2312        assert_approx(tables[0].columns[1][0].bbox.top, 0.0);
2313        assert_approx(tables[0].columns[1][1].bbox.top, 30.0);
2314    }
2315
2316    #[test]
2317    fn test_cells_to_tables_two_separate_tables() {
2318        // Two tables far apart on the same page
2319        // Table 1: top-left area
2320        // Table 2: bottom-right area (no shared edges)
2321        let cells = vec![
2322            // Table 1
2323            make_cell(0.0, 0.0, 50.0, 30.0),
2324            make_cell(50.0, 0.0, 100.0, 30.0),
2325            // Table 2
2326            make_cell(200.0, 200.0, 250.0, 230.0),
2327            make_cell(250.0, 200.0, 300.0, 230.0),
2328        ];
2329        let tables = cells_to_tables(cells);
2330        assert_eq!(tables.len(), 2);
2331
2332        // Sort tables by x0 to get deterministic order
2333        let mut tables = tables;
2334        tables.sort_by(|a, b| a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap());
2335
2336        // Table 1
2337        assert_approx(tables[0].bbox.x0, 0.0);
2338        assert_approx(tables[0].bbox.x1, 100.0);
2339        assert_eq!(tables[0].cells.len(), 2);
2340        assert_eq!(tables[0].rows.len(), 1);
2341        assert_eq!(tables[0].columns.len(), 2);
2342
2343        // Table 2
2344        assert_approx(tables[1].bbox.x0, 200.0);
2345        assert_approx(tables[1].bbox.x1, 300.0);
2346        assert_eq!(tables[1].cells.len(), 2);
2347        assert_eq!(tables[1].rows.len(), 1);
2348        assert_eq!(tables[1].columns.len(), 2);
2349    }
2350
2351    #[test]
2352    fn test_cells_to_tables_3x3_grid() {
2353        // 3 cols × 3 rows = 9 cells, all connected → 1 table
2354        let mut cells = Vec::new();
2355        for row in 0..3 {
2356            for col in 0..3 {
2357                let x0 = col as f64 * 40.0;
2358                let top = row as f64 * 30.0;
2359                cells.push(make_cell(x0, top, x0 + 40.0, top + 30.0));
2360            }
2361        }
2362        let tables = cells_to_tables(cells);
2363        assert_eq!(tables.len(), 1);
2364        assert_eq!(tables[0].cells.len(), 9);
2365        assert_eq!(tables[0].rows.len(), 3);
2366        for row in &tables[0].rows {
2367            assert_eq!(row.len(), 3);
2368        }
2369        assert_eq!(tables[0].columns.len(), 3);
2370        for col in &tables[0].columns {
2371            assert_eq!(col.len(), 3);
2372        }
2373        assert_approx(tables[0].bbox.x0, 0.0);
2374        assert_approx(tables[0].bbox.top, 0.0);
2375        assert_approx(tables[0].bbox.x1, 120.0);
2376        assert_approx(tables[0].bbox.bottom, 90.0);
2377    }
2378
2379    #[test]
2380    fn test_cells_to_tables_single_row() {
2381        // 3 cells in a single row → 1 table with 1 row, 3 columns
2382        let cells = vec![
2383            make_cell(0.0, 0.0, 40.0, 30.0),
2384            make_cell(40.0, 0.0, 80.0, 30.0),
2385            make_cell(80.0, 0.0, 120.0, 30.0),
2386        ];
2387        let tables = cells_to_tables(cells);
2388        assert_eq!(tables.len(), 1);
2389        assert_eq!(tables[0].rows.len(), 1);
2390        assert_eq!(tables[0].rows[0].len(), 3);
2391        assert_eq!(tables[0].columns.len(), 3);
2392        for col in &tables[0].columns {
2393            assert_eq!(col.len(), 1);
2394        }
2395    }
2396
2397    #[test]
2398    fn test_cells_to_tables_single_column() {
2399        // 3 cells in a single column → 1 table with 3 rows, 1 column
2400        let cells = vec![
2401            make_cell(0.0, 0.0, 50.0, 30.0),
2402            make_cell(0.0, 30.0, 50.0, 60.0),
2403            make_cell(0.0, 60.0, 50.0, 90.0),
2404        ];
2405        let tables = cells_to_tables(cells);
2406        assert_eq!(tables.len(), 1);
2407        assert_eq!(tables[0].rows.len(), 3);
2408        for row in &tables[0].rows {
2409            assert_eq!(row.len(), 1);
2410        }
2411        assert_eq!(tables[0].columns.len(), 1);
2412        assert_eq!(tables[0].columns[0].len(), 3);
2413    }
2414
2415    // --- US-035: Lattice strategy - full pipeline tests ---
2416
2417    fn make_h_edge_src(x0: f64, y: f64, x1: f64, source: crate::edges::EdgeSource) -> Edge {
2418        Edge {
2419            x0,
2420            top: y,
2421            x1,
2422            bottom: y,
2423            orientation: Orientation::Horizontal,
2424            source,
2425        }
2426    }
2427
2428    fn make_v_edge_src(x: f64, top: f64, bottom: f64, source: crate::edges::EdgeSource) -> Edge {
2429        Edge {
2430            x0: x,
2431            top,
2432            x1: x,
2433            bottom,
2434            orientation: Orientation::Vertical,
2435            source,
2436        }
2437    }
2438
2439    #[test]
2440    fn test_lattice_simple_bordered_table() {
2441        // Simple 2x2 table from line edges forming a grid:
2442        // 3 horizontal lines at y=0, y=30, y=60 (from x=0 to x=100)
2443        // 3 vertical lines at x=0, x=50, x=100 (from y=0 to y=60)
2444        // Should produce 1 table with 4 cells (2 rows × 2 cols)
2445        let edges = vec![
2446            make_h_edge(0.0, 0.0, 100.0),
2447            make_h_edge(0.0, 30.0, 100.0),
2448            make_h_edge(0.0, 60.0, 100.0),
2449            make_v_edge(0.0, 0.0, 60.0),
2450            make_v_edge(50.0, 0.0, 60.0),
2451            make_v_edge(100.0, 0.0, 60.0),
2452        ];
2453        let settings = TableSettings::default();
2454        let finder = TableFinder::new(edges, settings);
2455        let tables = finder.find_tables();
2456
2457        assert_eq!(tables.len(), 1);
2458        assert_eq!(tables[0].cells.len(), 4);
2459        assert_eq!(tables[0].rows.len(), 2);
2460        assert_eq!(tables[0].rows[0].len(), 2);
2461        assert_eq!(tables[0].rows[1].len(), 2);
2462        assert_approx(tables[0].bbox.x0, 0.0);
2463        assert_approx(tables[0].bbox.top, 0.0);
2464        assert_approx(tables[0].bbox.x1, 100.0);
2465        assert_approx(tables[0].bbox.bottom, 60.0);
2466    }
2467
2468    #[test]
2469    fn test_lattice_with_rect_edges() {
2470        // Lattice strategy includes rect-sourced edges.
2471        // Build edges from rect sources that form a 1-cell table.
2472        let edges = vec![
2473            make_h_edge_src(0.0, 0.0, 100.0, crate::edges::EdgeSource::RectTop),
2474            make_h_edge_src(0.0, 50.0, 100.0, crate::edges::EdgeSource::RectBottom),
2475            make_v_edge_src(0.0, 0.0, 50.0, crate::edges::EdgeSource::RectLeft),
2476            make_v_edge_src(100.0, 0.0, 50.0, crate::edges::EdgeSource::RectRight),
2477        ];
2478        let settings = TableSettings {
2479            strategy: Strategy::Lattice,
2480            ..TableSettings::default()
2481        };
2482        let finder = TableFinder::new(edges, settings);
2483        let tables = finder.find_tables();
2484
2485        // Lattice includes rect edges → should find 1 table with 1 cell
2486        assert_eq!(tables.len(), 1);
2487        assert_eq!(tables[0].cells.len(), 1);
2488    }
2489
2490    #[test]
2491    fn test_lattice_strict_excludes_rect_edges() {
2492        // LatticeStrict should exclude rect-sourced edges.
2493        // Only line-sourced edges should be used.
2494        let edges = vec![
2495            // These rect-sourced edges form a grid by themselves
2496            make_h_edge_src(0.0, 0.0, 100.0, crate::edges::EdgeSource::RectTop),
2497            make_h_edge_src(0.0, 50.0, 100.0, crate::edges::EdgeSource::RectBottom),
2498            make_v_edge_src(0.0, 0.0, 50.0, crate::edges::EdgeSource::RectLeft),
2499            make_v_edge_src(100.0, 0.0, 50.0, crate::edges::EdgeSource::RectRight),
2500        ];
2501        let settings = TableSettings {
2502            strategy: Strategy::LatticeStrict,
2503            ..TableSettings::default()
2504        };
2505        let finder = TableFinder::new(edges, settings);
2506        let tables = finder.find_tables();
2507
2508        // LatticeStrict excludes rect edges → no line edges → no tables
2509        assert!(tables.is_empty());
2510    }
2511
2512    #[test]
2513    fn test_lattice_strict_with_line_edges() {
2514        // LatticeStrict with line-sourced edges should detect tables.
2515        let edges = vec![
2516            make_h_edge_src(0.0, 0.0, 100.0, crate::edges::EdgeSource::Line),
2517            make_h_edge_src(0.0, 50.0, 100.0, crate::edges::EdgeSource::Line),
2518            make_v_edge_src(0.0, 0.0, 50.0, crate::edges::EdgeSource::Line),
2519            make_v_edge_src(100.0, 0.0, 50.0, crate::edges::EdgeSource::Line),
2520        ];
2521        let settings = TableSettings {
2522            strategy: Strategy::LatticeStrict,
2523            ..TableSettings::default()
2524        };
2525        let finder = TableFinder::new(edges, settings);
2526        let tables = finder.find_tables();
2527
2528        assert_eq!(tables.len(), 1);
2529        assert_eq!(tables[0].cells.len(), 1);
2530    }
2531
2532    #[test]
2533    fn test_lattice_edge_min_length_filtering() {
2534        // Edges shorter than edge_min_length should be filtered out.
2535        // Short edges (length 2.0) should be removed with min_length=3.0
2536        let edges = vec![
2537            // These form a valid grid
2538            make_h_edge(0.0, 0.0, 100.0),  // length 100, kept
2539            make_h_edge(0.0, 50.0, 100.0), // length 100, kept
2540            make_v_edge(0.0, 0.0, 50.0),   // length 50, kept
2541            make_v_edge(100.0, 0.0, 50.0), // length 50, kept
2542            // Short edges that should be filtered
2543            make_h_edge(200.0, 0.0, 201.0), // length 1, filtered
2544            make_v_edge(200.0, 0.0, 2.0),   // length 2, filtered
2545        ];
2546        let settings = TableSettings {
2547            edge_min_length: 3.0,
2548            ..TableSettings::default()
2549        };
2550        let finder = TableFinder::new(edges, settings);
2551        let tables = finder.find_tables();
2552
2553        // Only the main grid edges remain → 1 table
2554        assert_eq!(tables.len(), 1);
2555        assert_eq!(tables[0].cells.len(), 1);
2556    }
2557
2558    #[test]
2559    fn test_lattice_edge_min_length_filters_all() {
2560        // If all edges are too short, no tables should be detected.
2561        let edges = vec![
2562            make_h_edge(0.0, 0.0, 2.0),   // length 2
2563            make_h_edge(0.0, 50.0, 1.5),  // length 1.5
2564            make_v_edge(0.0, 0.0, 2.5),   // length 2.5
2565            make_v_edge(100.0, 0.0, 1.0), // length 1
2566        ];
2567        let settings = TableSettings {
2568            edge_min_length: 3.0,
2569            ..TableSettings::default()
2570        };
2571        let finder = TableFinder::new(edges, settings);
2572        let tables = finder.find_tables();
2573
2574        assert!(tables.is_empty());
2575    }
2576
2577    #[test]
2578    fn test_lattice_full_pipeline_snap_and_join() {
2579        // Edges that are slightly misaligned and segmented.
2580        // After snap and join, they should form a valid grid.
2581        //
2582        // Two horizontal edges at y≈0 (slightly off) and y≈50:
2583        //   h1: y=0.5, x=0..60
2584        //   h2: y=-0.3, x=55..100  (same line as h1 after snap, overlapping after join)
2585        //   h3: y=50.0, x=0..100
2586        //
2587        // Two vertical edges at x≈0 and x≈100:
2588        //   v1: x=0.0, y=0..50
2589        //   v2: x=100.2, y=0..25
2590        //   v3: x=99.8, y=23..50  (same line as v2 after snap, overlapping after join)
2591        let edges = vec![
2592            make_h_edge(0.0, 0.5, 60.0),
2593            make_h_edge(55.0, -0.3, 100.0),
2594            make_h_edge(0.0, 50.0, 100.0),
2595            make_v_edge(0.0, 0.0, 50.0),
2596            make_v_edge(100.2, 0.0, 25.0),
2597            make_v_edge(99.8, 23.0, 50.0),
2598        ];
2599        let settings = TableSettings::default(); // snap/join tolerances = 3.0
2600        let finder = TableFinder::new(edges, settings);
2601        let tables = finder.find_tables();
2602
2603        // After snap+join, should form 1 table with 1 cell
2604        assert_eq!(tables.len(), 1);
2605        assert_eq!(tables[0].cells.len(), 1);
2606    }
2607
2608    #[test]
2609    fn test_lattice_empty_edges() {
2610        // No edges → no tables
2611        let finder = TableFinder::new(Vec::new(), TableSettings::default());
2612        let tables = finder.find_tables();
2613        assert!(tables.is_empty());
2614    }
2615
2616    #[test]
2617    fn test_lattice_no_intersections() {
2618        // Parallel edges that don't intersect → no tables
2619        let edges = vec![
2620            make_h_edge(0.0, 0.0, 100.0),
2621            make_h_edge(0.0, 50.0, 100.0),
2622            // No vertical edges → no intersections
2623        ];
2624        let finder = TableFinder::new(edges, TableSettings::default());
2625        let tables = finder.find_tables();
2626        assert!(tables.is_empty());
2627    }
2628
2629    #[test]
2630    fn test_lattice_strict_mixed_line_and_rect_edges() {
2631        // LatticeStrict should use line edges but not rect edges.
2632        // Mix of both: only line edges should be used.
2633        let edges = vec![
2634            // Line edges forming top/bottom
2635            make_h_edge_src(0.0, 0.0, 100.0, crate::edges::EdgeSource::Line),
2636            make_h_edge_src(0.0, 50.0, 100.0, crate::edges::EdgeSource::Line),
2637            // Line edges forming left/right
2638            make_v_edge_src(0.0, 0.0, 50.0, crate::edges::EdgeSource::Line),
2639            make_v_edge_src(100.0, 0.0, 50.0, crate::edges::EdgeSource::Line),
2640            // Rect edge adding a middle vertical line (should be ignored in strict mode)
2641            make_v_edge_src(50.0, 0.0, 50.0, crate::edges::EdgeSource::RectLeft),
2642        ];
2643        let settings = TableSettings {
2644            strategy: Strategy::LatticeStrict,
2645            ..TableSettings::default()
2646        };
2647        let finder = TableFinder::new(edges, settings);
2648        let tables = finder.find_tables();
2649
2650        // Only line edges used → 1 table with 1 cell (not 2 cells)
2651        assert_eq!(tables.len(), 1);
2652        assert_eq!(tables[0].cells.len(), 1);
2653    }
2654
2655    // --- extract_text_for_cells tests (US-036) ---
2656
2657    fn make_char(text: &str, x0: f64, top: f64, x1: f64, bottom: f64) -> Char {
2658        Char {
2659            text: text.to_string(),
2660            bbox: BBox::new(x0, top, x1, bottom),
2661            fontname: "TestFont".to_string(),
2662            size: 12.0,
2663            doctop: top,
2664            upright: true,
2665            direction: crate::text::TextDirection::Ltr,
2666            stroking_color: None,
2667            non_stroking_color: None,
2668            ctm: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
2669            char_code: 0,
2670        }
2671    }
2672
2673    #[test]
2674    fn test_extract_text_single_word_in_cell() {
2675        // Cell: (0,0)-(100,50), chars spelling "Hi" centered in cell
2676        let mut cells = vec![Cell {
2677            bbox: BBox::new(0.0, 0.0, 100.0, 50.0),
2678            text: None,
2679        }];
2680        let chars = vec![
2681            make_char("H", 10.0, 15.0, 20.0, 27.0),
2682            make_char("i", 20.0, 15.0, 28.0, 27.0),
2683        ];
2684        extract_text_for_cells(&mut cells, &chars);
2685        assert_eq!(cells[0].text, Some("Hi".to_string()));
2686    }
2687
2688    #[test]
2689    fn test_extract_text_empty_cell() {
2690        // Cell with no characters inside → text should remain None
2691        let mut cells = vec![Cell {
2692            bbox: BBox::new(0.0, 0.0, 100.0, 50.0),
2693            text: None,
2694        }];
2695        let chars: Vec<Char> = vec![];
2696        extract_text_for_cells(&mut cells, &chars);
2697        assert_eq!(cells[0].text, None);
2698    }
2699
2700    #[test]
2701    fn test_extract_text_chars_outside_cell() {
2702        // All characters are outside the cell bbox → text should be None
2703        let mut cells = vec![Cell {
2704            bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
2705            text: None,
2706        }];
2707        // Chars at x=200, far outside cell
2708        let chars = vec![
2709            make_char("A", 200.0, 10.0, 210.0, 22.0),
2710            make_char("B", 210.0, 10.0, 220.0, 22.0),
2711        ];
2712        extract_text_for_cells(&mut cells, &chars);
2713        assert_eq!(cells[0].text, None);
2714    }
2715
2716    #[test]
2717    fn test_extract_text_center_point_containment() {
2718        // Char bbox partially overlaps cell but center is outside → not included
2719        // Cell: (0,0)-(50,30)
2720        // Char bbox: (48,10)-(60,22) → center = (54, 16) → outside cell
2721        let mut cells = vec![Cell {
2722            bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
2723            text: None,
2724        }];
2725        let chars = vec![make_char("X", 48.0, 10.0, 60.0, 22.0)];
2726        extract_text_for_cells(&mut cells, &chars);
2727        assert_eq!(cells[0].text, None);
2728    }
2729
2730    #[test]
2731    fn test_extract_text_center_inside_cell() {
2732        // Char bbox extends past cell border but center is inside → included
2733        // Cell: (0,0)-(50,30)
2734        // Char bbox: (40,10)-(52,22) → center = (46, 16) → inside cell
2735        let mut cells = vec![Cell {
2736            bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
2737            text: None,
2738        }];
2739        let chars = vec![make_char("Y", 40.0, 10.0, 52.0, 22.0)];
2740        extract_text_for_cells(&mut cells, &chars);
2741        assert_eq!(cells[0].text, Some("Y".to_string()));
2742    }
2743
2744    #[test]
2745    fn test_extract_text_multiple_words_in_cell() {
2746        // Cell with two words separated by a space char
2747        let mut cells = vec![Cell {
2748            bbox: BBox::new(0.0, 0.0, 200.0, 50.0),
2749            text: None,
2750        }];
2751        let chars = vec![
2752            make_char("H", 10.0, 15.0, 20.0, 27.0),
2753            make_char("i", 20.0, 15.0, 28.0, 27.0),
2754            make_char(" ", 28.0, 15.0, 33.0, 27.0),
2755            make_char("B", 33.0, 15.0, 43.0, 27.0),
2756            make_char("o", 43.0, 15.0, 51.0, 27.0),
2757            make_char("b", 51.0, 15.0, 59.0, 27.0),
2758        ];
2759        extract_text_for_cells(&mut cells, &chars);
2760        assert_eq!(cells[0].text, Some("Hi Bob".to_string()));
2761    }
2762
2763    #[test]
2764    fn test_extract_text_multiple_lines_in_cell() {
2765        // Cell with text on two lines (different y positions)
2766        let mut cells = vec![Cell {
2767            bbox: BBox::new(0.0, 0.0, 200.0, 80.0),
2768            text: None,
2769        }];
2770        let chars = vec![
2771            // Line 1: "AB" at y=10
2772            make_char("A", 10.0, 10.0, 20.0, 22.0),
2773            make_char("B", 20.0, 10.0, 30.0, 22.0),
2774            // Line 2: "CD" at y=40
2775            make_char("C", 10.0, 40.0, 20.0, 52.0),
2776            make_char("D", 20.0, 40.0, 30.0, 52.0),
2777        ];
2778        extract_text_for_cells(&mut cells, &chars);
2779        assert_eq!(cells[0].text, Some("AB\nCD".to_string()));
2780    }
2781
2782    #[test]
2783    fn test_extract_text_two_cells() {
2784        // Two cells, each with different text
2785        let mut cells = vec![
2786            Cell {
2787                bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
2788                text: None,
2789            },
2790            Cell {
2791                bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
2792                text: None,
2793            },
2794        ];
2795        let chars = vec![
2796            // "A" in cell 0 (center at (15, 16))
2797            make_char("A", 10.0, 10.0, 20.0, 22.0),
2798            // "B" in cell 1 (center at (65, 16))
2799            make_char("B", 60.0, 10.0, 70.0, 22.0),
2800        ];
2801        extract_text_for_cells(&mut cells, &chars);
2802        assert_eq!(cells[0].text, Some("A".to_string()));
2803        assert_eq!(cells[1].text, Some("B".to_string()));
2804    }
2805
2806    #[test]
2807    fn test_extract_text_no_cells() {
2808        // Empty cells slice → should not panic
2809        let mut cells: Vec<Cell> = vec![];
2810        let chars = vec![make_char("A", 10.0, 10.0, 20.0, 22.0)];
2811        extract_text_for_cells(&mut cells, &chars);
2812        assert!(cells.is_empty());
2813    }
2814
2815    #[test]
2816    fn test_extract_text_mixed_empty_and_populated_cells() {
2817        // Three cells: first has text, second is empty, third has text
2818        let mut cells = vec![
2819            Cell {
2820                bbox: BBox::new(0.0, 0.0, 50.0, 30.0),
2821                text: None,
2822            },
2823            Cell {
2824                bbox: BBox::new(50.0, 0.0, 100.0, 30.0),
2825                text: None,
2826            },
2827            Cell {
2828                bbox: BBox::new(100.0, 0.0, 150.0, 30.0),
2829                text: None,
2830            },
2831        ];
2832        let chars = vec![
2833            make_char("X", 10.0, 10.0, 20.0, 22.0), // in cell 0
2834            make_char("Z", 110.0, 10.0, 120.0, 22.0), // in cell 2
2835                                                    // No chars in cell 1
2836        ];
2837        extract_text_for_cells(&mut cells, &chars);
2838        assert_eq!(cells[0].text, Some("X".to_string()));
2839        assert_eq!(cells[1].text, None);
2840        assert_eq!(cells[2].text, Some("Z".to_string()));
2841    }
2842
2843    // --- Stream strategy tests (US-037) ---
2844
2845    fn make_word(text: &str, x0: f64, top: f64, x1: f64, bottom: f64) -> Word {
2846        Word {
2847            text: text.to_string(),
2848            bbox: BBox::new(x0, top, x1, bottom),
2849            doctop: top,
2850            direction: crate::text::TextDirection::Ltr,
2851            chars: vec![],
2852        }
2853    }
2854
2855    #[test]
2856    fn test_words_to_edges_stream_empty() {
2857        let edges = words_to_edges_stream(&[], 3.0, 3.0, 3, 1);
2858        assert!(edges.is_empty());
2859    }
2860
2861    #[test]
2862    fn test_words_to_edges_stream_vertical_x0_alignment() {
2863        // 3 words with x0 at ~10.0 (within tolerance 3.0)
2864        // Should produce a vertical edge at ~10.0
2865        let words = vec![
2866            make_word("A", 10.0, 10.0, 30.0, 22.0),
2867            make_word("B", 10.0, 30.0, 35.0, 42.0),
2868            make_word("C", 10.0, 50.0, 40.0, 62.0),
2869        ];
2870        let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 1);
2871
2872        // Should have at least one vertical edge from x0 alignment
2873        let v_edges: Vec<&Edge> = edges
2874            .iter()
2875            .filter(|e| e.orientation == Orientation::Vertical)
2876            .collect();
2877        assert!(
2878            !v_edges.is_empty(),
2879            "Should produce vertical edges from x0 alignment"
2880        );
2881
2882        // The vertical edge at x0≈10.0 should span from top=10.0 to bottom=62.0
2883        let v_edge = v_edges
2884            .iter()
2885            .find(|e| (e.x0 - 10.0).abs() < 1.0)
2886            .expect("Should have a vertical edge near x=10");
2887        assert!((v_edge.top - 10.0).abs() < 0.01);
2888        assert!((v_edge.bottom - 62.0).abs() < 0.01);
2889        assert_eq!(v_edge.source, EdgeSource::Stream);
2890    }
2891
2892    #[test]
2893    fn test_words_to_edges_stream_vertical_x1_alignment() {
2894        // 3 words with x1 at ~50.0
2895        let words = vec![
2896            make_word("A", 10.0, 10.0, 50.0, 22.0),
2897            make_word("B", 20.0, 30.0, 50.0, 42.0),
2898            make_word("C", 15.0, 50.0, 50.0, 62.0),
2899        ];
2900        let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 1);
2901
2902        let v_edges: Vec<&Edge> = edges
2903            .iter()
2904            .filter(|e| e.orientation == Orientation::Vertical)
2905            .collect();
2906        assert!(
2907            !v_edges.is_empty(),
2908            "Should produce vertical edges from x1 alignment"
2909        );
2910
2911        let v_edge = v_edges
2912            .iter()
2913            .find(|e| (e.x0 - 50.0).abs() < 1.0)
2914            .expect("Should have a vertical edge near x=50");
2915        assert!((v_edge.top - 10.0).abs() < 0.01);
2916        assert!((v_edge.bottom - 62.0).abs() < 0.01);
2917    }
2918
2919    #[test]
2920    fn test_words_to_edges_stream_horizontal_top_alignment() {
2921        // 3 words with top at ~10.0 (min_words_horizontal = 1, but 3 words align)
2922        let words = vec![
2923            make_word("A", 10.0, 10.0, 30.0, 22.0),
2924            make_word("B", 40.0, 10.0, 60.0, 22.0),
2925            make_word("C", 70.0, 10.0, 90.0, 22.0),
2926        ];
2927        let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 1);
2928
2929        let h_edges: Vec<&Edge> = edges
2930            .iter()
2931            .filter(|e| e.orientation == Orientation::Horizontal)
2932            .collect();
2933        assert!(
2934            !h_edges.is_empty(),
2935            "Should produce horizontal edges from top alignment"
2936        );
2937
2938        // The horizontal edge at y≈10.0 should span from x0=10.0 to x1=90.0
2939        let h_edge = h_edges
2940            .iter()
2941            .find(|e| (e.top - 10.0).abs() < 1.0)
2942            .expect("Should have a horizontal edge near y=10");
2943        assert!((h_edge.x0 - 10.0).abs() < 0.01);
2944        assert!((h_edge.x1 - 90.0).abs() < 0.01);
2945    }
2946
2947    #[test]
2948    fn test_words_to_edges_stream_horizontal_bottom_alignment() {
2949        // 3 words with bottom at ~22.0
2950        let words = vec![
2951            make_word("A", 10.0, 10.0, 30.0, 22.0),
2952            make_word("B", 40.0, 12.0, 60.0, 22.0),
2953            make_word("C", 70.0, 8.0, 90.0, 22.0),
2954        ];
2955        let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 1);
2956
2957        let h_edges: Vec<&Edge> = edges
2958            .iter()
2959            .filter(|e| e.orientation == Orientation::Horizontal)
2960            .collect();
2961        assert!(
2962            !h_edges.is_empty(),
2963            "Should produce horizontal edges from bottom alignment"
2964        );
2965
2966        let h_edge = h_edges
2967            .iter()
2968            .find(|e| (e.top - 22.0).abs() < 1.0)
2969            .expect("Should have a horizontal edge near y=22");
2970        assert!((h_edge.x0 - 10.0).abs() < 0.01);
2971        assert!((h_edge.x1 - 90.0).abs() < 0.01);
2972    }
2973
2974    #[test]
2975    fn test_words_to_edges_stream_threshold_filtering_vertical() {
2976        // Only 2 words with aligned x0, but min_words_vertical=3 → no vertical edge
2977        let words = vec![
2978            make_word("A", 10.0, 10.0, 30.0, 22.0),
2979            make_word("B", 10.0, 30.0, 35.0, 42.0),
2980        ];
2981        let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 1);
2982
2983        let v_edges: Vec<&Edge> = edges
2984            .iter()
2985            .filter(|e| e.orientation == Orientation::Vertical)
2986            .collect();
2987        assert!(
2988            v_edges.is_empty(),
2989            "Should not produce vertical edges below threshold"
2990        );
2991    }
2992
2993    #[test]
2994    fn test_words_to_edges_stream_threshold_filtering_horizontal() {
2995        // Only 2 words with aligned top, but min_words_horizontal=3 → no horizontal edge
2996        let words = vec![
2997            make_word("A", 10.0, 10.0, 30.0, 22.0),
2998            make_word("B", 40.0, 10.0, 60.0, 22.0),
2999        ];
3000        let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 3);
3001
3002        let h_edges: Vec<&Edge> = edges
3003            .iter()
3004            .filter(|e| e.orientation == Orientation::Horizontal)
3005            .collect();
3006        assert!(
3007            h_edges.is_empty(),
3008            "Should not produce horizontal edges below threshold"
3009        );
3010    }
3011
3012    #[test]
3013    fn test_words_to_edges_stream_tolerance_grouping() {
3014        // Words with x0 slightly different but within tolerance
3015        let words = vec![
3016            make_word("A", 10.0, 10.0, 30.0, 22.0),
3017            make_word("B", 11.5, 30.0, 35.0, 42.0),
3018            make_word("C", 12.0, 50.0, 40.0, 62.0),
3019        ];
3020        let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 1);
3021
3022        let v_edges: Vec<&Edge> = edges
3023            .iter()
3024            .filter(|e| e.orientation == Orientation::Vertical)
3025            .collect();
3026        // Should group x0 values 10.0, 11.5, 12.0 into one cluster (all within 3.0 tolerance)
3027        assert!(
3028            !v_edges.is_empty(),
3029            "Should group nearby x0 values within tolerance"
3030        );
3031    }
3032
3033    #[test]
3034    fn test_words_to_edges_stream_no_grouping_beyond_tolerance() {
3035        // Words with x0 values far apart → no cluster of 3
3036        let words = vec![
3037            make_word("A", 10.0, 10.0, 30.0, 22.0),
3038            make_word("B", 50.0, 30.0, 70.0, 42.0),
3039            make_word("C", 90.0, 50.0, 110.0, 62.0),
3040        ];
3041        let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 1);
3042
3043        let v_edges: Vec<&Edge> = edges
3044            .iter()
3045            .filter(|e| e.orientation == Orientation::Vertical)
3046            .collect();
3047        assert!(
3048            v_edges.is_empty(),
3049            "Should not group x0 values that are far apart"
3050        );
3051    }
3052
3053    #[test]
3054    fn test_stream_strategy_full_pipeline() {
3055        // Simulate a 3-column borderless table with 3 rows:
3056        //   Row 0: "A"  "B"  "C"  (top=10, bottom=22)
3057        //   Row 1: "D"  "E"  "F"  (top=30, bottom=42)
3058        //   Row 2: "G"  "H"  "I"  (top=50, bottom=62)
3059        // Columns: x0=10, x0=50, x0=90 → left edges
3060        //          x1=30, x1=70, x1=110 → right edges
3061        let words = vec![
3062            // Row 0
3063            make_word("A", 10.0, 10.0, 30.0, 22.0),
3064            make_word("B", 50.0, 10.0, 70.0, 22.0),
3065            make_word("C", 90.0, 10.0, 110.0, 22.0),
3066            // Row 1
3067            make_word("D", 10.0, 30.0, 30.0, 42.0),
3068            make_word("E", 50.0, 30.0, 70.0, 42.0),
3069            make_word("F", 90.0, 30.0, 110.0, 42.0),
3070            // Row 2
3071            make_word("G", 10.0, 50.0, 30.0, 62.0),
3072            make_word("H", 50.0, 50.0, 70.0, 62.0),
3073            make_word("I", 90.0, 50.0, 110.0, 62.0),
3074        ];
3075
3076        let settings = TableSettings {
3077            strategy: Strategy::Stream,
3078            min_words_vertical: 3,
3079            min_words_horizontal: 3,
3080            ..TableSettings::default()
3081        };
3082
3083        let finder = TableFinder::new_with_words(vec![], words, settings);
3084        let tables = finder.find_tables();
3085
3086        // Should detect at least one table
3087        assert!(!tables.is_empty(), "Stream strategy should detect a table");
3088
3089        // The table should have cells
3090        assert!(
3091            !tables[0].cells.is_empty(),
3092            "Table should have detected cells"
3093        );
3094    }
3095
3096    #[test]
3097    fn test_stream_strategy_with_no_words() {
3098        // Empty words → no synthetic edges → no tables
3099        let settings = TableSettings {
3100            strategy: Strategy::Stream,
3101            ..TableSettings::default()
3102        };
3103        let finder = TableFinder::new_with_words(vec![], vec![], settings);
3104        let tables = finder.find_tables();
3105        assert!(tables.is_empty());
3106    }
3107
3108    #[test]
3109    fn test_stream_edge_source_is_stream() {
3110        // All synthetic edges from Stream should have EdgeSource::Stream
3111        let words = vec![
3112            make_word("A", 10.0, 10.0, 30.0, 22.0),
3113            make_word("B", 10.0, 30.0, 35.0, 42.0),
3114            make_word("C", 10.0, 50.0, 40.0, 62.0),
3115        ];
3116        let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 1);
3117        for edge in &edges {
3118            assert_eq!(
3119                edge.source,
3120                EdgeSource::Stream,
3121                "All stream edges should have EdgeSource::Stream"
3122            );
3123        }
3124    }
3125
3126    #[test]
3127    fn test_stream_strategy_min_words_horizontal_default() {
3128        // Default min_words_horizontal=1 means even a single row produces horizontal edges
3129        let words = vec![
3130            make_word("A", 10.0, 10.0, 30.0, 22.0),
3131            make_word("B", 50.0, 10.0, 70.0, 22.0),
3132            make_word("C", 90.0, 10.0, 110.0, 22.0),
3133        ];
3134        // min_words_horizontal=1 means each group of 1+ word at same y produces horizontal edges
3135        let edges = words_to_edges_stream(&words, 3.0, 3.0, 3, 1);
3136
3137        let h_edges: Vec<&Edge> = edges
3138            .iter()
3139            .filter(|e| e.orientation == Orientation::Horizontal)
3140            .collect();
3141        assert!(
3142            !h_edges.is_empty(),
3143            "min_words_horizontal=1 should produce horizontal edges for 3 aligned words"
3144        );
3145    }
3146
3147    // --- US-038: Explicit strategy tests ---
3148
3149    #[test]
3150    fn test_explicit_lines_to_edges_basic() {
3151        // A 3x3 grid (3 horizontal + 3 vertical lines) should produce edges
3152        let explicit = ExplicitLines {
3153            horizontal_lines: vec![10.0, 30.0, 50.0],
3154            vertical_lines: vec![100.0, 200.0, 300.0],
3155        };
3156        let edges = explicit_lines_to_edges(&explicit);
3157
3158        // 3 horizontal + 3 vertical = 6 edges
3159        assert_eq!(edges.len(), 6);
3160
3161        let h_edges: Vec<&Edge> = edges
3162            .iter()
3163            .filter(|e| e.orientation == Orientation::Horizontal)
3164            .collect();
3165        let v_edges: Vec<&Edge> = edges
3166            .iter()
3167            .filter(|e| e.orientation == Orientation::Vertical)
3168            .collect();
3169        assert_eq!(h_edges.len(), 3);
3170        assert_eq!(v_edges.len(), 3);
3171
3172        // Horizontal edges span from min_x to max_x of vertical lines
3173        for h in &h_edges {
3174            assert_eq!(h.x0, 100.0);
3175            assert_eq!(h.x1, 300.0);
3176        }
3177        // Vertical edges span from min_y to max_y of horizontal lines
3178        for v in &v_edges {
3179            assert_eq!(v.top, 10.0);
3180            assert_eq!(v.bottom, 50.0);
3181        }
3182    }
3183
3184    #[test]
3185    fn test_explicit_lines_to_edges_empty_horizontal() {
3186        let explicit = ExplicitLines {
3187            horizontal_lines: vec![],
3188            vertical_lines: vec![100.0, 200.0],
3189        };
3190        let edges = explicit_lines_to_edges(&explicit);
3191        // No horizontal lines means no span for verticals either → no edges
3192        assert!(edges.is_empty());
3193    }
3194
3195    #[test]
3196    fn test_explicit_lines_to_edges_empty_vertical() {
3197        let explicit = ExplicitLines {
3198            horizontal_lines: vec![10.0, 20.0],
3199            vertical_lines: vec![],
3200        };
3201        let edges = explicit_lines_to_edges(&explicit);
3202        // No vertical lines means no span for horizontals either → no edges
3203        assert!(edges.is_empty());
3204    }
3205
3206    #[test]
3207    fn test_explicit_lines_to_edges_both_empty() {
3208        let explicit = ExplicitLines {
3209            horizontal_lines: vec![],
3210            vertical_lines: vec![],
3211        };
3212        let edges = explicit_lines_to_edges(&explicit);
3213        assert!(edges.is_empty());
3214    }
3215
3216    #[test]
3217    fn test_explicit_edge_source_is_explicit() {
3218        let explicit = ExplicitLines {
3219            horizontal_lines: vec![10.0, 50.0],
3220            vertical_lines: vec![100.0, 200.0],
3221        };
3222        let edges = explicit_lines_to_edges(&explicit);
3223        for edge in &edges {
3224            assert_eq!(edge.source, EdgeSource::Explicit);
3225        }
3226    }
3227
3228    #[test]
3229    fn test_explicit_grid_detection() {
3230        // A 3x3 grid should produce 4 cells
3231        let explicit = ExplicitLines {
3232            horizontal_lines: vec![0.0, 20.0, 40.0],
3233            vertical_lines: vec![0.0, 50.0, 100.0],
3234        };
3235        let settings = TableSettings {
3236            strategy: Strategy::Explicit,
3237            explicit_lines: Some(explicit),
3238            ..TableSettings::default()
3239        };
3240        let finder = TableFinder::new(vec![], settings);
3241        let tables = finder.find_tables();
3242
3243        assert_eq!(tables.len(), 1);
3244        assert_eq!(tables[0].cells.len(), 4);
3245        assert_eq!(tables[0].rows.len(), 2);
3246        assert_eq!(tables[0].columns.len(), 2);
3247    }
3248
3249    #[test]
3250    fn test_explicit_2x2_grid() {
3251        // A 2x2 grid (2 horizontal + 2 vertical) → 1 cell
3252        let explicit = ExplicitLines {
3253            horizontal_lines: vec![10.0, 50.0],
3254            vertical_lines: vec![100.0, 300.0],
3255        };
3256        let settings = TableSettings {
3257            strategy: Strategy::Explicit,
3258            explicit_lines: Some(explicit),
3259            ..TableSettings::default()
3260        };
3261        let finder = TableFinder::new(vec![], settings);
3262        let tables = finder.find_tables();
3263
3264        assert_eq!(tables.len(), 1);
3265        assert_eq!(tables[0].cells.len(), 1);
3266        let cell = &tables[0].cells[0];
3267        assert_eq!(cell.bbox.x0, 100.0);
3268        assert_eq!(cell.bbox.top, 10.0);
3269        assert_eq!(cell.bbox.x1, 300.0);
3270        assert_eq!(cell.bbox.bottom, 50.0);
3271    }
3272
3273    #[test]
3274    fn test_explicit_strategy_no_explicit_lines() {
3275        // Explicit strategy with no explicit_lines should return no tables
3276        let settings = TableSettings {
3277            strategy: Strategy::Explicit,
3278            explicit_lines: None,
3279            ..TableSettings::default()
3280        };
3281        let finder = TableFinder::new(vec![], settings);
3282        let tables = finder.find_tables();
3283        assert!(tables.is_empty());
3284    }
3285
3286    #[test]
3287    fn test_explicit_mixing_with_detected_edges() {
3288        // Detected edges form partial grid; explicit lines complete it
3289        // Detected: two vertical edges at x=0 and x=100
3290        let detected_edges = vec![make_v_edge(0.0, 0.0, 40.0), make_v_edge(100.0, 0.0, 40.0)];
3291        // Explicit: add horizontal lines at y=0 and y=40
3292        let explicit = ExplicitLines {
3293            horizontal_lines: vec![0.0, 40.0],
3294            vertical_lines: vec![], // no explicit verticals
3295        };
3296        let settings = TableSettings {
3297            strategy: Strategy::Explicit,
3298            explicit_lines: Some(explicit),
3299            ..TableSettings::default()
3300        };
3301        let finder = TableFinder::new(detected_edges, settings);
3302        let tables = finder.find_tables();
3303
3304        // The explicit horizontal lines + detected vertical edges form a complete grid → 1 cell
3305        assert_eq!(tables.len(), 1);
3306        assert_eq!(tables[0].cells.len(), 1);
3307    }
3308
3309    #[test]
3310    fn test_explicit_single_line_each() {
3311        // Only 1 horizontal + 1 vertical → no cells (need at least 2×2 grid)
3312        let explicit = ExplicitLines {
3313            horizontal_lines: vec![10.0],
3314            vertical_lines: vec![100.0],
3315        };
3316        let settings = TableSettings {
3317            strategy: Strategy::Explicit,
3318            explicit_lines: Some(explicit),
3319            ..TableSettings::default()
3320        };
3321        let finder = TableFinder::new(vec![], settings);
3322        let tables = finder.find_tables();
3323        assert!(tables.is_empty());
3324    }
3325
3326    #[test]
3327    fn test_explicit_unsorted_coordinates() {
3328        // Coordinates provided in unsorted order should still work
3329        let explicit = ExplicitLines {
3330            horizontal_lines: vec![40.0, 0.0, 20.0],
3331            vertical_lines: vec![100.0, 0.0, 50.0],
3332        };
3333        let settings = TableSettings {
3334            strategy: Strategy::Explicit,
3335            explicit_lines: Some(explicit),
3336            ..TableSettings::default()
3337        };
3338        let finder = TableFinder::new(vec![], settings);
3339        let tables = finder.find_tables();
3340
3341        assert_eq!(tables.len(), 1);
3342        assert_eq!(tables[0].cells.len(), 4); // 3x3 grid → 4 cells
3343    }
3344}