Skip to main content

pdfsink_rs/
table.rs

1use crate::clustering::cluster_items;
2use crate::geometry::{objects_to_bbox, rect_to_edges, snap_edges};
3use crate::text::{extract_text, extract_words, TextOptions};
4use crate::types::{
5    BBox, Char, Edge, Line, Orientation, Page, Word,
6};
7use ordered_float::OrderedFloat;
8use std::collections::{BTreeMap, BTreeSet};
9
10#[derive(Debug, Clone, Copy, PartialEq, Eq)]
11pub enum TableStrategy {
12    Lines,
13    LinesStrict,
14    Text,
15    Explicit,
16}
17
18impl Default for TableStrategy {
19    fn default() -> Self {
20        Self::Lines
21    }
22}
23
24impl std::str::FromStr for TableStrategy {
25    type Err = crate::Error;
26
27    fn from_str(s: &str) -> crate::Result<Self> {
28        match s {
29            "lines" => Ok(Self::Lines),
30            "lines_strict" => Ok(Self::LinesStrict),
31            "text" => Ok(Self::Text),
32            "explicit" => Ok(Self::Explicit),
33            other => Err(crate::Error::Message(format!("unknown table strategy: {other}"))),
34        }
35    }
36}
37
38#[derive(Debug, Clone)]
39pub enum ExplicitLine {
40    Position(f64),
41    Edge(Edge),
42}
43
44#[derive(Debug, Clone)]
45pub struct TableSettings {
46    pub vertical_strategy: TableStrategy,
47    pub horizontal_strategy: TableStrategy,
48    pub explicit_vertical_lines: Vec<ExplicitLine>,
49    pub explicit_horizontal_lines: Vec<ExplicitLine>,
50    pub snap_tolerance: f64,
51    pub snap_x_tolerance: Option<f64>,
52    pub snap_y_tolerance: Option<f64>,
53    pub join_tolerance: f64,
54    pub join_x_tolerance: Option<f64>,
55    pub join_y_tolerance: Option<f64>,
56    pub edge_min_length: f64,
57    pub edge_min_length_prefilter: f64,
58    pub min_words_vertical: usize,
59    pub min_words_horizontal: usize,
60    pub intersection_tolerance: f64,
61    pub intersection_x_tolerance: Option<f64>,
62    pub intersection_y_tolerance: Option<f64>,
63    pub text_options: TextOptions,
64}
65
66impl Default for TableSettings {
67    fn default() -> Self {
68        Self {
69            vertical_strategy: TableStrategy::Lines,
70            horizontal_strategy: TableStrategy::Lines,
71            explicit_vertical_lines: Vec::new(),
72            explicit_horizontal_lines: Vec::new(),
73            snap_tolerance: 3.0,
74            snap_x_tolerance: None,
75            snap_y_tolerance: None,
76            join_tolerance: 3.0,
77            join_x_tolerance: None,
78            join_y_tolerance: None,
79            edge_min_length: 3.0,
80            edge_min_length_prefilter: 1.0,
81            min_words_vertical: 3,
82            min_words_horizontal: 1,
83            intersection_tolerance: 3.0,
84            intersection_x_tolerance: None,
85            intersection_y_tolerance: None,
86            text_options: TextOptions::default(),
87        }
88    }
89}
90
91impl TableSettings {
92    pub fn snap_x_tolerance(&self) -> f64 {
93        self.snap_x_tolerance.unwrap_or(self.snap_tolerance)
94    }
95
96    pub fn snap_y_tolerance(&self) -> f64 {
97        self.snap_y_tolerance.unwrap_or(self.snap_tolerance)
98    }
99
100    pub fn join_x_tolerance(&self) -> f64 {
101        self.join_x_tolerance.unwrap_or(self.join_tolerance)
102    }
103
104    pub fn join_y_tolerance(&self) -> f64 {
105        self.join_y_tolerance.unwrap_or(self.join_tolerance)
106    }
107
108    pub fn intersection_x_tolerance(&self) -> f64 {
109        self.intersection_x_tolerance.unwrap_or(self.intersection_tolerance)
110    }
111
112    pub fn intersection_y_tolerance(&self) -> f64 {
113        self.intersection_y_tolerance.unwrap_or(self.intersection_tolerance)
114    }
115}
116
117#[derive(Debug, Clone)]
118pub struct CellGroup {
119    pub cells: Vec<Option<BBox>>,
120    pub bbox: BBox,
121}
122
123#[derive(Debug, Clone)]
124pub struct Table {
125    pub cells: Vec<BBox>,
126    pub bbox: BBox,
127}
128
129impl Table {
130    pub fn rows(&self) -> Vec<CellGroup> {
131        self.get_rows_or_cols(true)
132    }
133
134    pub fn columns(&self) -> Vec<CellGroup> {
135        self.get_rows_or_cols(false)
136    }
137
138    pub fn extract(&self, page: &Page, options: &TextOptions) -> Vec<Vec<Option<String>>> {
139        let mut table_arr = Vec::new();
140        for row in self.rows() {
141            let mut row_arr = Vec::new();
142            let row_chars: Vec<Char> = page
143                .chars
144                .iter()
145                .filter(|ch| char_in_bbox(ch, row.bbox))
146                .cloned()
147                .collect();
148
149            for cell in row.cells {
150                if let Some(cell_bbox) = cell {
151                    let cell_chars: Vec<Char> = row_chars
152                        .iter()
153                        .filter(|ch| char_in_bbox(ch, cell_bbox))
154                        .cloned()
155                        .collect();
156                    if cell_chars.is_empty() {
157                        row_arr.push(Some(String::new()));
158                    } else {
159                        let mut text_options = options.clone();
160                        if text_options.layout {
161                            text_options.layout_width = Some(cell_bbox.width());
162                            text_options.layout_height = Some(cell_bbox.height());
163                            text_options.layout_bbox = Some(cell_bbox);
164                        }
165                        row_arr.push(Some(extract_text(&cell_chars, &text_options)));
166                    }
167                } else {
168                    row_arr.push(None);
169                }
170            }
171            table_arr.push(row_arr);
172        }
173        table_arr
174    }
175
176    fn get_rows_or_cols(&self, rows: bool) -> Vec<CellGroup> {
177        let (axis, antiaxis) = if rows { (0usize, 1usize) } else { (1usize, 0usize) };
178
179        let mut cells = self.cells.clone();
180        cells.sort_by(|a, b| {
181            let ka = bbox_coord(*a, antiaxis)
182                .total_cmp(&bbox_coord(*b, antiaxis))
183                .then_with(|| bbox_coord(*a, axis).total_cmp(&bbox_coord(*b, axis)));
184            ka
185        });
186
187        let mut xs: Vec<f64> = cells.iter().map(|bbox| bbox_coord(*bbox, axis)).collect();
188        xs.sort_by(|a, b| a.total_cmp(b));
189        xs.dedup_by(|a, b| (*a - *b).abs() < 0.0001);
190
191        let groups = cluster_items(&cells, |bbox| bbox_coord(*bbox, antiaxis), 0.0);
192        let mut out = Vec::new();
193        for group in groups {
194            let mut row_cells = Vec::new();
195            for x in &xs {
196                let cell = group
197                    .iter()
198                    .find(|bbox| (bbox_coord(**bbox, axis) - *x).abs() < 0.0001)
199                    .copied();
200                row_cells.push(cell);
201            }
202
203            let bbox = merged_optional_bbox(&row_cells);
204            out.push(CellGroup { cells: row_cells, bbox });
205        }
206        out
207    }
208}
209
210#[derive(Debug, Clone)]
211pub struct TableFinder {
212    pub settings: TableSettings,
213    pub edges: Vec<Edge>,
214    pub intersections: BTreeMap<PointKey, Intersection>,
215    pub cells: Vec<BBox>,
216    pub tables: Vec<Table>,
217}
218
219impl TableFinder {
220    pub fn new(page: &Page, settings: TableSettings) -> crate::Result<Self> {
221        let edges = get_edges(page, &settings)?;
222        let intersections = edges_to_intersections(
223            &edges,
224            settings.intersection_x_tolerance(),
225            settings.intersection_y_tolerance(),
226        );
227        let cells = intersections_to_cells(&intersections);
228        let tables = cells_to_tables(&cells)
229            .into_iter()
230            .map(|group| {
231                let bbox = merge_bboxes(&group);
232                Table { cells: group, bbox }
233            })
234            .collect();
235
236        Ok(Self {
237            settings,
238            edges,
239            intersections,
240            cells,
241            tables,
242        })
243    }
244}
245
246type PointKey = (OrderedFloat<f64>, OrderedFloat<f64>);
247type BBoxKey = (
248    OrderedFloat<f64>,
249    OrderedFloat<f64>,
250    OrderedFloat<f64>,
251    OrderedFloat<f64>,
252);
253
254#[derive(Debug, Clone, Default)]
255pub struct Intersection {
256    pub vertical: Vec<Edge>,
257    pub horizontal: Vec<Edge>,
258}
259
260fn get_edges(page: &Page, settings: &TableSettings) -> crate::Result<Vec<Edge>> {
261    if matches!(settings.vertical_strategy, TableStrategy::Explicit) && settings.explicit_vertical_lines.len() < 2 {
262        return Err(crate::Error::Message(
263            "explicit vertical strategy requires at least two explicit vertical lines".to_string(),
264        ));
265    }
266    if matches!(settings.horizontal_strategy, TableStrategy::Explicit) && settings.explicit_horizontal_lines.len() < 2 {
267        return Err(crate::Error::Message(
268            "explicit horizontal strategy requires at least two explicit horizontal lines".to_string(),
269        ));
270    }
271
272    let words = if matches!(settings.vertical_strategy, TableStrategy::Text)
273        || matches!(settings.horizontal_strategy, TableStrategy::Text)
274    {
275        extract_words(&page.chars, &settings.text_options, false)
276    } else {
277        Vec::new()
278    };
279
280    let mut v_explicit = Vec::new();
281    for entry in &settings.explicit_vertical_lines {
282        match entry {
283            ExplicitLine::Position(x) => v_explicit.push(Edge {
284                x0: *x,
285                top: page.bbox.top,
286                x1: *x,
287                bottom: page.bbox.bottom,
288                width: 0.0,
289                height: page.bbox.height(),
290                orientation: Orientation::Vertical,
291                object_type: "explicit".to_string(),
292            }),
293            ExplicitLine::Edge(edge) => {
294                if edge.orientation == Orientation::Vertical {
295                    v_explicit.push(edge.clone());
296                }
297            }
298        }
299    }
300
301    let mut h_explicit = Vec::new();
302    for entry in &settings.explicit_horizontal_lines {
303        match entry {
304            ExplicitLine::Position(y) => h_explicit.push(Edge {
305                x0: page.bbox.x0,
306                top: *y,
307                x1: page.bbox.x1,
308                bottom: *y,
309                width: page.bbox.width(),
310                height: 0.0,
311                orientation: Orientation::Horizontal,
312                object_type: "explicit".to_string(),
313            }),
314            ExplicitLine::Edge(edge) => {
315                if edge.orientation == Orientation::Horizontal {
316                    h_explicit.push(edge.clone());
317                }
318            }
319        }
320    }
321
322    let mut vertical = match settings.vertical_strategy {
323        TableStrategy::Lines => filter_edges(&page.edges(), Some(Orientation::Vertical), None, settings.edge_min_length_prefilter),
324        TableStrategy::LinesStrict => filter_edges(
325            &page.edges(),
326            Some(Orientation::Vertical),
327            Some("line"),
328            settings.edge_min_length_prefilter,
329        ),
330        TableStrategy::Text => words_to_edges_v(&words, settings.min_words_vertical),
331        TableStrategy::Explicit => Vec::new(),
332    };
333    vertical.extend(v_explicit);
334
335    let mut horizontal = match settings.horizontal_strategy {
336        TableStrategy::Lines => filter_edges(&page.edges(), Some(Orientation::Horizontal), None, settings.edge_min_length_prefilter),
337        TableStrategy::LinesStrict => filter_edges(
338            &page.edges(),
339            Some(Orientation::Horizontal),
340            Some("line"),
341            settings.edge_min_length_prefilter,
342        ),
343        TableStrategy::Text => words_to_edges_h(&words, settings.min_words_horizontal),
344        TableStrategy::Explicit => Vec::new(),
345    };
346    horizontal.extend(h_explicit);
347
348    let mut edges = Vec::new();
349    edges.extend(vertical);
350    edges.extend(horizontal);
351
352    let merged = merge_edges(
353        &edges,
354        settings.snap_x_tolerance(),
355        settings.snap_y_tolerance(),
356        settings.join_x_tolerance(),
357        settings.join_y_tolerance(),
358    );
359
360    Ok(filter_edges(&merged, None, None, settings.edge_min_length))
361}
362
363fn filter_edges(
364    edges: &[Edge],
365    orientation: Option<Orientation>,
366    edge_type: Option<&str>,
367    min_length: f64,
368) -> Vec<Edge> {
369    edges
370        .iter()
371        .filter(|edge| {
372            let orientation_ok = orientation.map(|value| value == edge.orientation).unwrap_or(true);
373            let edge_type_ok = edge_type.map(|value| value == edge.object_type).unwrap_or(true);
374            let dim = if edge.orientation == Orientation::Vertical {
375                edge.height
376            } else {
377                edge.width
378            };
379            orientation_ok && edge_type_ok && dim >= min_length
380        })
381        .cloned()
382        .collect()
383}
384
385fn merge_edges(
386    edges: &[Edge],
387    snap_x_tolerance: f64,
388    snap_y_tolerance: f64,
389    join_x_tolerance: f64,
390    join_y_tolerance: f64,
391) -> Vec<Edge> {
392    let snapped = if snap_x_tolerance > 0.0 || snap_y_tolerance > 0.0 {
393        snap_edges(edges, snap_x_tolerance, snap_y_tolerance)
394    } else {
395        edges.to_vec()
396    };
397
398    let mut sorted = snapped;
399    sorted.sort_by(|a, b| edge_group_key(a).cmp(&edge_group_key(b)));
400
401    let mut out = Vec::new();
402    let mut idx = 0usize;
403    while idx < sorted.len() {
404        let key = edge_group_key(&sorted[idx]);
405        let mut end = idx + 1;
406        while end < sorted.len() && edge_group_key(&sorted[end]) == key {
407            end += 1;
408        }
409
410        let tolerance = if key.0 == 'h' {
411            join_x_tolerance
412        } else {
413            join_y_tolerance
414        };
415
416        out.extend(join_edge_group(&sorted[idx..end], key.0, tolerance));
417        idx = end;
418    }
419
420    out
421}
422
423fn edge_group_key(edge: &Edge) -> (char, OrderedFloat<f64>) {
424    match edge.orientation {
425        Orientation::Horizontal => ('h', OrderedFloat(edge.top)),
426        Orientation::Vertical => ('v', OrderedFloat(edge.x0)),
427    }
428}
429
430fn join_edge_group(edges: &[Edge], orientation: char, tolerance: f64) -> Vec<Edge> {
431    if edges.is_empty() {
432        return Vec::new();
433    }
434    let mut sorted = edges.to_vec();
435    if orientation == 'h' {
436        sorted.sort_by(|a, b| a.x0.total_cmp(&b.x0));
437    } else {
438        sorted.sort_by(|a, b| a.top.total_cmp(&b.top));
439    }
440
441    let mut joined = vec![sorted[0].clone()];
442    for edge in sorted.into_iter().skip(1) {
443        let last = joined.last_mut().expect("non-empty");
444        let overlaps = if orientation == 'h' {
445            edge.x0 <= last.x1 + tolerance
446        } else {
447            edge.top <= last.bottom + tolerance
448        };
449
450        if overlaps {
451            if orientation == 'h' && edge.x1 > last.x1 {
452                last.x1 = edge.x1;
453                last.width = last.x1 - last.x0;
454            } else if orientation == 'v' && edge.bottom > last.bottom {
455                last.bottom = edge.bottom;
456                last.height = last.bottom - last.top;
457            }
458        } else {
459            joined.push(edge);
460        }
461    }
462    joined
463}
464
465fn words_to_edges_h(words: &[Word], threshold: usize) -> Vec<Edge> {
466    let clusters = cluster_items(words, |word| word.top, 1.0);
467    let large: Vec<Vec<Word>> = clusters.into_iter().filter(|cluster| cluster.len() >= threshold).collect();
468    if large.is_empty() {
469        return Vec::new();
470    }
471
472    let rects: Vec<BBox> = large
473        .iter()
474        .filter_map(|cluster| objects_to_bbox(cluster))
475        .collect();
476
477    if rects.is_empty() {
478        return Vec::new();
479    }
480
481    let min_x0 = rects.iter().map(|bbox| bbox.x0).fold(f64::INFINITY, f64::min);
482    let max_x1 = rects.iter().map(|bbox| bbox.x1).fold(f64::NEG_INFINITY, f64::max);
483
484    let mut edges = Vec::new();
485    for rect in rects {
486        edges.push(Edge {
487            x0: min_x0,
488            top: rect.top,
489            x1: max_x1,
490            bottom: rect.top,
491            width: max_x1 - min_x0,
492            height: 0.0,
493            orientation: Orientation::Horizontal,
494            object_type: "text".to_string(),
495        });
496        edges.push(Edge {
497            x0: min_x0,
498            top: rect.bottom,
499            x1: max_x1,
500            bottom: rect.bottom,
501            width: max_x1 - min_x0,
502            height: 0.0,
503            orientation: Orientation::Horizontal,
504            object_type: "text".to_string(),
505        });
506    }
507    edges
508}
509
510fn words_to_edges_v(words: &[Word], threshold: usize) -> Vec<Edge> {
511    let by_x0 = cluster_items(words, |word| word.x0, 1.0);
512    let by_x1 = cluster_items(words, |word| word.x1, 1.0);
513    let by_center = cluster_items(words, |word| (word.x0 + word.x1) / 2.0, 1.0);
514
515    let mut clusters = Vec::new();
516    clusters.extend(by_x0);
517    clusters.extend(by_x1);
518    clusters.extend(by_center);
519    clusters.sort_by(|a, b| b.len().cmp(&a.len()));
520
521    let large: Vec<Vec<Word>> = clusters.into_iter().filter(|cluster| cluster.len() >= threshold).collect();
522    let mut boxes: Vec<BBox> = large.iter().filter_map(|cluster| objects_to_bbox(cluster)).collect();
523
524    let mut condensed = Vec::new();
525    for bbox in boxes.drain(..) {
526        if !condensed.iter().any(|existing: &BBox| existing.overlap(bbox).is_some()) {
527            condensed.push(bbox);
528        }
529    }
530
531    if condensed.is_empty() {
532        return Vec::new();
533    }
534
535    condensed.sort_by(|a, b| a.x0.total_cmp(&b.x0));
536    let max_x1 = condensed.iter().map(|bbox| bbox.x1).fold(f64::NEG_INFINITY, f64::max);
537    let min_top = condensed.iter().map(|bbox| bbox.top).fold(f64::INFINITY, f64::min);
538    let max_bottom = condensed.iter().map(|bbox| bbox.bottom).fold(f64::NEG_INFINITY, f64::max);
539
540    let mut out = Vec::new();
541    for bbox in &condensed {
542        out.push(Edge {
543            x0: bbox.x0,
544            top: min_top,
545            x1: bbox.x0,
546            bottom: max_bottom,
547            width: 0.0,
548            height: max_bottom - min_top,
549            orientation: Orientation::Vertical,
550            object_type: "text".to_string(),
551        });
552    }
553
554    out.push(Edge {
555        x0: max_x1,
556        top: min_top,
557        x1: max_x1,
558        bottom: max_bottom,
559        width: 0.0,
560        height: max_bottom - min_top,
561        orientation: Orientation::Vertical,
562        object_type: "text".to_string(),
563    });
564
565    out
566}
567
568fn edges_to_intersections(edges: &[Edge], x_tolerance: f64, y_tolerance: f64) -> BTreeMap<PointKey, Intersection> {
569    let vertical: Vec<Edge> = edges
570        .iter()
571        .filter(|edge| edge.orientation == Orientation::Vertical)
572        .cloned()
573        .collect();
574    let horizontal: Vec<Edge> = edges
575        .iter()
576        .filter(|edge| edge.orientation == Orientation::Horizontal)
577        .cloned()
578        .collect();
579
580    let mut intersections: BTreeMap<PointKey, Intersection> = BTreeMap::new();
581    for v in &vertical {
582        for h in &horizontal {
583            if v.top <= h.top + y_tolerance
584                && v.bottom >= h.top - y_tolerance
585                && v.x0 >= h.x0 - x_tolerance
586                && v.x0 <= h.x1 + x_tolerance
587            {
588                let key = (OrderedFloat(v.x0), OrderedFloat(h.top));
589                let entry = intersections.entry(key).or_default();
590                entry.vertical.push(v.clone());
591                entry.horizontal.push(h.clone());
592            }
593        }
594    }
595    intersections
596}
597
598fn intersections_to_cells(intersections: &BTreeMap<PointKey, Intersection>) -> Vec<BBox> {
599    let points: Vec<PointKey> = intersections.keys().copied().collect();
600    let mut out = Vec::new();
601
602    for (idx, point) in points.iter().enumerate() {
603        let below: Vec<PointKey> = points.iter().copied().skip(idx + 1).filter(|other| other.0 == point.0).collect();
604        let right: Vec<PointKey> = points.iter().copied().skip(idx + 1).filter(|other| other.1 == point.1).collect();
605
606        for below_pt in &below {
607            if !edge_connects(*point, *below_pt, intersections) {
608                continue;
609            }
610            for right_pt in &right {
611                if !edge_connects(*point, *right_pt, intersections) {
612                    continue;
613                }
614
615                let bottom_right = (right_pt.0, below_pt.1);
616                if intersections.contains_key(&bottom_right)
617                    && edge_connects(bottom_right, *right_pt, intersections)
618                    && edge_connects(bottom_right, *below_pt, intersections)
619                {
620                    out.push(BBox::new(point.0.into_inner(), point.1.into_inner(), right_pt.0.into_inner(), below_pt.1.into_inner()));
621                    break;
622                }
623            }
624        }
625    }
626
627    out
628}
629
630fn edge_connects(p1: PointKey, p2: PointKey, intersections: &BTreeMap<PointKey, Intersection>) -> bool {
631    if p1.0 == p2.0 {
632        let a: BTreeSet<BBoxKey> = intersections[&p1]
633            .vertical
634            .iter()
635            .map(edge_bbox_key)
636            .collect();
637        let b: BTreeSet<BBoxKey> = intersections[&p2]
638            .vertical
639            .iter()
640            .map(edge_bbox_key)
641            .collect();
642        return !a.is_disjoint(&b);
643    }
644
645    if p1.1 == p2.1 {
646        let a: BTreeSet<BBoxKey> = intersections[&p1]
647            .horizontal
648            .iter()
649            .map(edge_bbox_key)
650            .collect();
651        let b: BTreeSet<BBoxKey> = intersections[&p2]
652            .horizontal
653            .iter()
654            .map(edge_bbox_key)
655            .collect();
656        return !a.is_disjoint(&b);
657    }
658
659    false
660}
661
662fn cells_to_tables(cells: &[BBox]) -> Vec<Vec<BBox>> {
663    let mut remaining = cells.to_vec();
664    let mut current_corners: BTreeSet<PointKey> = BTreeSet::new();
665    let mut current_cells: Vec<BBox> = Vec::new();
666    let mut tables = Vec::new();
667
668    while !remaining.is_empty() {
669        let initial = current_cells.len();
670
671        let snapshot = remaining.clone();
672        for cell in snapshot {
673            let corners = bbox_corners(cell);
674            if current_cells.is_empty() {
675                current_corners.extend(corners);
676                current_cells.push(cell);
677                remove_bbox(&mut remaining, cell);
678            } else {
679                let corner_count = corners.iter().filter(|corner| current_corners.contains(corner)).count();
680                if corner_count > 0 {
681                    current_corners.extend(corners);
682                    current_cells.push(cell);
683                    remove_bbox(&mut remaining, cell);
684                }
685            }
686        }
687
688        if current_cells.len() == initial {
689            tables.push(current_cells.clone());
690            current_corners.clear();
691            current_cells.clear();
692        }
693    }
694
695    if !current_cells.is_empty() {
696        tables.push(current_cells);
697    }
698
699    tables.retain(|table| table.len() > 1);
700    tables.sort_by(|a, b| {
701        let aa = top_left_of_table(a);
702        let bb = top_left_of_table(b);
703        aa.0.total_cmp(&bb.0).then_with(|| aa.1.total_cmp(&bb.1))
704    });
705    tables
706}
707
708fn char_in_bbox(ch: &Char, bbox: BBox) -> bool {
709    let v_mid = (ch.top + ch.bottom) / 2.0;
710    let h_mid = (ch.x0 + ch.x1) / 2.0;
711    h_mid >= bbox.x0 && h_mid < bbox.x1 && v_mid >= bbox.top && v_mid < bbox.bottom
712}
713
714fn merge_bboxes(boxes: &[BBox]) -> BBox {
715    let x0 = boxes.iter().map(|bbox| bbox.x0).fold(f64::INFINITY, f64::min);
716    let top = boxes.iter().map(|bbox| bbox.top).fold(f64::INFINITY, f64::min);
717    let x1 = boxes.iter().map(|bbox| bbox.x1).fold(f64::NEG_INFINITY, f64::max);
718    let bottom = boxes.iter().map(|bbox| bbox.bottom).fold(f64::NEG_INFINITY, f64::max);
719    BBox::new(x0, top, x1, bottom)
720}
721
722fn merged_optional_bbox(boxes: &[Option<BBox>]) -> BBox {
723    let selected: Vec<BBox> = boxes.iter().filter_map(|bbox| *bbox).collect();
724    merge_bboxes(&selected)
725}
726
727fn bbox_coord(bbox: BBox, axis: usize) -> f64 {
728    match axis {
729        0 => bbox.x0,
730        1 => bbox.top,
731        _ => unreachable!(),
732    }
733}
734
735fn bbox_corners(bbox: BBox) -> [PointKey; 4] {
736    [
737        (OrderedFloat(bbox.x0), OrderedFloat(bbox.top)),
738        (OrderedFloat(bbox.x0), OrderedFloat(bbox.bottom)),
739        (OrderedFloat(bbox.x1), OrderedFloat(bbox.top)),
740        (OrderedFloat(bbox.x1), OrderedFloat(bbox.bottom)),
741    ]
742}
743
744fn top_left_of_table(cells: &[BBox]) -> (f64, f64) {
745    let bbox = merge_bboxes(cells);
746    (bbox.top, bbox.x0)
747}
748
749fn edge_bbox_key(edge: &Edge) -> BBoxKey {
750    (
751        OrderedFloat(edge.x0),
752        OrderedFloat(edge.top),
753        OrderedFloat(edge.x1),
754        OrderedFloat(edge.bottom),
755    )
756}
757
758fn remove_bbox(items: &mut Vec<BBox>, target: BBox) {
759    if let Some(idx) = items.iter().position(|bbox| *bbox == target) {
760        items.remove(idx);
761    }
762}
763
764fn line_to_edge(line: &Line) -> Edge {
765    let orientation = if (line.top - line.bottom).abs() < 0.0001 {
766        Orientation::Horizontal
767    } else {
768        Orientation::Vertical
769    };
770    Edge {
771        x0: line.x0,
772        top: line.top,
773        x1: line.x1,
774        bottom: line.bottom,
775        width: line.width,
776        height: line.height,
777        orientation,
778        object_type: "line".to_string(),
779    }
780}
781
782fn curve_to_edges(curve: &crate::types::Curve) -> Vec<Edge> {
783    let mut edges = Vec::new();
784    for pair in curve.pts.windows(2) {
785        let p0 = pair[0];
786        let p1 = pair[1];
787        let orientation = if (p0.x - p1.x).abs() < 0.0001 {
788            Some(Orientation::Vertical)
789        } else if (p0.y - p1.y).abs() < 0.0001 {
790            Some(Orientation::Horizontal)
791        } else {
792            None
793        };
794
795        if let Some(orientation) = orientation {
796            edges.push(Edge {
797                x0: p0.x.min(p1.x),
798                top: p0.y.min(p1.y),
799                x1: p0.x.max(p1.x),
800                bottom: p0.y.max(p1.y),
801                width: (p1.x - p0.x).abs(),
802                height: (p1.y - p0.y).abs(),
803                orientation,
804                object_type: "curve_edge".to_string(),
805            });
806        }
807    }
808    edges
809}
810
811impl Page {
812    pub fn edges(&self) -> Vec<Edge> {
813        let mut edges: Vec<Edge> = self.lines.iter().map(line_to_edge).collect();
814        for rect in &self.rects {
815            edges.extend(rect_to_edges(rect));
816        }
817        for curve in &self.curves {
818            edges.extend(curve_to_edges(curve));
819        }
820        edges
821    }
822}