1use crate::clustering::cluster_items;
2use crate::geometry::{objects_to_bbox, rect_to_edges, snap_edges};
3use crate::text::{extract_text, extract_words, TextOptions};
4use crate::types::{
5 BBox, Char, Edge, Line, Orientation, Page, Word,
6};
7use ordered_float::OrderedFloat;
8use std::collections::{BTreeMap, BTreeSet};
9
10#[derive(Debug, Clone, Copy, PartialEq, Eq)]
11pub enum TableStrategy {
12 Lines,
13 LinesStrict,
14 Text,
15 Explicit,
16}
17
18impl Default for TableStrategy {
19 fn default() -> Self {
20 Self::Lines
21 }
22}
23
24impl std::str::FromStr for TableStrategy {
25 type Err = crate::Error;
26
27 fn from_str(s: &str) -> crate::Result<Self> {
28 match s {
29 "lines" => Ok(Self::Lines),
30 "lines_strict" => Ok(Self::LinesStrict),
31 "text" => Ok(Self::Text),
32 "explicit" => Ok(Self::Explicit),
33 other => Err(crate::Error::Message(format!("unknown table strategy: {other}"))),
34 }
35 }
36}
37
38#[derive(Debug, Clone)]
39pub enum ExplicitLine {
40 Position(f64),
41 Edge(Edge),
42}
43
44#[derive(Debug, Clone)]
45pub struct TableSettings {
46 pub vertical_strategy: TableStrategy,
47 pub horizontal_strategy: TableStrategy,
48 pub explicit_vertical_lines: Vec<ExplicitLine>,
49 pub explicit_horizontal_lines: Vec<ExplicitLine>,
50 pub snap_tolerance: f64,
51 pub snap_x_tolerance: Option<f64>,
52 pub snap_y_tolerance: Option<f64>,
53 pub join_tolerance: f64,
54 pub join_x_tolerance: Option<f64>,
55 pub join_y_tolerance: Option<f64>,
56 pub edge_min_length: f64,
57 pub edge_min_length_prefilter: f64,
58 pub min_words_vertical: usize,
59 pub min_words_horizontal: usize,
60 pub intersection_tolerance: f64,
61 pub intersection_x_tolerance: Option<f64>,
62 pub intersection_y_tolerance: Option<f64>,
63 pub text_options: TextOptions,
64}
65
66impl Default for TableSettings {
67 fn default() -> Self {
68 Self {
69 vertical_strategy: TableStrategy::Lines,
70 horizontal_strategy: TableStrategy::Lines,
71 explicit_vertical_lines: Vec::new(),
72 explicit_horizontal_lines: Vec::new(),
73 snap_tolerance: 3.0,
74 snap_x_tolerance: None,
75 snap_y_tolerance: None,
76 join_tolerance: 3.0,
77 join_x_tolerance: None,
78 join_y_tolerance: None,
79 edge_min_length: 3.0,
80 edge_min_length_prefilter: 1.0,
81 min_words_vertical: 3,
82 min_words_horizontal: 1,
83 intersection_tolerance: 3.0,
84 intersection_x_tolerance: None,
85 intersection_y_tolerance: None,
86 text_options: TextOptions::default(),
87 }
88 }
89}
90
91impl TableSettings {
92 pub fn snap_x_tolerance(&self) -> f64 {
93 self.snap_x_tolerance.unwrap_or(self.snap_tolerance)
94 }
95
96 pub fn snap_y_tolerance(&self) -> f64 {
97 self.snap_y_tolerance.unwrap_or(self.snap_tolerance)
98 }
99
100 pub fn join_x_tolerance(&self) -> f64 {
101 self.join_x_tolerance.unwrap_or(self.join_tolerance)
102 }
103
104 pub fn join_y_tolerance(&self) -> f64 {
105 self.join_y_tolerance.unwrap_or(self.join_tolerance)
106 }
107
108 pub fn intersection_x_tolerance(&self) -> f64 {
109 self.intersection_x_tolerance.unwrap_or(self.intersection_tolerance)
110 }
111
112 pub fn intersection_y_tolerance(&self) -> f64 {
113 self.intersection_y_tolerance.unwrap_or(self.intersection_tolerance)
114 }
115}
116
117#[derive(Debug, Clone)]
118pub struct CellGroup {
119 pub cells: Vec<Option<BBox>>,
120 pub bbox: BBox,
121}
122
123#[derive(Debug, Clone)]
124pub struct Table {
125 pub cells: Vec<BBox>,
126 pub bbox: BBox,
127}
128
129impl Table {
130 pub fn rows(&self) -> Vec<CellGroup> {
131 self.get_rows_or_cols(true)
132 }
133
134 pub fn columns(&self) -> Vec<CellGroup> {
135 self.get_rows_or_cols(false)
136 }
137
138 pub fn extract(&self, page: &Page, options: &TextOptions) -> Vec<Vec<Option<String>>> {
139 let mut table_arr = Vec::new();
140 for row in self.rows() {
141 let mut row_arr = Vec::new();
142 let row_chars: Vec<Char> = page
143 .chars
144 .iter()
145 .filter(|ch| char_in_bbox(ch, row.bbox))
146 .cloned()
147 .collect();
148
149 for cell in row.cells {
150 if let Some(cell_bbox) = cell {
151 let cell_chars: Vec<Char> = row_chars
152 .iter()
153 .filter(|ch| char_in_bbox(ch, cell_bbox))
154 .cloned()
155 .collect();
156 if cell_chars.is_empty() {
157 row_arr.push(Some(String::new()));
158 } else {
159 let mut text_options = options.clone();
160 if text_options.layout {
161 text_options.layout_width = Some(cell_bbox.width());
162 text_options.layout_height = Some(cell_bbox.height());
163 text_options.layout_bbox = Some(cell_bbox);
164 }
165 row_arr.push(Some(extract_text(&cell_chars, &text_options)));
166 }
167 } else {
168 row_arr.push(None);
169 }
170 }
171 table_arr.push(row_arr);
172 }
173 table_arr
174 }
175
176 fn get_rows_or_cols(&self, rows: bool) -> Vec<CellGroup> {
177 let (axis, antiaxis) = if rows { (0usize, 1usize) } else { (1usize, 0usize) };
178
179 let mut cells = self.cells.clone();
180 cells.sort_by(|a, b| {
181 let ka = bbox_coord(*a, antiaxis)
182 .total_cmp(&bbox_coord(*b, antiaxis))
183 .then_with(|| bbox_coord(*a, axis).total_cmp(&bbox_coord(*b, axis)));
184 ka
185 });
186
187 let mut xs: Vec<f64> = cells.iter().map(|bbox| bbox_coord(*bbox, axis)).collect();
188 xs.sort_by(|a, b| a.total_cmp(b));
189 xs.dedup_by(|a, b| (*a - *b).abs() < 0.0001);
190
191 let groups = cluster_items(&cells, |bbox| bbox_coord(*bbox, antiaxis), 0.0);
192 let mut out = Vec::new();
193 for group in groups {
194 let mut row_cells = Vec::new();
195 for x in &xs {
196 let cell = group
197 .iter()
198 .find(|bbox| (bbox_coord(**bbox, axis) - *x).abs() < 0.0001)
199 .copied();
200 row_cells.push(cell);
201 }
202
203 let bbox = merged_optional_bbox(&row_cells);
204 out.push(CellGroup { cells: row_cells, bbox });
205 }
206 out
207 }
208}
209
210#[derive(Debug, Clone)]
211pub struct TableFinder {
212 pub settings: TableSettings,
213 pub edges: Vec<Edge>,
214 pub intersections: BTreeMap<PointKey, Intersection>,
215 pub cells: Vec<BBox>,
216 pub tables: Vec<Table>,
217}
218
219impl TableFinder {
220 pub fn new(page: &Page, settings: TableSettings) -> crate::Result<Self> {
221 let edges = get_edges(page, &settings)?;
222 let intersections = edges_to_intersections(
223 &edges,
224 settings.intersection_x_tolerance(),
225 settings.intersection_y_tolerance(),
226 );
227 let cells = intersections_to_cells(&intersections);
228 let tables = cells_to_tables(&cells)
229 .into_iter()
230 .map(|group| {
231 let bbox = merge_bboxes(&group);
232 Table { cells: group, bbox }
233 })
234 .collect();
235
236 Ok(Self {
237 settings,
238 edges,
239 intersections,
240 cells,
241 tables,
242 })
243 }
244}
245
246type PointKey = (OrderedFloat<f64>, OrderedFloat<f64>);
247type BBoxKey = (
248 OrderedFloat<f64>,
249 OrderedFloat<f64>,
250 OrderedFloat<f64>,
251 OrderedFloat<f64>,
252);
253
254#[derive(Debug, Clone, Default)]
255pub struct Intersection {
256 pub vertical: Vec<Edge>,
257 pub horizontal: Vec<Edge>,
258}
259
260fn get_edges(page: &Page, settings: &TableSettings) -> crate::Result<Vec<Edge>> {
261 if matches!(settings.vertical_strategy, TableStrategy::Explicit) && settings.explicit_vertical_lines.len() < 2 {
262 return Err(crate::Error::Message(
263 "explicit vertical strategy requires at least two explicit vertical lines".to_string(),
264 ));
265 }
266 if matches!(settings.horizontal_strategy, TableStrategy::Explicit) && settings.explicit_horizontal_lines.len() < 2 {
267 return Err(crate::Error::Message(
268 "explicit horizontal strategy requires at least two explicit horizontal lines".to_string(),
269 ));
270 }
271
272 let words = if matches!(settings.vertical_strategy, TableStrategy::Text)
273 || matches!(settings.horizontal_strategy, TableStrategy::Text)
274 {
275 extract_words(&page.chars, &settings.text_options, false)
276 } else {
277 Vec::new()
278 };
279
280 let mut v_explicit = Vec::new();
281 for entry in &settings.explicit_vertical_lines {
282 match entry {
283 ExplicitLine::Position(x) => v_explicit.push(Edge {
284 x0: *x,
285 top: page.bbox.top,
286 x1: *x,
287 bottom: page.bbox.bottom,
288 width: 0.0,
289 height: page.bbox.height(),
290 orientation: Orientation::Vertical,
291 object_type: "explicit".to_string(),
292 }),
293 ExplicitLine::Edge(edge) => {
294 if edge.orientation == Orientation::Vertical {
295 v_explicit.push(edge.clone());
296 }
297 }
298 }
299 }
300
301 let mut h_explicit = Vec::new();
302 for entry in &settings.explicit_horizontal_lines {
303 match entry {
304 ExplicitLine::Position(y) => h_explicit.push(Edge {
305 x0: page.bbox.x0,
306 top: *y,
307 x1: page.bbox.x1,
308 bottom: *y,
309 width: page.bbox.width(),
310 height: 0.0,
311 orientation: Orientation::Horizontal,
312 object_type: "explicit".to_string(),
313 }),
314 ExplicitLine::Edge(edge) => {
315 if edge.orientation == Orientation::Horizontal {
316 h_explicit.push(edge.clone());
317 }
318 }
319 }
320 }
321
322 let mut vertical = match settings.vertical_strategy {
323 TableStrategy::Lines => filter_edges(&page.edges(), Some(Orientation::Vertical), None, settings.edge_min_length_prefilter),
324 TableStrategy::LinesStrict => filter_edges(
325 &page.edges(),
326 Some(Orientation::Vertical),
327 Some("line"),
328 settings.edge_min_length_prefilter,
329 ),
330 TableStrategy::Text => words_to_edges_v(&words, settings.min_words_vertical),
331 TableStrategy::Explicit => Vec::new(),
332 };
333 vertical.extend(v_explicit);
334
335 let mut horizontal = match settings.horizontal_strategy {
336 TableStrategy::Lines => filter_edges(&page.edges(), Some(Orientation::Horizontal), None, settings.edge_min_length_prefilter),
337 TableStrategy::LinesStrict => filter_edges(
338 &page.edges(),
339 Some(Orientation::Horizontal),
340 Some("line"),
341 settings.edge_min_length_prefilter,
342 ),
343 TableStrategy::Text => words_to_edges_h(&words, settings.min_words_horizontal),
344 TableStrategy::Explicit => Vec::new(),
345 };
346 horizontal.extend(h_explicit);
347
348 let mut edges = Vec::new();
349 edges.extend(vertical);
350 edges.extend(horizontal);
351
352 let merged = merge_edges(
353 &edges,
354 settings.snap_x_tolerance(),
355 settings.snap_y_tolerance(),
356 settings.join_x_tolerance(),
357 settings.join_y_tolerance(),
358 );
359
360 Ok(filter_edges(&merged, None, None, settings.edge_min_length))
361}
362
363fn filter_edges(
364 edges: &[Edge],
365 orientation: Option<Orientation>,
366 edge_type: Option<&str>,
367 min_length: f64,
368) -> Vec<Edge> {
369 edges
370 .iter()
371 .filter(|edge| {
372 let orientation_ok = orientation.map(|value| value == edge.orientation).unwrap_or(true);
373 let edge_type_ok = edge_type.map(|value| value == edge.object_type).unwrap_or(true);
374 let dim = if edge.orientation == Orientation::Vertical {
375 edge.height
376 } else {
377 edge.width
378 };
379 orientation_ok && edge_type_ok && dim >= min_length
380 })
381 .cloned()
382 .collect()
383}
384
385fn merge_edges(
386 edges: &[Edge],
387 snap_x_tolerance: f64,
388 snap_y_tolerance: f64,
389 join_x_tolerance: f64,
390 join_y_tolerance: f64,
391) -> Vec<Edge> {
392 let snapped = if snap_x_tolerance > 0.0 || snap_y_tolerance > 0.0 {
393 snap_edges(edges, snap_x_tolerance, snap_y_tolerance)
394 } else {
395 edges.to_vec()
396 };
397
398 let mut sorted = snapped;
399 sorted.sort_by(|a, b| edge_group_key(a).cmp(&edge_group_key(b)));
400
401 let mut out = Vec::new();
402 let mut idx = 0usize;
403 while idx < sorted.len() {
404 let key = edge_group_key(&sorted[idx]);
405 let mut end = idx + 1;
406 while end < sorted.len() && edge_group_key(&sorted[end]) == key {
407 end += 1;
408 }
409
410 let tolerance = if key.0 == 'h' {
411 join_x_tolerance
412 } else {
413 join_y_tolerance
414 };
415
416 out.extend(join_edge_group(&sorted[idx..end], key.0, tolerance));
417 idx = end;
418 }
419
420 out
421}
422
423fn edge_group_key(edge: &Edge) -> (char, OrderedFloat<f64>) {
424 match edge.orientation {
425 Orientation::Horizontal => ('h', OrderedFloat(edge.top)),
426 Orientation::Vertical => ('v', OrderedFloat(edge.x0)),
427 }
428}
429
430fn join_edge_group(edges: &[Edge], orientation: char, tolerance: f64) -> Vec<Edge> {
431 if edges.is_empty() {
432 return Vec::new();
433 }
434 let mut sorted = edges.to_vec();
435 if orientation == 'h' {
436 sorted.sort_by(|a, b| a.x0.total_cmp(&b.x0));
437 } else {
438 sorted.sort_by(|a, b| a.top.total_cmp(&b.top));
439 }
440
441 let mut joined = vec![sorted[0].clone()];
442 for edge in sorted.into_iter().skip(1) {
443 let last = joined.last_mut().expect("non-empty");
444 let overlaps = if orientation == 'h' {
445 edge.x0 <= last.x1 + tolerance
446 } else {
447 edge.top <= last.bottom + tolerance
448 };
449
450 if overlaps {
451 if orientation == 'h' && edge.x1 > last.x1 {
452 last.x1 = edge.x1;
453 last.width = last.x1 - last.x0;
454 } else if orientation == 'v' && edge.bottom > last.bottom {
455 last.bottom = edge.bottom;
456 last.height = last.bottom - last.top;
457 }
458 } else {
459 joined.push(edge);
460 }
461 }
462 joined
463}
464
465fn words_to_edges_h(words: &[Word], threshold: usize) -> Vec<Edge> {
466 let clusters = cluster_items(words, |word| word.top, 1.0);
467 let large: Vec<Vec<Word>> = clusters.into_iter().filter(|cluster| cluster.len() >= threshold).collect();
468 if large.is_empty() {
469 return Vec::new();
470 }
471
472 let rects: Vec<BBox> = large
473 .iter()
474 .filter_map(|cluster| objects_to_bbox(cluster))
475 .collect();
476
477 if rects.is_empty() {
478 return Vec::new();
479 }
480
481 let min_x0 = rects.iter().map(|bbox| bbox.x0).fold(f64::INFINITY, f64::min);
482 let max_x1 = rects.iter().map(|bbox| bbox.x1).fold(f64::NEG_INFINITY, f64::max);
483
484 let mut edges = Vec::new();
485 for rect in rects {
486 edges.push(Edge {
487 x0: min_x0,
488 top: rect.top,
489 x1: max_x1,
490 bottom: rect.top,
491 width: max_x1 - min_x0,
492 height: 0.0,
493 orientation: Orientation::Horizontal,
494 object_type: "text".to_string(),
495 });
496 edges.push(Edge {
497 x0: min_x0,
498 top: rect.bottom,
499 x1: max_x1,
500 bottom: rect.bottom,
501 width: max_x1 - min_x0,
502 height: 0.0,
503 orientation: Orientation::Horizontal,
504 object_type: "text".to_string(),
505 });
506 }
507 edges
508}
509
510fn words_to_edges_v(words: &[Word], threshold: usize) -> Vec<Edge> {
511 let by_x0 = cluster_items(words, |word| word.x0, 1.0);
512 let by_x1 = cluster_items(words, |word| word.x1, 1.0);
513 let by_center = cluster_items(words, |word| (word.x0 + word.x1) / 2.0, 1.0);
514
515 let mut clusters = Vec::new();
516 clusters.extend(by_x0);
517 clusters.extend(by_x1);
518 clusters.extend(by_center);
519 clusters.sort_by(|a, b| b.len().cmp(&a.len()));
520
521 let large: Vec<Vec<Word>> = clusters.into_iter().filter(|cluster| cluster.len() >= threshold).collect();
522 let mut boxes: Vec<BBox> = large.iter().filter_map(|cluster| objects_to_bbox(cluster)).collect();
523
524 let mut condensed = Vec::new();
525 for bbox in boxes.drain(..) {
526 if !condensed.iter().any(|existing: &BBox| existing.overlap(bbox).is_some()) {
527 condensed.push(bbox);
528 }
529 }
530
531 if condensed.is_empty() {
532 return Vec::new();
533 }
534
535 condensed.sort_by(|a, b| a.x0.total_cmp(&b.x0));
536 let max_x1 = condensed.iter().map(|bbox| bbox.x1).fold(f64::NEG_INFINITY, f64::max);
537 let min_top = condensed.iter().map(|bbox| bbox.top).fold(f64::INFINITY, f64::min);
538 let max_bottom = condensed.iter().map(|bbox| bbox.bottom).fold(f64::NEG_INFINITY, f64::max);
539
540 let mut out = Vec::new();
541 for bbox in &condensed {
542 out.push(Edge {
543 x0: bbox.x0,
544 top: min_top,
545 x1: bbox.x0,
546 bottom: max_bottom,
547 width: 0.0,
548 height: max_bottom - min_top,
549 orientation: Orientation::Vertical,
550 object_type: "text".to_string(),
551 });
552 }
553
554 out.push(Edge {
555 x0: max_x1,
556 top: min_top,
557 x1: max_x1,
558 bottom: max_bottom,
559 width: 0.0,
560 height: max_bottom - min_top,
561 orientation: Orientation::Vertical,
562 object_type: "text".to_string(),
563 });
564
565 out
566}
567
568fn edges_to_intersections(edges: &[Edge], x_tolerance: f64, y_tolerance: f64) -> BTreeMap<PointKey, Intersection> {
569 let vertical: Vec<Edge> = edges
570 .iter()
571 .filter(|edge| edge.orientation == Orientation::Vertical)
572 .cloned()
573 .collect();
574 let horizontal: Vec<Edge> = edges
575 .iter()
576 .filter(|edge| edge.orientation == Orientation::Horizontal)
577 .cloned()
578 .collect();
579
580 let mut intersections: BTreeMap<PointKey, Intersection> = BTreeMap::new();
581 for v in &vertical {
582 for h in &horizontal {
583 if v.top <= h.top + y_tolerance
584 && v.bottom >= h.top - y_tolerance
585 && v.x0 >= h.x0 - x_tolerance
586 && v.x0 <= h.x1 + x_tolerance
587 {
588 let key = (OrderedFloat(v.x0), OrderedFloat(h.top));
589 let entry = intersections.entry(key).or_default();
590 entry.vertical.push(v.clone());
591 entry.horizontal.push(h.clone());
592 }
593 }
594 }
595 intersections
596}
597
598fn intersections_to_cells(intersections: &BTreeMap<PointKey, Intersection>) -> Vec<BBox> {
599 let points: Vec<PointKey> = intersections.keys().copied().collect();
600 let mut out = Vec::new();
601
602 for (idx, point) in points.iter().enumerate() {
603 let below: Vec<PointKey> = points.iter().copied().skip(idx + 1).filter(|other| other.0 == point.0).collect();
604 let right: Vec<PointKey> = points.iter().copied().skip(idx + 1).filter(|other| other.1 == point.1).collect();
605
606 for below_pt in &below {
607 if !edge_connects(*point, *below_pt, intersections) {
608 continue;
609 }
610 for right_pt in &right {
611 if !edge_connects(*point, *right_pt, intersections) {
612 continue;
613 }
614
615 let bottom_right = (right_pt.0, below_pt.1);
616 if intersections.contains_key(&bottom_right)
617 && edge_connects(bottom_right, *right_pt, intersections)
618 && edge_connects(bottom_right, *below_pt, intersections)
619 {
620 out.push(BBox::new(point.0.into_inner(), point.1.into_inner(), right_pt.0.into_inner(), below_pt.1.into_inner()));
621 break;
622 }
623 }
624 }
625 }
626
627 out
628}
629
630fn edge_connects(p1: PointKey, p2: PointKey, intersections: &BTreeMap<PointKey, Intersection>) -> bool {
631 if p1.0 == p2.0 {
632 let a: BTreeSet<BBoxKey> = intersections[&p1]
633 .vertical
634 .iter()
635 .map(edge_bbox_key)
636 .collect();
637 let b: BTreeSet<BBoxKey> = intersections[&p2]
638 .vertical
639 .iter()
640 .map(edge_bbox_key)
641 .collect();
642 return !a.is_disjoint(&b);
643 }
644
645 if p1.1 == p2.1 {
646 let a: BTreeSet<BBoxKey> = intersections[&p1]
647 .horizontal
648 .iter()
649 .map(edge_bbox_key)
650 .collect();
651 let b: BTreeSet<BBoxKey> = intersections[&p2]
652 .horizontal
653 .iter()
654 .map(edge_bbox_key)
655 .collect();
656 return !a.is_disjoint(&b);
657 }
658
659 false
660}
661
662fn cells_to_tables(cells: &[BBox]) -> Vec<Vec<BBox>> {
663 let mut remaining = cells.to_vec();
664 let mut current_corners: BTreeSet<PointKey> = BTreeSet::new();
665 let mut current_cells: Vec<BBox> = Vec::new();
666 let mut tables = Vec::new();
667
668 while !remaining.is_empty() {
669 let initial = current_cells.len();
670
671 let snapshot = remaining.clone();
672 for cell in snapshot {
673 let corners = bbox_corners(cell);
674 if current_cells.is_empty() {
675 current_corners.extend(corners);
676 current_cells.push(cell);
677 remove_bbox(&mut remaining, cell);
678 } else {
679 let corner_count = corners.iter().filter(|corner| current_corners.contains(corner)).count();
680 if corner_count > 0 {
681 current_corners.extend(corners);
682 current_cells.push(cell);
683 remove_bbox(&mut remaining, cell);
684 }
685 }
686 }
687
688 if current_cells.len() == initial {
689 tables.push(current_cells.clone());
690 current_corners.clear();
691 current_cells.clear();
692 }
693 }
694
695 if !current_cells.is_empty() {
696 tables.push(current_cells);
697 }
698
699 tables.retain(|table| table.len() > 1);
700 tables.sort_by(|a, b| {
701 let aa = top_left_of_table(a);
702 let bb = top_left_of_table(b);
703 aa.0.total_cmp(&bb.0).then_with(|| aa.1.total_cmp(&bb.1))
704 });
705 tables
706}
707
708fn char_in_bbox(ch: &Char, bbox: BBox) -> bool {
709 let v_mid = (ch.top + ch.bottom) / 2.0;
710 let h_mid = (ch.x0 + ch.x1) / 2.0;
711 h_mid >= bbox.x0 && h_mid < bbox.x1 && v_mid >= bbox.top && v_mid < bbox.bottom
712}
713
714fn merge_bboxes(boxes: &[BBox]) -> BBox {
715 let x0 = boxes.iter().map(|bbox| bbox.x0).fold(f64::INFINITY, f64::min);
716 let top = boxes.iter().map(|bbox| bbox.top).fold(f64::INFINITY, f64::min);
717 let x1 = boxes.iter().map(|bbox| bbox.x1).fold(f64::NEG_INFINITY, f64::max);
718 let bottom = boxes.iter().map(|bbox| bbox.bottom).fold(f64::NEG_INFINITY, f64::max);
719 BBox::new(x0, top, x1, bottom)
720}
721
722fn merged_optional_bbox(boxes: &[Option<BBox>]) -> BBox {
723 let selected: Vec<BBox> = boxes.iter().filter_map(|bbox| *bbox).collect();
724 merge_bboxes(&selected)
725}
726
727fn bbox_coord(bbox: BBox, axis: usize) -> f64 {
728 match axis {
729 0 => bbox.x0,
730 1 => bbox.top,
731 _ => unreachable!(),
732 }
733}
734
735fn bbox_corners(bbox: BBox) -> [PointKey; 4] {
736 [
737 (OrderedFloat(bbox.x0), OrderedFloat(bbox.top)),
738 (OrderedFloat(bbox.x0), OrderedFloat(bbox.bottom)),
739 (OrderedFloat(bbox.x1), OrderedFloat(bbox.top)),
740 (OrderedFloat(bbox.x1), OrderedFloat(bbox.bottom)),
741 ]
742}
743
744fn top_left_of_table(cells: &[BBox]) -> (f64, f64) {
745 let bbox = merge_bboxes(cells);
746 (bbox.top, bbox.x0)
747}
748
749fn edge_bbox_key(edge: &Edge) -> BBoxKey {
750 (
751 OrderedFloat(edge.x0),
752 OrderedFloat(edge.top),
753 OrderedFloat(edge.x1),
754 OrderedFloat(edge.bottom),
755 )
756}
757
758fn remove_bbox(items: &mut Vec<BBox>, target: BBox) {
759 if let Some(idx) = items.iter().position(|bbox| *bbox == target) {
760 items.remove(idx);
761 }
762}
763
764fn line_to_edge(line: &Line) -> Edge {
765 let orientation = if (line.top - line.bottom).abs() < 0.0001 {
766 Orientation::Horizontal
767 } else {
768 Orientation::Vertical
769 };
770 Edge {
771 x0: line.x0,
772 top: line.top,
773 x1: line.x1,
774 bottom: line.bottom,
775 width: line.width,
776 height: line.height,
777 orientation,
778 object_type: "line".to_string(),
779 }
780}
781
782fn curve_to_edges(curve: &crate::types::Curve) -> Vec<Edge> {
783 let mut edges = Vec::new();
784 for pair in curve.pts.windows(2) {
785 let p0 = pair[0];
786 let p1 = pair[1];
787 let orientation = if (p0.x - p1.x).abs() < 0.0001 {
788 Some(Orientation::Vertical)
789 } else if (p0.y - p1.y).abs() < 0.0001 {
790 Some(Orientation::Horizontal)
791 } else {
792 None
793 };
794
795 if let Some(orientation) = orientation {
796 edges.push(Edge {
797 x0: p0.x.min(p1.x),
798 top: p0.y.min(p1.y),
799 x1: p0.x.max(p1.x),
800 bottom: p0.y.max(p1.y),
801 width: (p1.x - p0.x).abs(),
802 height: (p1.y - p0.y).abs(),
803 orientation,
804 object_type: "curve_edge".to_string(),
805 });
806 }
807 }
808 edges
809}
810
811impl Page {
812 pub fn edges(&self) -> Vec<Edge> {
813 let mut edges: Vec<Edge> = self.lines.iter().map(line_to_edge).collect();
814 for rect in &self.rects {
815 edges.extend(rect_to_edges(rect));
816 }
817 for curve in &self.curves {
818 edges.extend(curve_to_edges(curve));
819 }
820 edges
821 }
822}