dump_regions/
dump_regions.rs1use fleischwolf_pdf::layout::LayoutModel;
3use fleischwolf_pdf::PdfDocument;
4
5fn main() {
6 let path = std::env::args().nth(1).expect("pdf");
7 let bytes = std::fs::read(&path).expect("read");
8 let doc = PdfDocument::open(&bytes, None).expect("open");
9 let mut layout = LayoutModel::load().expect("layout");
10 for (pi, page) in doc.pages.iter().enumerate() {
11 let regions = layout
12 .predict(&page.image, page.width, page.height)
13 .expect("layout");
14 for r in ®ions {
15 let txt: String = page
17 .cells
18 .iter()
19 .filter(|c| {
20 let (cx, cy) = ((c.l + c.r) / 2.0, (c.t + c.b) / 2.0);
21 cx >= r.l && cx <= r.r && cy >= r.t && cy <= r.b
22 })
23 .map(|c| c.text.trim())
24 .collect::<Vec<_>>()
25 .join(" ");
26 let tail: String = txt
27 .chars()
28 .rev()
29 .take(40)
30 .collect::<Vec<_>>()
31 .into_iter()
32 .rev()
33 .collect();
34 println!(
35 "p{} {:>14} t={:6.1} b={:6.1} | …{}",
36 pi + 1,
37 r.label,
38 r.t,
39 r.b,
40 tail
41 );
42 }
43 if std::env::var("DUMP_CELLS").is_ok() {
45 for (ci, c) in page.cells.iter().enumerate() {
46 let snip: String = c.text.chars().take(300).collect();
47 println!(
48 " CELL[{ci}] t={:6.1} l={:6.1} r={:6.1} | {}",
49 c.t, c.l, c.r, snip
50 );
51 }
52 }
53 }
54}