1use fleischwolf_pdf::layout::LayoutModel;
5use fleischwolf_pdf::tableformer::TableFormer;
6use fleischwolf_pdf::PdfDocument;
7use image::imageops;
8
/// Maps a numeric cell tag to its short printable label.
///
/// Ids 4 through 12 have a label; every other value (including negatives)
/// yields `"?"`.
///
/// NOTE(review): the labels look like OTSL table-structure tokens (the tool
/// calls itself `tf_otsl`) — confirm the id assignment against the model's
/// vocabulary.
fn name(t: i64) -> &'static str {
    // (tag id, label) pairs; ids are contiguous but looked up by value so the
    // table stays correct if entries are ever reordered.
    const LABELS: [(i64, &str); 9] = [
        (4, "ecel"),
        (5, "fcel"),
        (6, "lcel"),
        (7, "ucel"),
        (8, "xcel"),
        (9, "nl"),
        (10, "ched"),
        (11, "rhed"),
        (12, "srow"),
    ];

    LABELS
        .iter()
        .find(|&&(id, _)| id == t)
        .map_or("?", |&(_, label)| label)
}
23
24fn main() {
25 let path = std::env::args().nth(1).expect("usage: tf_otsl <pdf>");
26 let bytes = std::fs::read(&path).expect("read");
27 let doc = PdfDocument::open(&bytes, None).expect("open");
28 let mut layout = LayoutModel::load().expect("layout");
29 let mut tf = TableFormer::load().expect("tableformer models missing");
30 for (pi, page) in doc.pages.iter().enumerate() {
31 let regions = layout
32 .predict(&page.image, page.width, page.height)
33 .expect("layout");
34 let sf = 1024.0 / page.image.height() as f32;
37 let pw1024 = (page.image.width() as f32 * sf) as u32; let page1024 = fleischwolf_pdf::resample::inter_area(&page.image, pw1024, 1024);
39 for r in regions.iter().filter(|r| r.label == "table") {
40 let k = 1024.0 / page.height;
43 let x = (r.l * k).round().max(0.0) as u32;
44 let y = (r.t * k).round().max(0.0) as u32;
45 let x2 = (r.r * k).round() as u32;
46 let y2 = (r.b * k).round() as u32;
47 let (w, h) = (x2 - x, y2 - y);
48 let crop = imageops::crop_imm(&page1024, x, y, w, h).to_image();
49 let cells = tf.predict_table_structure(&crop).expect("predict");
50 println!(
51 "page {} table {}x{}px -> {} cells",
52 pi + 1,
53 w,
54 h,
55 cells.len()
56 );
57 for c in &cells {
58 println!(
59 " r{} c{} {}x{} {} | cxcywh {:.4} {:.4} {:.4} {:.4}",
60 c.row,
61 c.col,
62 c.colspan,
63 c.rowspan,
64 name(c.tag),
65 c.cx,
66 c.cy,
67 c.w,
68 c.h
69 );
70 }
71 }
72 }
73}