Skip to main content

LayoutModel

Struct LayoutModel 

Source
pub struct LayoutModel { /* private fields */ }

Implementations§

Source§

impl LayoutModel

Source

pub fn load() -> Result<Self, String>

Load the ONNX model from the path given by DOCLING_LAYOUT_ONNX. When that override is not set, prefers models/layout_heron_int8.onnx if it is present (the quantized default; FLEISCHWOLF_FP32=1 opts out of it), and otherwise falls back to models/layout_heron.onnx.

Examples found in repository?
examples/layout.rs (line 11)
7fn main() {
8    let path = std::env::args().nth(1).expect("usage: layout <file.pdf>");
9    let bytes = std::fs::read(&path).expect("read pdf");
10    let doc = PdfDocument::open(&bytes, None).expect("open pdf");
11    let mut model = LayoutModel::load().expect("load layout model");
12    for (i, page) in doc.pages.iter().enumerate().take(1) {
13        let regions = model
14            .predict(&page.image, page.width, page.height)
15            .expect("predict");
16        println!(
17            "page {} ({:.0}x{:.0}): {} regions",
18            i + 1,
19            page.width,
20            page.height,
21            regions.len()
22        );
23        let mut rs = regions.clone();
24        rs.sort_by(|a, b| a.t.total_cmp(&b.t));
25        for r in &rs {
26            println!(
27                "  {:<16} {:.2}  [{:.0},{:.0},{:.0},{:.0}]",
28                r.label, r.score, r.l, r.t, r.r, r.b
29            );
30        }
31    }
32}
More examples
Hide additional examples
examples/dump_regions.rs (line 9)
5fn main() {
6    let path = std::env::args().nth(1).expect("pdf");
7    let bytes = std::fs::read(&path).expect("read");
8    let doc = PdfDocument::open(&bytes, None).expect("open");
9    let mut layout = LayoutModel::load().expect("layout");
10    for (pi, page) in doc.pages.iter().enumerate() {
11        let regions = layout
12            .predict(&page.image, page.width, page.height)
13            .expect("layout");
14        for r in &regions {
15            // crude text: cells whose center is inside the region
16            let txt: String = page
17                .cells
18                .iter()
19                .filter(|c| {
20                    let (cx, cy) = ((c.l + c.r) / 2.0, (c.t + c.b) / 2.0);
21                    cx >= r.l && cx <= r.r && cy >= r.t && cy <= r.b
22                })
23                .map(|c| c.text.trim())
24                .collect::<Vec<_>>()
25                .join(" ");
26            let tail: String = txt
27                .chars()
28                .rev()
29                .take(40)
30                .collect::<Vec<_>>()
31                .into_iter()
32                .rev()
33                .collect();
34            println!(
35                "p{} {:>14} t={:6.1} b={:6.1} | …{}",
36                pi + 1,
37                r.label,
38                r.t,
39                r.b,
40                tail
41            );
42        }
43        // raw line cells in extraction order (to inspect RTL ordering)
44        if std::env::var("DUMP_CELLS").is_ok() {
45            for (ci, c) in page.cells.iter().enumerate() {
46                let snip: String = c.text.chars().take(300).collect();
47                println!(
48                    "   CELL[{ci}] t={:6.1} l={:6.1} r={:6.1} | {}",
49                    c.t, c.l, c.r, snip
50                );
51            }
52        }
53    }
54}
examples/tf_otsl.rs (line 28)
24fn main() {
25    let path = std::env::args().nth(1).expect("usage: tf_otsl <pdf>");
26    let bytes = std::fs::read(&path).expect("read");
27    let doc = PdfDocument::open(&bytes, None).expect("open");
28    let mut layout = LayoutModel::load().expect("layout");
29    let mut tf = TableFormer::load().expect("tableformer models missing");
30    for (pi, page) in doc.pages.iter().enumerate() {
31        let regions = layout
32            .predict(&page.image, page.width, page.height)
33            .expect("layout");
34        // docling resizes the whole page to 1024px height (cv2.INTER_AREA), then
35        // crops the table bbox out of *that*. Replicate exactly.
36        let sf = 1024.0 / page.image.height() as f32;
37        let pw1024 = (page.image.width() as f32 * sf) as u32; // docling: int(w*r)
38        let page1024 = fleischwolf_pdf::resample::inter_area(&page.image, pw1024, 1024);
39        for r in regions.iter().filter(|r| r.label == "table") {
40            // bbox (points) → 1024px-page coords: scale*sf = 1024/page_h_pt;
41            // docling rounds the crop edges.
42            let k = 1024.0 / page.height;
43            let x = (r.l * k).round().max(0.0) as u32;
44            let y = (r.t * k).round().max(0.0) as u32;
45            let x2 = (r.r * k).round() as u32;
46            let y2 = (r.b * k).round() as u32;
47            let (w, h) = (x2 - x, y2 - y);
48            let crop = imageops::crop_imm(&page1024, x, y, w, h).to_image();
49            let cells = tf.predict_table_structure(&crop).expect("predict");
50            println!(
51                "page {} table {}x{}px -> {} cells",
52                pi + 1,
53                w,
54                h,
55                cells.len()
56            );
57            for c in &cells {
58                println!(
59                    "  r{} c{} {}x{} {} | cxcywh {:.4} {:.4} {:.4} {:.4}",
60                    c.row,
61                    c.col,
62                    c.colspan,
63                    c.rowspan,
64                    name(c.tag),
65                    c.cx,
66                    c.cy,
67                    c.w,
68                    c.h
69                );
70            }
71        }
72    }
73}
Source

pub fn load_with(intra: usize) -> Result<Self, String>

Like load, but with an explicit intra-op thread count (intra). This is meant for a parallel page-worker pool: each worker loads its helper models with a single thread, and the speed-up comes from running pages concurrently instead.

Source

pub fn predict( &mut self, img: &RgbImage, page_w: f32, page_h: f32, ) -> Result<Vec<Region>, String>

Detect layout regions on a page image. page_w and page_h are the page size in points; the returned boxes are expressed in that same point coordinate space.

Examples found in repository?
examples/layout.rs (line 14)
7fn main() {
8    let path = std::env::args().nth(1).expect("usage: layout <file.pdf>");
9    let bytes = std::fs::read(&path).expect("read pdf");
10    let doc = PdfDocument::open(&bytes, None).expect("open pdf");
11    let mut model = LayoutModel::load().expect("load layout model");
12    for (i, page) in doc.pages.iter().enumerate().take(1) {
13        let regions = model
14            .predict(&page.image, page.width, page.height)
15            .expect("predict");
16        println!(
17            "page {} ({:.0}x{:.0}): {} regions",
18            i + 1,
19            page.width,
20            page.height,
21            regions.len()
22        );
23        let mut rs = regions.clone();
24        rs.sort_by(|a, b| a.t.total_cmp(&b.t));
25        for r in &rs {
26            println!(
27                "  {:<16} {:.2}  [{:.0},{:.0},{:.0},{:.0}]",
28                r.label, r.score, r.l, r.t, r.r, r.b
29            );
30        }
31    }
32}
More examples
Hide additional examples
examples/dump_regions.rs (line 12)
5fn main() {
6    let path = std::env::args().nth(1).expect("pdf");
7    let bytes = std::fs::read(&path).expect("read");
8    let doc = PdfDocument::open(&bytes, None).expect("open");
9    let mut layout = LayoutModel::load().expect("layout");
10    for (pi, page) in doc.pages.iter().enumerate() {
11        let regions = layout
12            .predict(&page.image, page.width, page.height)
13            .expect("layout");
14        for r in &regions {
15            // crude text: cells whose center is inside the region
16            let txt: String = page
17                .cells
18                .iter()
19                .filter(|c| {
20                    let (cx, cy) = ((c.l + c.r) / 2.0, (c.t + c.b) / 2.0);
21                    cx >= r.l && cx <= r.r && cy >= r.t && cy <= r.b
22                })
23                .map(|c| c.text.trim())
24                .collect::<Vec<_>>()
25                .join(" ");
26            let tail: String = txt
27                .chars()
28                .rev()
29                .take(40)
30                .collect::<Vec<_>>()
31                .into_iter()
32                .rev()
33                .collect();
34            println!(
35                "p{} {:>14} t={:6.1} b={:6.1} | …{}",
36                pi + 1,
37                r.label,
38                r.t,
39                r.b,
40                tail
41            );
42        }
43        // raw line cells in extraction order (to inspect RTL ordering)
44        if std::env::var("DUMP_CELLS").is_ok() {
45            for (ci, c) in page.cells.iter().enumerate() {
46                let snip: String = c.text.chars().take(300).collect();
47                println!(
48                    "   CELL[{ci}] t={:6.1} l={:6.1} r={:6.1} | {}",
49                    c.t, c.l, c.r, snip
50                );
51            }
52        }
53    }
54}
examples/tf_otsl.rs (line 32)
24fn main() {
25    let path = std::env::args().nth(1).expect("usage: tf_otsl <pdf>");
26    let bytes = std::fs::read(&path).expect("read");
27    let doc = PdfDocument::open(&bytes, None).expect("open");
28    let mut layout = LayoutModel::load().expect("layout");
29    let mut tf = TableFormer::load().expect("tableformer models missing");
30    for (pi, page) in doc.pages.iter().enumerate() {
31        let regions = layout
32            .predict(&page.image, page.width, page.height)
33            .expect("layout");
34        // docling resizes the whole page to 1024px height (cv2.INTER_AREA), then
35        // crops the table bbox out of *that*. Replicate exactly.
36        let sf = 1024.0 / page.image.height() as f32;
37        let pw1024 = (page.image.width() as f32 * sf) as u32; // docling: int(w*r)
38        let page1024 = fleischwolf_pdf::resample::inter_area(&page.image, pw1024, 1024);
39        for r in regions.iter().filter(|r| r.label == "table") {
40            // bbox (points) → 1024px-page coords: scale*sf = 1024/page_h_pt;
41            // docling rounds the crop edges.
42            let k = 1024.0 / page.height;
43            let x = (r.l * k).round().max(0.0) as u32;
44            let y = (r.t * k).round().max(0.0) as u32;
45            let x2 = (r.r * k).round() as u32;
46            let y2 = (r.b * k).round() as u32;
47            let (w, h) = (x2 - x, y2 - y);
48            let crop = imageops::crop_imm(&page1024, x, y, w, h).to_image();
49            let cells = tf.predict_table_structure(&crop).expect("predict");
50            println!(
51                "page {} table {}x{}px -> {} cells",
52                pi + 1,
53                w,
54                h,
55                cells.len()
56            );
57            for c in &cells {
58                println!(
59                    "  r{} c{} {}x{} {} | cxcywh {:.4} {:.4} {:.4} {:.4}",
60                    c.row,
61                    c.col,
62                    c.colspan,
63                    c.rowspan,
64                    name(c.tag),
65                    c.cx,
66                    c.cy,
67                    c.w,
68                    c.h
69                );
70            }
71        }
72    }
73}

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT
where ST: ?Sized, DT: ?Sized,

Source§

impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT
where ST: ?Sized, DT: ?Sized,

Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> Pointable for T

Source§

const ALIGN: usize

The alignment of the pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a value with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> Read<Exclusive, BecauseExclusive> for T
where T: ?Sized,

Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.