kreuzberg 4.7.2

High-performance document intelligence library for Rust. Extracts text, metadata, and structured data from PDFs, Office documents, images, and other files (91+ formats), and provides tree-sitter code intelligence for 248 programming languages, with both async and sync APIs.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
//! Utilities for layout-guided extraction.
//!
//! Groups the table recognition, layout validation, and heading
//! classification helpers that the PDF structure pipeline relies on.

// Submodules private to this module.
mod heading;
mod tables;

// Submodules visible to the parent module.
pub(super) mod layout_validation;
pub(super) mod table_recognition;

// Items re-exported to the parent module so pipeline.rs and other siblings can reach them.
pub(super) use heading::looks_like_figure_label;
pub(super) use tables::extract_tables_from_layout_hints;

// Table recognizers are only re-exported when the `layout-detection` feature is enabled.
#[cfg(feature = "layout-detection")]
pub(super) use table_recognition::{recognize_tables_for_native_page, recognize_tables_slanet};