kreuzberg 4.8.0

High-performance document intelligence library for Rust. Extract text, metadata, and structured data from PDFs, Office documents, images, and 91+ formats, plus 248 programming languages via tree-sitter code intelligence, with both async and sync APIs.
Documentation
1
2
3
4
5
6
7
8
9
10
// This module acts as a single import point for table-related helpers: it
// declares one public submodule and re-exports items that are defined in
// other parts of the crate, so callers can reach them all through this path.

// Public submodule; its `extract_words_from_tsv` function is also re-exported
// at this level at the bottom of the file.
pub mod tsv_parser;

// Re-export core table utilities (HocrWord, reconstruct_table, table_to_markdown)
// from `crate::table_core`. These are always available, regardless of features.
pub use crate::table_core::{HocrWord, reconstruct_table, table_to_markdown};

// Re-export PDF-specific table utilities when the pdf feature is enabled.
// Without the `pdf` feature, `post_process_table` is NOT exported from this
// module, so code that uses it through this path must be gated the same way.
#[cfg(feature = "pdf")]
pub use crate::pdf::table_reconstruct::post_process_table;

// Convenience re-export so callers can use `extract_words_from_tsv` without
// naming the `tsv_parser` submodule explicitly.
pub use tsv_parser::extract_words_from_tsv;