kreuzberg 4.4.2

High-performance document intelligence library for Rust. Extract text, metadata, and structured data from PDFs, Office documents, images, and 75+ formats with async/sync APIs.
Documentation
// Submodule declarations and public re-exports for this module.
//
// `utf8_validation` is the only submodule declared without a `cfg` gate, so it
// is compiled regardless of which Cargo features are enabled.
pub mod utf8_validation;

// Every submodule below is compiled only when the `quality` Cargo feature is
// enabled; with the feature off, none of these paths exist.
#[cfg(feature = "quality")]
pub mod quality;

#[cfg(feature = "quality")]
pub mod string_utils;

#[cfg(feature = "quality")]
pub mod token_reduction;

#[cfg(feature = "quality")]
pub mod quality_processor;

// Re-exports: surface selected items from the gated submodules at this
// module's level so callers do not have to name the submodule. Each `pub use`
// carries the same `cfg` gate as the module it imports from, because that
// module is not compiled when the feature is off.
#[cfg(feature = "quality")]
pub use quality::{calculate_quality_score, clean_extracted_text, normalize_spaces};

#[cfg(feature = "quality")]
pub use quality_processor::QualityProcessor;

#[cfg(feature = "quality")]
pub use string_utils::{calculate_text_confidence, fix_mojibake, get_encoding_cache_key, safe_decode};

// NOTE(review): by naming convention `ReductionLevel` and `TokenReductionConfig`
// look like types and the snake_case names like functions — confirm against
// `token_reduction` before relying on this.
#[cfg(feature = "quality")]
pub use token_reduction::{
    ReductionLevel, TokenReductionConfig, batch_reduce_tokens, get_reduction_statistics, reduce_tokens,
};