oxidize-pdf 2.16.1

Pure Rust PDF library for AI/RAG: structure-aware chunking with bounding boxes, heading context, and token estimates. No Python, no ML, no C bindings.
Documentation
// Submodule declarations.
//
// `chunk_metadata` is the only module here restricted to `pub(crate)`; code
// outside the crate reaches its items through the `pub use chunk_metadata::...`
// re-exports in this file rather than through the module path.
pub(crate) mod chunk_metadata;
pub mod element;
pub mod export;
pub mod graph;
pub mod hybrid_chunking;
pub mod partition;
pub mod profile;
pub mod rag;
pub mod reading_order;
pub mod semantic_chunking;
// `spi` is compiled only when the `unstable-spi` Cargo feature is enabled.
#[cfg(feature = "unstable-spi")]
pub mod spi;

// Flat re-exports: each item below is importable directly from this module,
// without spelling out the submodule that defines it.

// `detect_language` is re-exported only when the `language-detection` feature
// is enabled; the remaining `chunk_metadata` items are re-exported
// unconditionally.
#[cfg(feature = "language-detection")]
pub use chunk_metadata::detect_language;
pub use chunk_metadata::{ChunkMetadata, ContentTypeFlags, DocumentSource, PageRegion};
pub use element::{
    element_reading_order, Element, ElementBBox, ElementData, ElementMetadata, ImageElementData,
    KeyValueElementData, TableElementData,
};
pub use export::{ElementMarkdownExporter, ExportConfig};
pub use graph::ElementGraph;
pub use hybrid_chunking::{HybridChunk, HybridChunkConfig, HybridChunker, MergePolicy};
pub use partition::{PartitionConfig, Partitioner, ReadingOrderStrategy};
pub use profile::{ExtractionProfile, ProfileConfig};
pub use rag::RagChunk;
pub use reading_order::{ReadingOrder, SimpleReadingOrder, XYCutReadingOrder};
pub use semantic_chunking::{SemanticChunk, SemanticChunkConfig, SemanticChunker};
// Re-exports from `spi` carry the same `unstable-spi` gate as the `spi` module
// declaration, so they disappear together with the module when the feature is off.
#[cfg(feature = "unstable-spi")]
pub use spi::{
    AnalysisPipeline, ChunkGroup, ChunkingStrategy, ClassLabel, ClassifyContext, ElementClassifier,
};
// These two items additionally require the `semantic` feature.
// NOTE(review): presumably their definitions inside `spi` are gated on
// `semantic` as well — confirm against `spi`.
#[cfg(all(feature = "unstable-spi", feature = "semantic"))]
pub use spi::{EnrichContext, MetadataEnricher};