pdf_oxide 0.3.22

The fastest Rust PDF library with text extraction: 0.8ms mean, 100% pass rate on 3,830 PDFs. 5× faster than pdf_extract, 17× faster than oxidize_pdf. Extract, create, and edit PDFs.
Documentation
//! Layout analysis algorithms for PDF documents.
//!
//! This module provides algorithms for analyzing document layout:
//! - DBSCAN clustering (characters → words → lines)
//! - Reading order determination
//! - Font clustering and normalization
//! - Bounded text extraction (v0.3.1)

// Submodules that make up the layout-analysis module. Each one is declared
// `pub`, so its items can also be reached through the full submodule path
// (for example `clustering::...`) in addition to the re-exports further down.
pub mod area_filter;
pub mod clustering;
pub mod document_analyzer;
pub mod reading_order;
pub mod text_block;

// Phase 2: Core architectural components
pub mod bold_validation;
pub mod font_normalization;

// Re-export main types
// These `pub use` lines let callers name the listed items directly from this
// module instead of spelling out the submodule they are defined in.
// NOTE(review): no item from `clustering` is re-exported in this section, so
// from here its items are reachable only via the `clustering::` path —
// confirm that this is intentional.
pub use area_filter::{LayoutObjectSpatial, RectFilterMode, SpatialCollectionFiltering};
pub use document_analyzer::{AdaptiveLayoutParams, DocumentProperties};
pub use reading_order::graph_based_reading_order;
pub use text_block::{Color, FontWeight, PageText, TextBlock, TextChar, TextLine, TextSpan, Word};

// Re-export Phase 2 components
// Same flattening as above, applied to the items of the two Phase 2
// submodules (`bold_validation` and `font_normalization`).
pub use bold_validation::{BoldGroup, BoldMarkerDecision, BoldMarkerValidator};
pub use font_normalization::{FontWeightNormalizer, NormalizedSpan, SpanType};