pdf_oxide 0.3.33

The fastest Rust PDF library with text extraction: 0.8ms mean, 100% pass rate on 3,830 PDFs. 5× faster than pdf_extract, 17× faster than oxidize_pdf. Extract, create, and edit PDFs.
Documentation
//! Text processing and analysis module.
//!
//! This module provides tools for working with extracted text from PDF documents,
//! including word boundary detection per ISO 32000-1:2008 specification.

pub mod cjk_punctuation;
pub mod complex_script_detector;
pub mod document_classifier;
pub mod hyphenation;
pub mod justification;
pub mod kangxi;
pub mod ligature_processor;
pub mod rtl_detector;
pub mod script_detector;
pub mod word_boundary;

pub use complex_script_detector::{detect_complex_script, is_complex_script, ComplexScript};
pub use document_classifier::{DocumentClassifier, DocumentStats};
pub use hyphenation::HyphenationHandler;
pub use justification::{JustificationDetector, JustificationMode};
pub use rtl_detector::{
    detect_rtl_script, is_arabic_diacritic, is_hebrew_diacritic, is_rtl_text, RTLScript,
};
pub use script_detector::{CJKScript, DocumentLanguage};
pub use word_boundary::{
    detect_word_boundaries, BoundaryContext, CharacterInfo, DocumentScript, WordBoundaryDetector,
};