Skip to main content

orbok_extract/
lib.rs

1//! # orbok-extract
2//!
3//! Text extraction (RFC-005): pluggable extractors turn boundary-
4//! validated source files into normalized, line-located segments.
5//! Extraction output is derived data — cacheable, rebuildable, never
6//! authoritative.
7
// Submodule declarations for this crate, in two groups separated by a blank
// line. Every module below is declared `pub` except `markdown` and `text`,
// which have no visibility modifier and are therefore private to the crate.
8pub mod chunker;
9pub mod normalize;
10pub mod registry;
11pub mod types;
12
// NOTE(review): `docx`, `html`, `plugin` and `pdf` are public while `markdown`
// and `text` are private — presumably the private ones are only meant to be
// reached indirectly (e.g. via the registry); confirm the asymmetry is
// intentional.
// NOTE(review): `plugin` is listed before `pdf`, which is not alphabetical;
// rustfmt's default module reordering would likely swap them — verify with
// `cargo fmt --check`.
13pub mod docx;
14pub mod html;
15mod markdown;
16pub mod plugin;
17pub mod pdf;
18mod text;
19
// The `tests` submodule is private and is only compiled when the `test` cfg
// is active (for example under `cargo test`); it is absent from normal builds.
20#[cfg(test)]
21mod tests;
22
// Crate-root re-exports: each item below is defined in a submodule
// (`registry`, `chunker`, `plugin`, `types`) and re-exported with `pub use`,
// so it can be named directly from the crate root.
// NOTE(review): the `use` lines are not in alphabetical order (`registry`
// precedes `chunker`); rustfmt's default import reordering would likely sort
// them — verify with `cargo fmt --check`.
23pub use registry::ExtractorRegistry;
24pub use chunker::chunk;
25pub use plugin::{PluginManifest, PluginRegistry};
26pub use types::{
27    DocumentExtractor, ExtractOutput, ExtractedSegment, LocationQuality, SegmentKind,
28};