orbok_extract/lib.rs
1//! # orbok-extract
2//!
3//! Text extraction (RFC-005): pluggable extractors turn
4//! boundary-validated source files into normalized, line-located segments.
5//! Extraction output is derived data — cacheable, rebuildable, never
6//! authoritative.
7
8pub mod chunker;
9pub mod normalize;
10pub mod registry;
11pub mod types;
12
13mod markdown;
14mod pdf;
15mod text;
16
17#[cfg(test)]
18mod tests;
19
20pub use registry::ExtractorRegistry;
21pub use chunker::chunk;
22pub use types::{
23 DocumentExtractor, ExtractOutput, ExtractedSegment, LocationQuality, SegmentKind,
24};