Skip to main content

orbok_extract/
lib.rs

1//! # orbok-extract
2//!
3//! Text extraction (RFC-005): pluggable extractors turn boundary-
4//! validated source files into normalized, line-located segments.
5//! Extraction output is derived data — cacheable, rebuildable, never
6//! authoritative.
7
8pub mod chunker; // public module; its `chunk` item is also re-exported at the crate root
9pub mod normalize; // public module; reachable only via its module path from this file
10pub mod registry; // public module; `ExtractorRegistry` is also re-exported at the crate root
11pub mod types; // public module; several of its items are also re-exported at the crate root
12
13mod markdown; // private to this crate; presumably the Markdown extractor (confirm in markdown.rs)
14mod text; // private to this crate; presumably the plain-text extractor (confirm in text.rs)
15
16#[cfg(test)] // the next item is compiled only for test builds
17mod tests; // private test module
18
19pub use chunker::chunk; // crate-root alias for `chunker::chunk`
20pub use registry::ExtractorRegistry; // crate-root alias; group kept in rustfmt's sorted order
21pub use types::{
22    DocumentExtractor, ExtractOutput, ExtractedSegment, LocationQuality, SegmentKind,
23}; // crate-root aliases for the listed `types` items