Skip to main content

orbok_extract/
lib.rs

1//! # orbok-extract
2//!
3//! Text extraction (RFC-005): pluggable extractors turn boundary-
4//! validated source files into normalized, line-located segments.
5//! Extraction output is derived data — cacheable, rebuildable, never
6//! authoritative.
7
// Public submodules: each is declared `pub`, so it is visible outside this crate.
8pub mod chunker;
9pub mod normalize;
10pub mod registry;
11pub mod types;
12
// Mixed visibility: `markdown` and `text` are private (no `pub`), while
// `plugin` and `pdf` are public.
// NOTE(review): `markdown`/`text` are presumably built-in extractor
// implementations kept internal on purpose — confirm against `registry`.
13mod markdown;
14pub mod plugin;
15pub mod pdf;
16mod text;
17
// Test module; the `#[cfg(test)]` attribute means it is compiled only in test builds.
18#[cfg(test)]
19mod tests;
20
// Crate-root re-exports: these items can be named directly from the crate
// root instead of through `registry::`, `chunker::`, `plugin::` or `types::`.
21pub use registry::ExtractorRegistry;
22pub use chunker::chunk;
23pub use plugin::{PluginManifest, PluginRegistry};
24pub use types::{
25    DocumentExtractor, ExtractOutput, ExtractedSegment, LocationQuality, SegmentKind,
26};