use crate::markdown::MarkdownExtractor;
use crate::text::PlainTextExtractor;
use crate::types::{DocumentExtractor, ExtractOutput};
use orbok_core::{ErrorCategory, OrbokError, OrbokResult};
use orbok_fs::ValidatedPath;
/// Ordered collection of document extractors used to pick a handler by file
/// extension.
pub struct ExtractorRegistry {
/// Registered extractors, stored as boxed trait objects. Lookup scans this
/// list front to back and stops at the first match, so earlier entries take
/// priority over later ones.
extractors: Vec<Box<dyn DocumentExtractor>>,
}
impl Default for ExtractorRegistry {
fn default() -> Self {
Self {
extractors: vec![Box::new(MarkdownExtractor), Box::new(PlainTextExtractor)],
}
}
}
impl ExtractorRegistry {
    /// Returns the first registered extractor that lists `extension` among its
    /// supported extensions, or `None` when no extractor claims it.
    ///
    /// `extension` is ASCII-lowercased before the comparison, so the caller may
    /// pass it in any case. The extractors' own lists are compared as-is
    /// (NOTE(review): this presumably expects them to be declared in lowercase
    /// — confirm against the `DocumentExtractor` implementations).
    pub fn select(&self, extension: &str) -> Option<&dyn DocumentExtractor> {
        let lowered = extension.to_ascii_lowercase();
        let wanted = lowered.as_str();
        // Registration order decides priority: stop at the first match.
        for candidate in &self.extractors {
            if candidate.supported_extensions().contains(&wanted) {
                return Some(candidate.as_ref());
            }
        }
        None
    }

    /// Extracts the document at `path` with the extractor chosen by the
    /// extension of `path.canonical`.
    ///
    /// A path with no extension, or one whose extension is not valid UTF-8, is
    /// looked up under the empty string.
    ///
    /// # Errors
    ///
    /// Returns [`OrbokError::Extraction`] with
    /// [`ErrorCategory::UnsupportedType`] when no registered extractor handles
    /// the extension. Otherwise returns whatever the selected extractor's
    /// `extract` call returns.
    pub fn extract(&self, path: &ValidatedPath) -> OrbokResult<ExtractOutput> {
        let extension = path
            .canonical
            .extension()
            .and_then(|raw| raw.to_str())
            .unwrap_or("");

        let extractor = self
            .select(extension)
            .ok_or_else(|| OrbokError::Extraction {
                category: ErrorCategory::UnsupportedType,
                message: format!("no extractor for extension '{extension}'"),
            })?;

        tracing::debug!(extractor = extractor.name(), "extracting");
        extractor.extract(path)
    }
}