orbok_extract/
registry.rs1use crate::markdown::MarkdownExtractor;
5use crate::text::PlainTextExtractor;
6use crate::types::{DocumentExtractor, ExtractOutput};
7use orbok_core::{ErrorCategory, OrbokError, OrbokResult};
8use orbok_fs::ValidatedPath;
9
10pub struct ExtractorRegistry {
13 extractors: Vec<Box<dyn DocumentExtractor>>,
14}
15
16impl Default for ExtractorRegistry {
17 fn default() -> Self {
18 Self {
19 extractors: vec![Box::new(MarkdownExtractor), Box::new(PlainTextExtractor)],
20 }
21 }
22}
23
24impl ExtractorRegistry {
25 pub fn select(&self, extension: &str) -> Option<&dyn DocumentExtractor> {
27 let ext = extension.to_ascii_lowercase();
28 self.extractors
29 .iter()
30 .find(|e| e.supported_extensions().contains(&ext.as_str()))
31 .map(|e| e.as_ref())
32 }
33
34 pub fn extract(&self, path: &ValidatedPath) -> OrbokResult<ExtractOutput> {
38 let extension = path
39 .canonical
40 .extension()
41 .and_then(|e| e.to_str())
42 .unwrap_or_default();
43 match self.select(extension) {
44 Some(extractor) => {
45 tracing::debug!(extractor = extractor.name(), "extracting");
46 extractor.extract(path)
47 }
48 None => Err(OrbokError::Extraction {
49 category: ErrorCategory::UnsupportedType,
50 message: format!("no extractor for extension '{extension}'"),
51 }),
52 }
53 }
54}