Skip to main content

memvid_core/reader/
passthrough.rs

1use std::sync::OnceLock;
2
3use crate::{
4    DocumentFormat, DocumentProcessor, DocumentReader, ReaderDiagnostics, ReaderHint, ReaderOutput,
5    Result,
6};
7
/// Basic reader that proxies to the global `DocumentProcessor` for formats we
/// already support via Extractous/lopdf.
///
/// This is a field-less unit struct, so it derives the common traits
/// (`Debug`, `Clone`, `Copy`, `Default`) to make it usable in logs, generic
/// containers, and `Default`-based construction.
#[derive(Debug, Clone, Copy, Default)]
pub struct PassthroughReader;
11
12impl PassthroughReader {
13    fn processor() -> &'static DocumentProcessor {
14        static PROCESSOR: OnceLock<DocumentProcessor> = OnceLock::new();
15        PROCESSOR.get_or_init(DocumentProcessor::default)
16    }
17
18    fn supported_format(format: Option<DocumentFormat>) -> bool {
19        matches!(
20            format,
21            Some(
22                DocumentFormat::Pdf
23                    | DocumentFormat::PlainText
24                    | DocumentFormat::Markdown
25                    | DocumentFormat::Html
26            ) | None
27        )
28    }
29}
30
31impl DocumentReader for PassthroughReader {
32    fn name(&self) -> &'static str {
33        "document_processor"
34    }
35
36    fn supports(&self, hint: &ReaderHint<'_>) -> bool {
37        Self::supported_format(hint.format)
38            || hint.mime.is_none_or(|mime| {
39                mime.eq_ignore_ascii_case("application/pdf") || mime.starts_with("text/")
40            })
41    }
42
43    fn extract(&self, bytes: &[u8], _hint: &ReaderHint<'_>) -> Result<ReaderOutput> {
44        let document = Self::processor().extract_from_bytes(bytes)?;
45        Ok(ReaderOutput::new(document, self.name()).with_diagnostics(ReaderDiagnostics::default()))
46    }
47}