memvid_core/reader/
passthrough.rs

1use std::sync::OnceLock;
2
3use crate::{
4    DocumentFormat, DocumentProcessor, DocumentReader, ReaderDiagnostics, ReaderHint, ReaderOutput,
5    Result,
6};
7
/// Reader that hands extraction off to a shared [`DocumentProcessor`].
///
/// It covers the formats the crate already handles via Extractous/lopdf and
/// carries no state of its own.
pub struct PassthroughReader;
11
12impl PassthroughReader {
13    fn processor() -> &'static DocumentProcessor {
14        static PROCESSOR: OnceLock<DocumentProcessor> = OnceLock::new();
15        PROCESSOR.get_or_init(DocumentProcessor::default)
16    }
17
18    fn supported_format(format: Option<DocumentFormat>) -> bool {
19        matches!(
20            format,
21            Some(DocumentFormat::Pdf)
22                | Some(DocumentFormat::PlainText)
23                | Some(DocumentFormat::Markdown)
24                | Some(DocumentFormat::Html)
25                | None
26        )
27    }
28}
29
30impl DocumentReader for PassthroughReader {
31    fn name(&self) -> &'static str {
32        "document_processor"
33    }
34
35    fn supports(&self, hint: &ReaderHint<'_>) -> bool {
36        Self::supported_format(hint.format)
37            || hint
38                .mime
39                .map(|mime| {
40                    mime.eq_ignore_ascii_case("application/pdf") || mime.starts_with("text/")
41                })
42                .unwrap_or(true)
43    }
44
45    fn extract(&self, bytes: &[u8], _hint: &ReaderHint<'_>) -> Result<ReaderOutput> {
46        let document = Self::processor().extract_from_bytes(bytes)?;
47        Ok(ReaderOutput::new(document, self.name()).with_diagnostics(ReaderDiagnostics::default()))
48    }
49}