oxirs_vec/content_processing/
mod.rs1#[cfg(feature = "content-processing")]
9use anyhow::Result;
10#[cfg(feature = "content-processing")]
11use std::collections::HashMap;
12
13mod data_handlers;
15mod multimedia_handlers;
16mod office_handlers;
17mod pdf_handler;
18mod text_handlers;
19mod types;
20
21#[cfg(feature = "content-processing")]
22pub use data_handlers::*;
23#[cfg(feature = "content-processing")]
24pub use multimedia_handlers::*;
25#[cfg(feature = "content-processing")]
26pub use office_handlers::*;
27#[cfg(feature = "content-processing")]
28pub use pdf_handler::*;
29#[cfg(feature = "content-processing")]
30pub use text_handlers::*;
31#[cfg(feature = "content-processing")]
32pub use types::*;
33
/// Holds an extraction configuration together with a table of format handlers.
///
/// Only compiled when the `content-processing` feature is enabled.
// NOTE(review): the `impl` block is not visible from this part of the file;
// presumably it dispatches documents to the matching handler — confirm there.
#[cfg(feature = "content-processing")]
pub struct ContentProcessor {
    /// Extraction settings; the same type each handler receives in
    /// [`FormatHandler::extract_content`].
    config: ContentExtractionConfig,
    /// One boxed handler per [`DocumentFormat`] key.
    format_handlers: HashMap<DocumentFormat, Box<dyn FormatHandler>>,
}

/// Interface implemented by per-format content extractors.
///
/// Implementors must be `Send + Sync`, which lets a boxed handler be stored in
/// [`ContentProcessor`] without further bounds. Only compiled when the
/// `content-processing` feature is enabled.
#[cfg(feature = "content-processing")]
pub trait FormatHandler: Send + Sync {
    /// Extracts content from the raw bytes of a document.
    ///
    /// # Errors
    ///
    /// Returns an error when the implementor fails to extract content from
    /// `data`; the concrete failure modes are defined by each implementation.
    fn extract_content(
        &self,
        data: &[u8],
        config: &ContentExtractionConfig,
    ) -> Result<ExtractedContent>;

    /// Reports whether this handler accepts the given raw bytes.
    // NOTE(review): presumably implemented by sniffing magic bytes or the
    // overall structure of `data` — confirm against the implementors.
    fn can_handle(&self, data: &[u8]) -> bool;

    /// Lists the file extensions this handler declares support for.
    // NOTE(review): whether entries carry a leading dot or a fixed case is not
    // visible here — confirm against the implementors.
    fn supported_extensions(&self) -> Vec<&'static str>;
}