Skip to main content

sanitize_engine/processor/
registry.rs

1//! Processor registry — discovers and dispatches structured processors.
2//!
3//! The [`ProcessorRegistry`] holds a set of registered [`Processor`]
4//! implementations and provides methods to:
5//!
6//! 1. Look up a processor by name.
7//! 2. Auto-detect a processor for given content + profile.
8//! 3. Process content using a matching processor, falling back to `None`
9//!    if no processor matches (caller can then use the streaming scanner).
10
11use super::{FileTypeProfile, Processor};
12use crate::error::Result;
13use crate::store::MappingStore;
14use std::collections::HashMap;
15use std::sync::Arc;
16
17/// Registry of structured processors.
18///
19/// Thread-safe (processors are `Arc<dyn Processor>`) and can be shared
20/// across threads via `Arc<ProcessorRegistry>`.
21pub struct ProcessorRegistry {
22    /// Processors indexed by name.
23    processors: HashMap<String, Arc<dyn Processor>>,
24}
25
26impl ProcessorRegistry {
27    /// Create an empty registry.
28    #[must_use]
29    pub fn new() -> Self {
30        Self {
31            processors: HashMap::new(),
32        }
33    }
34
35    /// Create a registry pre-populated with all built-in processors.
36    #[must_use]
37    pub fn with_builtins() -> Self {
38        let mut reg = Self::new();
39        reg.register(Arc::new(super::key_value::KeyValueProcessor));
40        reg.register(Arc::new(super::json_proc::JsonProcessor));
41        reg.register(Arc::new(super::yaml_proc::YamlProcessor));
42        reg.register(Arc::new(super::xml_proc::XmlProcessor));
43        reg.register(Arc::new(super::csv_proc::CsvProcessor));
44        reg
45    }
46
47    /// Register a processor. Overwrites any existing processor with the
48    /// same name.
49    pub fn register(&mut self, processor: Arc<dyn Processor>) {
50        self.processors
51            .insert(processor.name().to_string(), processor);
52    }
53
54    /// Look up a processor by its name.
55    pub fn get(&self, name: &str) -> Option<&Arc<dyn Processor>> {
56        self.processors.get(name)
57    }
58
59    /// List all registered processor names.
60    pub fn names(&self) -> Vec<&str> {
61        self.processors.keys().map(|s| s.as_str()).collect()
62    }
63
64    /// Number of registered processors.
65    #[must_use]
66    pub fn len(&self) -> usize {
67        self.processors.len()
68    }
69
70    /// Whether the registry is empty.
71    #[must_use]
72    pub fn is_empty(&self) -> bool {
73        self.processors.is_empty()
74    }
75
76    /// Find a processor that can handle the given content + profile.
77    ///
78    /// 1. If the profile names a specific processor, look it up directly.
79    /// 2. Otherwise, iterate all processors and return the first whose
80    ///    `can_handle` returns `true`.
81    ///
82    /// Returns `None` if no processor matches (caller should fall back
83    /// to the streaming scanner).
84    pub fn find_processor(
85        &self,
86        content: &[u8],
87        profile: &FileTypeProfile,
88    ) -> Option<&Arc<dyn Processor>> {
89        // Direct lookup by profile's processor name.
90        if let Some(proc) = self.processors.get(&profile.processor) {
91            if proc.can_handle(content, profile) {
92                return Some(proc);
93            }
94        }
95
96        // Auto-detect: first matching processor.
97        self.processors
98            .values()
99            .find(|proc| proc.can_handle(content, profile))
100    }
101
102    /// Process content using the matching processor.
103    ///
104    /// Returns `Ok(Some(output))` if a processor matched and succeeded,
105    /// `Ok(None)` if no processor matches (caller should fall back),
106    /// or `Err(...)` if processing failed.
107    ///
108    /// # Errors
109    ///
110    /// Returns the underlying processor's error if processing fails.
111    pub fn process(
112        &self,
113        content: &[u8],
114        profile: &FileTypeProfile,
115        store: &MappingStore,
116    ) -> Result<Option<Vec<u8>>> {
117        match self.find_processor(content, profile) {
118            Some(proc) => {
119                let output = proc.process(content, profile, store)?;
120                Ok(Some(output))
121            }
122            None => Ok(None),
123        }
124    }
125}
126
127impl Default for ProcessorRegistry {
128    fn default() -> Self {
129        Self::with_builtins()
130    }
131}