Skip to main content

sanitize_engine/processor/
registry.rs

1//! Processor registry — discovers and dispatches structured processors.
2//!
3//! The [`ProcessorRegistry`] holds a set of registered [`Processor`]
4//! implementations and provides methods to:
5//!
6//! 1. Look up a processor by name.
7//! 2. Auto-detect a processor for given content + profile.
8//! 3. Process content using a matching processor, falling back to `None`
9//!    if no processor matches (caller can then use the streaming scanner).
10
11use super::{FileTypeProfile, Processor};
12use crate::error::Result;
13use crate::store::MappingStore;
14use std::collections::HashMap;
15use std::sync::Arc;
16
17/// Registry of structured processors.
18///
19/// Thread-safe (processors are `Arc<dyn Processor>`) and can be shared
20/// across threads via `Arc<ProcessorRegistry>`.
21pub struct ProcessorRegistry {
22    /// Processors indexed by name.
23    processors: HashMap<String, Arc<dyn Processor>>,
24}
25
26impl ProcessorRegistry {
27    /// Create an empty registry.
28    #[must_use]
29    pub fn new() -> Self {
30        Self {
31            processors: HashMap::new(),
32        }
33    }
34
35    /// Create a registry pre-populated with all built-in processors.
36    #[must_use]
37    pub fn with_builtins() -> Self {
38        let mut reg = Self::new();
39        reg.register(Arc::new(super::key_value::KeyValueProcessor));
40        reg.register(Arc::new(super::json_proc::JsonProcessor));
41        reg.register(Arc::new(super::yaml_proc::YamlProcessor));
42        reg.register(Arc::new(super::xml_proc::XmlProcessor));
43        reg.register(Arc::new(super::csv_proc::CsvProcessor));
44        reg.register(Arc::new(super::toml_proc::TomlProcessor));
45        reg.register(Arc::new(super::env_proc::EnvProcessor));
46        reg.register(Arc::new(super::ini_proc::IniProcessor));
47        reg.register(Arc::new(super::log_line::LogLineProcessor::new()));
48        reg
49    }
50
51    /// Register a processor. Overwrites any existing processor with the
52    /// same name.
53    pub fn register(&mut self, processor: Arc<dyn Processor>) {
54        self.processors
55            .insert(processor.name().to_string(), processor);
56    }
57
58    /// Look up a processor by its name.
59    pub fn get(&self, name: &str) -> Option<&Arc<dyn Processor>> {
60        self.processors.get(name)
61    }
62
63    /// List all registered processor names.
64    pub fn names(&self) -> Vec<&str> {
65        self.processors.keys().map(|s| s.as_str()).collect()
66    }
67
68    /// Number of registered processors.
69    #[must_use]
70    pub fn len(&self) -> usize {
71        self.processors.len()
72    }
73
74    /// Whether the registry is empty.
75    #[must_use]
76    pub fn is_empty(&self) -> bool {
77        self.processors.is_empty()
78    }
79
80    /// Find a processor that can handle the given content + profile.
81    ///
82    /// 1. If the profile names a specific processor, look it up directly.
83    /// 2. Otherwise, iterate all processors and return the first whose
84    ///    `can_handle` returns `true`.
85    ///
86    /// Returns `None` if no processor matches (caller should fall back
87    /// to the streaming scanner).
88    pub fn find_processor(
89        &self,
90        content: &[u8],
91        profile: &FileTypeProfile,
92    ) -> Option<&Arc<dyn Processor>> {
93        // Direct lookup by profile's processor name.
94        if let Some(proc) = self.processors.get(&profile.processor) {
95            if proc.can_handle(content, profile) {
96                return Some(proc);
97            }
98        }
99
100        // Auto-detect: first matching processor.
101        self.processors
102            .values()
103            .find(|proc| proc.can_handle(content, profile))
104    }
105
106    /// Process content using the matching processor.
107    ///
108    /// Returns `Ok(Some(output))` if a processor matched and succeeded,
109    /// `Ok(None)` if no processor matches (caller should fall back),
110    /// or `Err(...)` if processing failed.
111    ///
112    /// # Errors
113    ///
114    /// Returns the underlying processor's error if processing fails.
115    pub fn process(
116        &self,
117        content: &[u8],
118        profile: &FileTypeProfile,
119        store: &MappingStore,
120    ) -> Result<Option<Vec<u8>>> {
121        match self.find_processor(content, profile) {
122            Some(proc) => {
123                let output = proc.process(content, profile, store)?;
124                Ok(Some(output))
125            }
126            None => Ok(None),
127        }
128    }
129}
130
131impl Default for ProcessorRegistry {
132    fn default() -> Self {
133        Self::with_builtins()
134    }
135}