sanitize_engine/processor/registry.rs
1//! Processor registry — discovers and dispatches structured processors.
2//!
3//! The [`ProcessorRegistry`] holds a set of registered [`Processor`]
4//! implementations and provides methods to:
5//!
6//! 1. Look up a processor by name.
7//! 2. Auto-detect a processor for given content + profile.
8//! 3. Process content using a matching processor, falling back to `None`
9//! if no processor matches (caller can then use the streaming scanner).
10
11use super::{FileTypeProfile, Processor};
12use crate::error::Result;
13use crate::store::MappingStore;
14use std::collections::HashMap;
15use std::sync::Arc;
16
17/// Registry of structured processors.
18///
19/// Thread-safe (processors are `Arc<dyn Processor>`) and can be shared
20/// across threads via `Arc<ProcessorRegistry>`.
21pub struct ProcessorRegistry {
22 /// Processors indexed by name.
23 processors: HashMap<String, Arc<dyn Processor>>,
24}
25
26impl ProcessorRegistry {
27 /// Create an empty registry.
28 #[must_use]
29 pub fn new() -> Self {
30 Self {
31 processors: HashMap::new(),
32 }
33 }
34
35 /// Create a registry pre-populated with all built-in processors.
36 #[must_use]
37 pub fn with_builtins() -> Self {
38 let mut reg = Self::new();
39 reg.register(Arc::new(super::key_value::KeyValueProcessor));
40 reg.register(Arc::new(super::json_proc::JsonProcessor));
41 reg.register(Arc::new(super::yaml_proc::YamlProcessor));
42 reg.register(Arc::new(super::xml_proc::XmlProcessor));
43 reg.register(Arc::new(super::csv_proc::CsvProcessor));
44 reg.register(Arc::new(super::toml_proc::TomlProcessor));
45 reg.register(Arc::new(super::env_proc::EnvProcessor));
46 reg.register(Arc::new(super::ini_proc::IniProcessor));
47 reg.register(Arc::new(super::log_line::LogLineProcessor::new()));
48 reg
49 }
50
51 /// Register a processor. Overwrites any existing processor with the
52 /// same name.
53 pub fn register(&mut self, processor: Arc<dyn Processor>) {
54 self.processors
55 .insert(processor.name().to_string(), processor);
56 }
57
58 /// Look up a processor by its name.
59 pub fn get(&self, name: &str) -> Option<&Arc<dyn Processor>> {
60 self.processors.get(name)
61 }
62
63 /// List all registered processor names.
64 pub fn names(&self) -> Vec<&str> {
65 self.processors.keys().map(|s| s.as_str()).collect()
66 }
67
68 /// Number of registered processors.
69 #[must_use]
70 pub fn len(&self) -> usize {
71 self.processors.len()
72 }
73
74 /// Whether the registry is empty.
75 #[must_use]
76 pub fn is_empty(&self) -> bool {
77 self.processors.is_empty()
78 }
79
80 /// Find a processor that can handle the given content + profile.
81 ///
82 /// 1. If the profile names a specific processor, look it up directly.
83 /// 2. Otherwise, iterate all processors and return the first whose
84 /// `can_handle` returns `true`.
85 ///
86 /// Returns `None` if no processor matches (caller should fall back
87 /// to the streaming scanner).
88 pub fn find_processor(
89 &self,
90 content: &[u8],
91 profile: &FileTypeProfile,
92 ) -> Option<&Arc<dyn Processor>> {
93 // Direct lookup by profile's processor name.
94 if let Some(proc) = self.processors.get(&profile.processor) {
95 if proc.can_handle(content, profile) {
96 return Some(proc);
97 }
98 }
99
100 // Auto-detect: first matching processor.
101 self.processors
102 .values()
103 .find(|proc| proc.can_handle(content, profile))
104 }
105
106 /// Process content using the matching processor.
107 ///
108 /// Returns `Ok(Some(output))` if a processor matched and succeeded,
109 /// `Ok(None)` if no processor matches (caller should fall back),
110 /// or `Err(...)` if processing failed.
111 ///
112 /// # Errors
113 ///
114 /// Returns the underlying processor's error if processing fails.
115 pub fn process(
116 &self,
117 content: &[u8],
118 profile: &FileTypeProfile,
119 store: &MappingStore,
120 ) -> Result<Option<Vec<u8>>> {
121 match self.find_processor(content, profile) {
122 Some(proc) => {
123 let output = proc.process(content, profile, store)?;
124 Ok(Some(output))
125 }
126 None => Ok(None),
127 }
128 }
129}
130
131impl Default for ProcessorRegistry {
132 fn default() -> Self {
133 Self::with_builtins()
134 }
135}