sanitize_engine/processor/registry.rs
//! Processor registry — discovers and dispatches structured processors.
//!
//! The [`ProcessorRegistry`] holds a set of registered [`Processor`]
//! implementations and provides methods to:
//!
//! 1. Look up a processor by name.
//! 2. Auto-detect a processor for given content + profile.
//! 3. Process content using a matching processor, returning `None` when
//!    no processor matches so the caller can fall back to the streaming
//!    scanner.
10
11use super::{FileTypeProfile, Processor};
12use crate::error::Result;
13use crate::store::MappingStore;
14use std::collections::HashMap;
15use std::sync::Arc;
16
17/// Registry of structured processors.
18///
19/// Thread-safe (processors are `Arc<dyn Processor>`) and can be shared
20/// across threads via `Arc<ProcessorRegistry>`.
21pub struct ProcessorRegistry {
22 /// Processors indexed by name.
23 processors: HashMap<String, Arc<dyn Processor>>,
24}
25
26impl ProcessorRegistry {
27 /// Create an empty registry.
28 #[must_use]
29 pub fn new() -> Self {
30 Self {
31 processors: HashMap::new(),
32 }
33 }
34
35 /// Create a registry pre-populated with all built-in processors.
36 #[must_use]
37 pub fn with_builtins() -> Self {
38 let mut reg = Self::new();
39 reg.register(Arc::new(super::key_value::KeyValueProcessor));
40 reg.register(Arc::new(super::json_proc::JsonProcessor));
41 reg.register(Arc::new(super::yaml_proc::YamlProcessor));
42 reg.register(Arc::new(super::xml_proc::XmlProcessor));
43 reg.register(Arc::new(super::csv_proc::CsvProcessor));
44 reg
45 }
46
47 /// Register a processor. Overwrites any existing processor with the
48 /// same name.
49 pub fn register(&mut self, processor: Arc<dyn Processor>) {
50 self.processors
51 .insert(processor.name().to_string(), processor);
52 }
53
54 /// Look up a processor by its name.
55 pub fn get(&self, name: &str) -> Option<&Arc<dyn Processor>> {
56 self.processors.get(name)
57 }
58
59 /// List all registered processor names.
60 pub fn names(&self) -> Vec<&str> {
61 self.processors.keys().map(|s| s.as_str()).collect()
62 }
63
64 /// Number of registered processors.
65 #[must_use]
66 pub fn len(&self) -> usize {
67 self.processors.len()
68 }
69
70 /// Whether the registry is empty.
71 #[must_use]
72 pub fn is_empty(&self) -> bool {
73 self.processors.is_empty()
74 }
75
76 /// Find a processor that can handle the given content + profile.
77 ///
78 /// 1. If the profile names a specific processor, look it up directly.
79 /// 2. Otherwise, iterate all processors and return the first whose
80 /// `can_handle` returns `true`.
81 ///
82 /// Returns `None` if no processor matches (caller should fall back
83 /// to the streaming scanner).
84 pub fn find_processor(
85 &self,
86 content: &[u8],
87 profile: &FileTypeProfile,
88 ) -> Option<&Arc<dyn Processor>> {
89 // Direct lookup by profile's processor name.
90 if let Some(proc) = self.processors.get(&profile.processor) {
91 if proc.can_handle(content, profile) {
92 return Some(proc);
93 }
94 }
95
96 // Auto-detect: first matching processor.
97 self.processors
98 .values()
99 .find(|proc| proc.can_handle(content, profile))
100 }
101
102 /// Process content using the matching processor.
103 ///
104 /// Returns `Ok(Some(output))` if a processor matched and succeeded,
105 /// `Ok(None)` if no processor matches (caller should fall back),
106 /// or `Err(...)` if processing failed.
107 ///
108 /// # Errors
109 ///
110 /// Returns the underlying processor's error if processing fails.
111 pub fn process(
112 &self,
113 content: &[u8],
114 profile: &FileTypeProfile,
115 store: &MappingStore,
116 ) -> Result<Option<Vec<u8>>> {
117 match self.find_processor(content, profile) {
118 Some(proc) => {
119 let output = proc.process(content, profile, store)?;
120 Ok(Some(output))
121 }
122 None => Ok(None),
123 }
124 }
125}
126
127impl Default for ProcessorRegistry {
128 fn default() -> Self {
129 Self::with_builtins()
130 }
131}