sanitize_engine/processor/
yaml_proc.rs1use crate::error::{Result, SanitizeError};
11use crate::processor::limits::{DEFAULT_DEPTH, YAML_INPUT_SIZE, YAML_NODE_COUNT};
12use crate::processor::{walk_tree, FileTypeProfile, Processor, TreeNode};
13use crate::store::MappingStore;
14use serde_yaml_ng::Value;
15
/// Stateless processor for YAML content.
///
/// The type carries no data; all behavior lives in its [`Processor`]
/// implementation. The derived traits make it printable in diagnostics and
/// freely copyable / default-constructible.
#[derive(Debug, Clone, Copy, Default)]
pub struct YamlProcessor;
18
19impl Processor for YamlProcessor {
20 fn name(&self) -> &'static str {
21 "yaml"
22 }
23
24 fn can_handle(&self, content: &[u8], profile: &FileTypeProfile) -> bool {
25 if profile.processor == "yaml" {
26 return true;
27 }
28 let text = String::from_utf8_lossy(content);
30 let trimmed = text.trim_start();
31 trimmed.starts_with("---")
32 || trimmed.starts_with("- ")
33 || trimmed.starts_with('{')
34 || trimmed.contains(": ")
35 }
36
37 fn process(
38 &self,
39 content: &[u8],
40 profile: &FileTypeProfile,
41 store: &MappingStore,
42 ) -> Result<Vec<u8>> {
43 if content.len() > YAML_INPUT_SIZE {
45 return Err(SanitizeError::InputTooLarge {
46 size: content.len(),
47 limit: YAML_INPUT_SIZE,
48 });
49 }
50
51 let text = std::str::from_utf8(content).map_err(|e| SanitizeError::ParseError {
52 format: "YAML".into(),
53 message: format!("invalid UTF-8: {}", e),
54 })?;
55
56 let mut value: Value =
57 serde_yaml_ng::from_str(text).map_err(|e| SanitizeError::ParseError {
58 format: "YAML".into(),
59 message: format!("YAML parse error: {}", e),
60 })?;
61
62 let node_count = count_yaml_nodes(&value);
67 if node_count > YAML_NODE_COUNT {
68 return Err(SanitizeError::InputTooLarge {
69 size: node_count,
70 limit: YAML_NODE_COUNT,
71 });
72 }
73
74 walk_yaml(&mut value, "", profile, store, 0)?;
75
76 let output = serde_yaml_ng::to_string(&value).map_err(|e| {
77 SanitizeError::IoError(std::io::Error::other(format!("YAML serialize error: {e}")))
78 })?;
79
80 Ok(output.into_bytes())
81 }
82}
83
84fn count_yaml_nodes(value: &Value) -> usize {
88 count_yaml_nodes_inner(value, 0)
89}
90
91fn count_yaml_nodes_inner(value: &Value, depth: usize) -> usize {
94 if depth > DEFAULT_DEPTH {
95 return 1; }
97 match value {
98 Value::Mapping(map) => {
99 1 + map
100 .iter()
101 .map(|(k, v)| {
102 count_yaml_nodes_inner(k, depth + 1) + count_yaml_nodes_inner(v, depth + 1)
103 })
104 .sum::<usize>()
105 }
106 Value::Sequence(seq) => {
107 1 + seq
108 .iter()
109 .map(|v| count_yaml_nodes_inner(v, depth + 1))
110 .sum::<usize>()
111 }
112 Value::Tagged(tagged) => 1 + count_yaml_nodes_inner(&tagged.value, depth + 1),
113 _ => 1, }
115}
116
117impl TreeNode for Value {
118 fn for_each_map_entry<F>(&mut self, mut f: F) -> Result<()>
119 where
120 F: FnMut(&str, &mut Self) -> Result<()>,
121 {
122 if let Self::Mapping(map) = self {
123 let keys: Vec<Self> = map.keys().cloned().collect();
124 for key in keys {
125 let key_str = yaml_key_to_string(&key);
126 if let Some(v) = map.get_mut(&key) {
127 f(&key_str, v)?;
128 }
129 }
130 }
131 Ok(())
132 }
133
134 fn for_each_seq_item<F>(&mut self, mut f: F) -> Result<()>
135 where
136 F: FnMut(&mut Self) -> Result<()>,
137 {
138 if let Self::Sequence(seq) = self {
139 for item in seq.iter_mut() {
140 f(item)?;
141 }
142 }
143 Ok(())
144 }
145
146 fn as_str_mut(&mut self) -> Option<&mut String> {
147 if let Self::String(s) = self {
148 Some(s)
149 } else {
150 None
151 }
152 }
153
154 fn is_scalar(&self) -> bool {
155 matches!(self, Self::Number(_) | Self::Bool(_))
156 }
157
158 fn scalar_to_string(&self) -> String {
159 yaml_scalar_to_string(self)
160 }
161
162 fn set_string(&mut self, s: String) {
163 *self = Self::String(s);
164 }
165}
166
167fn walk_yaml(
169 value: &mut Value,
170 prefix: &str,
171 profile: &FileTypeProfile,
172 store: &MappingStore,
173 depth: usize,
174) -> Result<()> {
175 walk_tree(value, prefix, profile, store, depth, "YAML")
176}
177
178fn yaml_key_to_string(key: &Value) -> String {
179 match key {
180 Value::String(s) => s.clone(),
181 Value::Number(n) => n.to_string(),
182 Value::Bool(b) => b.to_string(),
183 _ => format!("{:?}", key),
184 }
185}
186
187fn yaml_scalar_to_string(v: &Value) -> String {
188 match v {
189 Value::String(s) => s.clone(),
190 Value::Number(n) => n.to_string(),
191 Value::Bool(b) => b.to_string(),
192 _ => String::new(),
193 }
194}
195
#[cfg(test)]
mod tests {
    use super::*;
    use crate::category::Category;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::sync::Arc;

    /// Builds a mapping store backed by an HMAC generator with a fixed key.
    fn make_store() -> MappingStore {
        let generator = Arc::new(HmacGenerator::new([42u8; 32]));
        MappingStore::new(generator, None)
    }

    /// Runs the processor over `input` with a fresh store and returns the
    /// output as text, panicking if processing or UTF-8 decoding fails.
    fn sanitize(input: &[u8], profile: &FileTypeProfile) -> String {
        let store = make_store();
        let processor = YamlProcessor;
        let bytes = processor.process(input, profile, &store).unwrap();
        String::from_utf8(bytes).unwrap()
    }

    /// Runs the processor over `input` with an empty "yaml" profile and
    /// returns the error it is expected to produce.
    fn sanitize_err(input: &[u8]) -> SanitizeError {
        let store = make_store();
        let processor = YamlProcessor;
        let profile = FileTypeProfile::new("yaml", vec![]);
        processor.process(input, &profile, &store).unwrap_err()
    }

    /// Builds a rule-less profile for `processor`/`extension` and asks the
    /// YAML processor whether it accepts `input`.
    fn accepts(input: &[u8], processor: &'static str, extension: &'static str) -> bool {
        let profile = FileTypeProfile::new(processor, vec![]).with_extension(extension);
        YamlProcessor.can_handle(input, &profile)
    }

    #[test]
    fn basic_yaml_replacement() {
        let input = b"database:\n host: db.corp.com\n password: s3cret\nport: 5432\n";
        let rules = vec![
            FieldRule::new("database.password").with_category(Category::Custom("pw".into())),
            FieldRule::new("database.host").with_category(Category::Hostname),
        ];
        let profile = FileTypeProfile::new("yaml", rules);

        let out = sanitize(input, &profile);

        assert!(!out.contains("s3cret"));
        assert!(!out.contains("db.corp.com"));
        assert!(out.contains("5432"));
    }

    #[test]
    fn can_handle_by_profile_name() {
        assert!(accepts(b"anything", "yaml", ".yaml"));
    }

    #[test]
    fn can_handle_detects_document_marker() {
        assert!(accepts(b"---\nkey: value\n", "json", ".json"));
    }

    #[test]
    fn can_handle_detects_key_value_heuristic() {
        assert!(accepts(b"host: localhost\nport: 5432\n", "other", ".conf"));
    }

    #[test]
    fn can_handle_detects_sequence_heuristic() {
        assert!(accepts(b"- item1\n- item2\n", "other", ".txt"));
    }

    #[test]
    fn can_handle_rejects_plaintext() {
        assert!(!accepts(
            b"just plain text with no yaml markers",
            "json",
            ".json"
        ));
    }

    #[test]
    fn non_string_scalars_not_targeted_pass_through() {
        let input = b"enabled: true\ncount: 42\nsecret: hunter2\n";
        let rules = vec![FieldRule::new("secret").with_category(Category::Custom("pw".into()))];
        let profile = FileTypeProfile::new("yaml", rules);

        let out = sanitize(input, &profile);

        assert!(!out.contains("hunter2"), "secret must be replaced");
        assert!(out.contains("42"), "integer must be preserved");
    }

    #[test]
    fn deeply_nested_yaml_replaced() {
        let input = b"a:\n b:\n c:\n secret: hunter2\n";
        let rules =
            vec![FieldRule::new("a.b.c.secret").with_category(Category::Custom("pw".into()))];
        let profile = FileTypeProfile::new("yaml", rules);

        let out = sanitize(input, &profile);

        assert!(!out.contains("hunter2"));
    }

    #[test]
    fn invalid_utf8_returns_parse_error() {
        let err = sanitize_err(b"\xff\xfe invalid");
        assert!(matches!(err, SanitizeError::ParseError { .. }));
    }

    #[test]
    fn invalid_yaml_returns_parse_error() {
        let err = sanitize_err(b"key: [unclosed");
        assert!(matches!(err, SanitizeError::ParseError { .. }));
    }

    #[test]
    fn yaml_sequence_traversal() {
        let input = b"users:\n - email: a@b.com\n - email: c@d.com\n";
        let rules = vec![FieldRule::new("users.email").with_category(Category::Email)];
        let profile = FileTypeProfile::new("yaml", rules);

        let out = sanitize(input, &profile);

        assert!(!out.contains("a@b.com"));
        assert!(!out.contains("c@d.com"));
    }
}