sanitize_engine/processor/
yaml_proc.rs1use crate::error::{Result, SanitizeError};
11use crate::processor::{build_path, find_matching_rule, replace_value, FileTypeProfile, Processor};
12use crate::store::MappingStore;
13use serde_yaml_ng::Value;
14
/// Deepest nesting level the recursive helpers in this file will descend to.
///
/// `walk_yaml` returns an error once its depth argument exceeds this value,
/// and `count_yaml_nodes_inner` stops descending at the same point.
const MAX_YAML_DEPTH: usize = 128;

/// Largest input, in bytes, that `YamlProcessor::process` accepts (64 MiB).
const MAX_YAML_INPUT_SIZE: usize = 64 << 20;

/// Largest number of parsed nodes that `YamlProcessor::process` accepts.
const MAX_YAML_NODE_COUNT: usize = 10_000_000;
27
28pub struct YamlProcessor;
30
31impl Processor for YamlProcessor {
32 fn name(&self) -> &'static str {
33 "yaml"
34 }
35
36 fn can_handle(&self, content: &[u8], profile: &FileTypeProfile) -> bool {
37 if profile.processor == "yaml" {
38 return true;
39 }
40 let text = String::from_utf8_lossy(content);
42 let trimmed = text.trim_start();
43 trimmed.starts_with("---") || trimmed.starts_with("- ") || trimmed.contains(": ")
44 }
45
46 fn process(
47 &self,
48 content: &[u8],
49 profile: &FileTypeProfile,
50 store: &MappingStore,
51 ) -> Result<Vec<u8>> {
52 if content.len() > MAX_YAML_INPUT_SIZE {
54 return Err(SanitizeError::InputTooLarge {
55 size: content.len(),
56 limit: MAX_YAML_INPUT_SIZE,
57 });
58 }
59
60 let text = std::str::from_utf8(content).map_err(|e| SanitizeError::ParseError {
61 format: "YAML".into(),
62 message: format!("invalid UTF-8: {}", e),
63 })?;
64
65 let mut value: Value =
66 serde_yaml_ng::from_str(text).map_err(|e| SanitizeError::ParseError {
67 format: "YAML".into(),
68 message: format!("YAML parse error: {}", e),
69 })?;
70
71 let node_count = count_yaml_nodes(&value);
76 if node_count > MAX_YAML_NODE_COUNT {
77 return Err(SanitizeError::InputTooLarge {
78 size: node_count,
79 limit: MAX_YAML_NODE_COUNT,
80 });
81 }
82
83 walk_yaml(&mut value, "", profile, store, 0)?;
84
85 let output = serde_yaml_ng::to_string(&value)
86 .map_err(|e| SanitizeError::IoError(format!("YAML serialize error: {}", e)))?;
87
88 Ok(output.into_bytes())
89 }
90}
91
92fn count_yaml_nodes(value: &Value) -> usize {
96 count_yaml_nodes_inner(value, 0)
97}
98
99fn count_yaml_nodes_inner(value: &Value, depth: usize) -> usize {
102 if depth > MAX_YAML_DEPTH {
103 return 1; }
105 match value {
106 Value::Mapping(map) => {
107 1 + map
108 .iter()
109 .map(|(k, v)| {
110 count_yaml_nodes_inner(k, depth + 1) + count_yaml_nodes_inner(v, depth + 1)
111 })
112 .sum::<usize>()
113 }
114 Value::Sequence(seq) => {
115 1 + seq
116 .iter()
117 .map(|v| count_yaml_nodes_inner(v, depth + 1))
118 .sum::<usize>()
119 }
120 Value::Tagged(tagged) => 1 + count_yaml_nodes_inner(&tagged.value, depth + 1),
121 _ => 1, }
123}
124
125fn walk_yaml(
127 value: &mut Value,
128 prefix: &str,
129 profile: &FileTypeProfile,
130 store: &MappingStore,
131 depth: usize,
132) -> Result<()> {
133 if depth > MAX_YAML_DEPTH {
134 return Err(SanitizeError::RecursionDepthExceeded(format!(
135 "YAML recursion depth exceeds limit of {MAX_YAML_DEPTH}"
136 )));
137 }
138 match value {
139 Value::Mapping(map) => {
140 let keys: Vec<Value> = map.keys().cloned().collect();
141 for key in keys {
142 let key_str = yaml_key_to_string(&key);
143 let path = build_path(prefix, &key_str);
144
145 if let Some(v) = map.get_mut(&key) {
146 match v {
147 Value::String(s) => {
148 if let Some(rule) = find_matching_rule(&path, profile) {
149 *s = replace_value(s, rule, store)?;
150 }
151 }
152 Value::Number(_) | Value::Bool(_) => {
153 if let Some(rule) = find_matching_rule(&path, profile) {
154 let repr = yaml_scalar_to_string(v);
155 let replaced = replace_value(&repr, rule, store)?;
156 *v = Value::String(replaced);
157 }
158 }
159 Value::Mapping(_) | Value::Sequence(_) => {
160 walk_yaml(v, &path, profile, store, depth + 1)?;
161 }
162 Value::Null | Value::Tagged(_) => {}
163 }
164 }
165 }
166 }
167 Value::Sequence(seq) => {
168 for item in seq.iter_mut() {
169 walk_yaml(item, prefix, profile, store, depth + 1)?;
170 }
171 }
172 _ => {}
173 }
174 Ok(())
175}
176
177fn yaml_key_to_string(key: &Value) -> String {
178 match key {
179 Value::String(s) => s.clone(),
180 Value::Number(n) => n.to_string(),
181 Value::Bool(b) => b.to_string(),
182 _ => format!("{:?}", key),
183 }
184}
185
186fn yaml_scalar_to_string(v: &Value) -> String {
187 match v {
188 Value::String(s) => s.clone(),
189 Value::Number(n) => n.to_string(),
190 Value::Bool(b) => b.to_string(),
191 _ => String::new(),
192 }
193}
194
#[cfg(test)]
mod tests {
    use super::*;
    use crate::category::Category;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::sync::Arc;

    /// Builds a mapping store around an HMAC generator with a fixed key.
    fn make_store() -> MappingStore {
        let generator = Arc::new(HmacGenerator::new([42u8; 32]));
        MappingStore::new(generator, None)
    }

    /// Runs the YAML processor over `content` and returns its output as text.
    fn sanitize(content: &[u8], profile: &FileTypeProfile) -> String {
        let store = make_store();
        let sanitized = YamlProcessor.process(content, profile, &store).unwrap();
        String::from_utf8(sanitized).unwrap()
    }

    /// Matching mapping fields are replaced; fields without a rule survive.
    #[test]
    fn basic_yaml_replacement() {
        let rules = vec![
            FieldRule::new("database.password").with_category(Category::Custom("pw".into())),
            FieldRule::new("database.host").with_category(Category::Hostname),
        ];
        let profile = FileTypeProfile::new("yaml", rules);

        let out = sanitize(
            b"database:\n host: db.corp.com\n password: s3cret\nport: 5432\n",
            &profile,
        );

        assert!(!out.contains("s3cret"));
        assert!(!out.contains("db.corp.com"));
        assert!(out.contains("5432"));
    }

    /// Mappings nested inside a sequence are reached through the sequence's path.
    #[test]
    fn yaml_sequence_traversal() {
        let rules = vec![FieldRule::new("users.email").with_category(Category::Email)];
        let profile = FileTypeProfile::new("yaml", rules);

        let out = sanitize(b"users:\n - email: a@b.com\n - email: c@d.com\n", &profile);

        assert!(!out.contains("a@b.com"));
        assert!(!out.contains("c@d.com"));
    }
}