sanitize_engine/processor/
yaml_proc.rs1use crate::error::{Result, SanitizeError};
11use crate::processor::{find_matching_rule, replace_value, FileTypeProfile, Processor};
12use crate::store::MappingStore;
13use serde_yaml_ng::Value;
14
/// Deepest nesting level the YAML walker will descend into before giving up.
const MAX_YAML_DEPTH: usize = 128;

/// Largest accepted raw input, in bytes (64 MiB).
const MAX_YAML_INPUT_SIZE: usize = 64 << 20;

/// Largest accepted number of nodes in a parsed YAML document.
const MAX_YAML_NODE_COUNT: usize = 10_000_000;
27
28pub struct YamlProcessor;
30
31impl Processor for YamlProcessor {
32 fn name(&self) -> &'static str {
33 "yaml"
34 }
35
36 fn can_handle(&self, content: &[u8], profile: &FileTypeProfile) -> bool {
37 if profile.processor == "yaml" {
38 return true;
39 }
40 let text = String::from_utf8_lossy(content);
42 let trimmed = text.trim_start();
43 trimmed.starts_with("---") || trimmed.starts_with("- ") || trimmed.contains(": ")
44 }
45
46 fn process(
47 &self,
48 content: &[u8],
49 profile: &FileTypeProfile,
50 store: &MappingStore,
51 ) -> Result<Vec<u8>> {
52 if content.len() > MAX_YAML_INPUT_SIZE {
54 return Err(SanitizeError::InputTooLarge {
55 size: content.len(),
56 limit: MAX_YAML_INPUT_SIZE,
57 });
58 }
59
60 let text = std::str::from_utf8(content).map_err(|e| SanitizeError::ParseError {
61 format: "YAML".into(),
62 message: format!("invalid UTF-8: {}", e),
63 })?;
64
65 let mut value: Value =
66 serde_yaml_ng::from_str(text).map_err(|e| SanitizeError::ParseError {
67 format: "YAML".into(),
68 message: format!("YAML parse error: {}", e),
69 })?;
70
71 let node_count = count_yaml_nodes(&value);
76 if node_count > MAX_YAML_NODE_COUNT {
77 return Err(SanitizeError::InputTooLarge {
78 size: node_count,
79 limit: MAX_YAML_NODE_COUNT,
80 });
81 }
82
83 walk_yaml(&mut value, "", profile, store, 0)?;
84
85 let output = serde_yaml_ng::to_string(&value)
86 .map_err(|e| SanitizeError::IoError(format!("YAML serialize error: {}", e)))?;
87
88 Ok(output.into_bytes())
89 }
90}
91
92fn count_yaml_nodes(value: &Value) -> usize {
96 count_yaml_nodes_inner(value, 0)
97}
98
99fn count_yaml_nodes_inner(value: &Value, depth: usize) -> usize {
102 if depth > MAX_YAML_DEPTH {
103 return 1; }
105 match value {
106 Value::Mapping(map) => {
107 1 + map
108 .iter()
109 .map(|(k, v)| {
110 count_yaml_nodes_inner(k, depth + 1) + count_yaml_nodes_inner(v, depth + 1)
111 })
112 .sum::<usize>()
113 }
114 Value::Sequence(seq) => {
115 1 + seq
116 .iter()
117 .map(|v| count_yaml_nodes_inner(v, depth + 1))
118 .sum::<usize>()
119 }
120 Value::Tagged(tagged) => 1 + count_yaml_nodes_inner(&tagged.value, depth + 1),
121 _ => 1, }
123}
124
125fn walk_yaml(
127 value: &mut Value,
128 prefix: &str,
129 profile: &FileTypeProfile,
130 store: &MappingStore,
131 depth: usize,
132) -> Result<()> {
133 if depth > MAX_YAML_DEPTH {
134 return Err(SanitizeError::RecursionDepthExceeded(format!(
135 "YAML recursion depth exceeds limit of {MAX_YAML_DEPTH}"
136 )));
137 }
138 match value {
139 Value::Mapping(map) => {
140 let keys: Vec<Value> = map.keys().cloned().collect();
141 for key in keys {
142 let key_str = yaml_key_to_string(&key);
143 let path = if prefix.is_empty() {
144 key_str.clone()
145 } else {
146 format!("{}.{}", prefix, key_str)
147 };
148
149 if let Some(v) = map.get_mut(&key) {
150 match v {
151 Value::String(s) => {
152 if let Some(rule) = find_matching_rule(&path, profile) {
153 *s = replace_value(s, rule, store)?;
154 }
155 }
156 Value::Number(_) | Value::Bool(_) => {
157 if let Some(rule) = find_matching_rule(&path, profile) {
158 let repr = yaml_scalar_to_string(v);
159 let replaced = replace_value(&repr, rule, store)?;
160 *v = Value::String(replaced);
161 }
162 }
163 Value::Mapping(_) | Value::Sequence(_) => {
164 walk_yaml(v, &path, profile, store, depth + 1)?;
165 }
166 Value::Null | Value::Tagged(_) => {}
167 }
168 }
169 }
170 }
171 Value::Sequence(seq) => {
172 for item in seq.iter_mut() {
173 walk_yaml(item, prefix, profile, store, depth + 1)?;
174 }
175 }
176 _ => {}
177 }
178 Ok(())
179}
180
181fn yaml_key_to_string(key: &Value) -> String {
182 match key {
183 Value::String(s) => s.clone(),
184 Value::Number(n) => n.to_string(),
185 Value::Bool(b) => b.to_string(),
186 _ => format!("{:?}", key),
187 }
188}
189
190fn yaml_scalar_to_string(v: &Value) -> String {
191 match v {
192 Value::String(s) => s.clone(),
193 Value::Number(n) => n.to_string(),
194 Value::Bool(b) => b.to_string(),
195 _ => String::new(),
196 }
197}
198
#[cfg(test)]
mod tests {
    use super::*;
    use crate::category::Category;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::sync::Arc;

    /// Builds a mapping store backed by an HMAC generator with a fixed key.
    fn make_store() -> MappingStore {
        let generator = Arc::new(HmacGenerator::new([42u8; 32]));
        MappingStore::new(generator, None)
    }

    /// Values under matching paths are replaced; unmatched values survive.
    #[test]
    fn basic_yaml_replacement() {
        let rules = vec![
            FieldRule::new("database.password").with_category(Category::Custom("pw".into())),
            FieldRule::new("database.host").with_category(Category::Hostname),
        ];
        let profile = FileTypeProfile::new("yaml", rules);
        let input = b"database:\n host: db.corp.com\n password: s3cret\nport: 5432\n";

        let store = make_store();
        let sanitized = YamlProcessor.process(input, &profile, &store).unwrap();
        let text = String::from_utf8(sanitized).unwrap();

        assert!(!text.contains("s3cret"));
        assert!(!text.contains("db.corp.com"));
        // `port` has no rule, so its value must pass through unchanged.
        assert!(text.contains("5432"));
    }

    /// A rule path applies to every mapping inside a sequence.
    #[test]
    fn yaml_sequence_traversal() {
        let rules = vec![FieldRule::new("users.email").with_category(Category::Email)];
        let profile = FileTypeProfile::new("yaml", rules);
        let input = b"users:\n - email: a@b.com\n - email: c@d.com\n";

        let store = make_store();
        let sanitized = YamlProcessor.process(input, &profile, &store).unwrap();
        let text = String::from_utf8(sanitized).unwrap();

        assert!(!text.contains("a@b.com"));
        assert!(!text.contains("c@d.com"));
    }
}