sanitize_engine/processor/
xml_proc.rs1use crate::error::{Result, SanitizeError};
15use crate::processor::limits::{DEFAULT_INPUT_SIZE, XML_DEPTH};
16use crate::processor::{find_matching_rule, replace_value, FileTypeProfile, Processor};
17use crate::store::MappingStore;
18use quick_xml::events::{BytesStart, BytesText, Event};
19use quick_xml::{Reader, Writer};
20use std::io::Cursor;
21
22pub struct XmlProcessor;
24
25impl Processor for XmlProcessor {
26 fn name(&self) -> &'static str {
27 "xml"
28 }
29
30 fn can_handle(&self, content: &[u8], profile: &FileTypeProfile) -> bool {
31 if profile.processor == "xml" {
32 return true;
33 }
34 let trimmed = content
35 .iter()
36 .copied()
37 .skip_while(|b| b.is_ascii_whitespace())
38 .take(5)
39 .collect::<Vec<u8>>();
40 trimmed.starts_with(b"<?xml") || trimmed.starts_with(b"<")
41 }
42
43 fn process(
44 &self,
45 content: &[u8],
46 profile: &FileTypeProfile,
47 store: &MappingStore,
48 ) -> Result<Vec<u8>> {
49 if content.len() > DEFAULT_INPUT_SIZE {
51 return Err(SanitizeError::InputTooLarge {
52 size: content.len(),
53 limit: DEFAULT_INPUT_SIZE,
54 });
55 }
56
57 let mut reader = Reader::from_reader(content);
60 reader.trim_text(false);
61
62 let mut writer = Writer::new(Cursor::new(Vec::new()));
63 let mut element_stack: Vec<String> = Vec::new();
64 let mut buf = Vec::new();
65
66 loop {
67 match reader.read_event_into(&mut buf) {
68 Ok(Event::Start(ref e)) => {
69 let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
70 element_stack.push(name.clone());
71
72 if element_stack.len() > XML_DEPTH {
73 return Err(SanitizeError::RecursionDepthExceeded(format!(
74 "XML element depth exceeds limit of {XML_DEPTH}"
75 )));
76 }
77
78 let current_path = element_stack.join("/");
80 let new_elem = process_attributes(e, ¤t_path, profile, store)?;
81 writer.write_event(Event::Start(new_elem)).map_err(|e| {
82 SanitizeError::IoError(std::io::Error::other(format!(
83 "XML write error: {e}"
84 )))
85 })?;
86 }
87 Ok(Event::End(ref e)) => {
88 writer.write_event(Event::End(e.clone())).map_err(|e| {
89 SanitizeError::IoError(std::io::Error::other(format!(
90 "XML write error: {e}"
91 )))
92 })?;
93 element_stack.pop();
94 }
95 Ok(Event::Empty(ref e)) => {
96 let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
97 let path = if element_stack.is_empty() {
98 name.clone()
99 } else {
100 format!("{}/{}", element_stack.join("/"), name)
101 };
102 let new_elem = process_attributes(e, &path, profile, store)?;
103 writer.write_event(Event::Empty(new_elem)).map_err(|e| {
104 SanitizeError::IoError(std::io::Error::other(format!(
105 "XML write error: {e}"
106 )))
107 })?;
108 }
109 Ok(Event::Text(ref e)) => {
110 let current_path = element_stack.join("/");
111 if let Some(rule) = find_matching_rule(¤t_path, profile) {
112 let text = e.unescape().map_err(|e| SanitizeError::ParseError {
113 format: "XML".into(),
114 message: format!("XML decode error: {}", e),
115 })?;
116 let replaced = replace_value(&text, rule, store)?;
117 writer
118 .write_event(Event::Text(BytesText::new(&replaced)))
119 .map_err(|e| {
120 SanitizeError::IoError(std::io::Error::other(format!(
121 "XML write error: {e}"
122 )))
123 })?;
124 } else {
125 writer.write_event(Event::Text(e.clone())).map_err(|e| {
126 SanitizeError::IoError(std::io::Error::other(format!(
127 "XML write error: {e}"
128 )))
129 })?;
130 }
131 }
132 Ok(Event::Eof) => break,
133 Ok(e) => {
134 writer.write_event(e).map_err(|er| {
135 SanitizeError::IoError(std::io::Error::other(format!(
136 "XML write error: {er}"
137 )))
138 })?;
139 }
140 Err(e) => {
141 return Err(SanitizeError::ParseError {
142 format: "XML".into(),
143 message: format!("XML parse error: {}", e),
144 });
145 }
146 }
147 buf.clear();
148 }
149
150 let result = writer.into_inner().into_inner();
151 Ok(result)
152 }
153}
154
155fn process_attributes(
157 elem: &BytesStart<'_>,
158 element_path: &str,
159 profile: &FileTypeProfile,
160 store: &MappingStore,
161) -> Result<BytesStart<'static>> {
162 let name = elem.name();
163 let mut new_elem = BytesStart::new(String::from_utf8_lossy(name.as_ref()).to_string());
164
165 for attr_result in elem.attributes() {
166 let attr = attr_result.map_err(|e| SanitizeError::ParseError {
167 format: "XML".into(),
168 message: format!("XML attribute error: {}", e),
169 })?;
170 let attr_key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
171 let attr_path = format!("{}/@{}", element_path, attr_key);
172
173 if let Some(rule) = find_matching_rule(&attr_path, profile) {
174 let attr_value = attr
175 .unescape_value()
176 .map_err(|e| SanitizeError::ParseError {
177 format: "XML".into(),
178 message: format!("XML attr decode error: {}", e),
179 })?;
180 let replaced = replace_value(&attr_value, rule, store)?;
181 new_elem.push_attribute((attr_key.as_str(), replaced.as_str()));
182 } else {
183 let attr_value = attr
184 .unescape_value()
185 .map_err(|e| SanitizeError::ParseError {
186 format: "XML".into(),
187 message: format!("XML attr decode error: {}", e),
188 })?;
189 new_elem.push_attribute((attr_key.as_str(), attr_value.as_ref()));
190 }
191 }
192
193 Ok(new_elem)
194}
195
#[cfg(test)]
mod tests {
    use super::*;
    use crate::category::Category;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::fmt::Write as _;
    use std::sync::Arc;

    /// Builds a mapping store backed by an HMAC generator with a fixed key,
    /// so every test run uses the same generator input.
    fn make_store() -> MappingStore {
        let gen = Arc::new(HmacGenerator::new([42u8; 32]));
        MappingStore::new(gen, None)
    }

    /// Runs the processor over `content` and returns the output as a string.
    fn sanitize(content: &[u8], profile: &FileTypeProfile) -> String {
        let store = make_store();
        let bytes = XmlProcessor.process(content, profile, &store).unwrap();
        String::from_utf8(bytes).unwrap()
    }

    #[test]
    fn basic_xml_text_replacement() {
        let content =
            b"<config><database><password>s3cret</password><port>5432</port></database></config>";
        let profile = FileTypeProfile::new(
            "xml",
            vec![FieldRule::new("config/database/password")
                .with_category(Category::Custom("pw".into()))],
        );

        let out = sanitize(content, &profile);

        assert!(!out.contains("s3cret"));
        assert!(out.contains("<port>5432</port>"));
    }

    #[test]
    fn xml_attribute_replacement() {
        let content = b"<config><connection host=\"db.corp.com\" port=\"5432\"/></config>";
        let profile = FileTypeProfile::new(
            "xml",
            vec![FieldRule::new("config/connection/@host").with_category(Category::Hostname)],
        );

        let out = sanitize(content, &profile);

        assert!(!out.contains("db.corp.com"));
        assert!(out.contains("5432"));
    }

    #[test]
    fn can_handle_xml_declaration() {
        let profile = FileTypeProfile::new("other", vec![]).with_extension(".txt");
        assert!(XmlProcessor.can_handle(b"<?xml version=\"1.0\"?><root/>", &profile));
    }

    #[test]
    fn can_handle_bare_tag() {
        let profile = FileTypeProfile::new("other", vec![]).with_extension(".txt");
        assert!(XmlProcessor.can_handle(b"<root><child/></root>", &profile));
    }

    #[test]
    fn can_handle_by_profile_name() {
        // The profile's processor name wins even when the bytes are not XML.
        let profile = FileTypeProfile::new("xml", vec![]).with_extension(".xml");
        assert!(XmlProcessor.can_handle(b"not xml at all", &profile));
    }

    #[test]
    fn can_handle_rejects_plaintext() {
        let profile = FileTypeProfile::new("json", vec![]).with_extension(".json");
        assert!(!XmlProcessor.can_handle(b"just some plain text", &profile));
    }

    #[test]
    fn empty_element_attributes_replaced() {
        let content = b"<config><server host=\"prod.corp.com\" port=\"443\"/></config>";
        let profile = FileTypeProfile::new(
            "xml",
            vec![FieldRule::new("config/server/@host").with_category(Category::Hostname)],
        );

        let out = sanitize(content, &profile);

        assert!(!out.contains("prod.corp.com"));
        assert!(out.contains("443"));
    }

    #[test]
    fn empty_element_at_root_level() {
        // With nothing on the element stack the path is just the element name.
        let content = b"<server host=\"root.corp.com\"/>";
        let profile = FileTypeProfile::new(
            "xml",
            vec![FieldRule::new("server/@host").with_category(Category::Hostname)],
        );

        let out = sanitize(content, &profile);

        assert!(!out.contains("root.corp.com"));
    }

    #[test]
    fn unmatched_attributes_pass_through() {
        let content = b"<config><db host=\"db.corp.com\" port=\"5432\"/></config>";
        // No rules: nothing should be replaced.
        let profile = FileTypeProfile::new("xml", vec![]);

        let out = sanitize(content, &profile);

        assert!(out.contains("db.corp.com"));
        assert!(out.contains("5432"));
    }

    #[test]
    fn other_xml_events_pass_through() {
        let content = b"<?xml version=\"1.0\"?><!-- comment --><root><child>value</child></root>";
        let profile = FileTypeProfile::new("xml", vec![]);

        let out = sanitize(content, &profile);

        // The declaration and the comment go through the catch-all arm.
        assert!(out.contains("<?xml version=\"1.0\"?>"));
        assert!(out.contains("<!-- comment -->"));
        assert!(out.contains("<child>value</child>"));
    }

    #[test]
    fn depth_limit_exceeded_returns_error() {
        let store = make_store();
        // One level more than the limit, derived from the constant so the
        // test stays valid if the limit is changed.
        let levels = XML_DEPTH + 1;
        let mut open = String::new();
        let mut close = String::new();
        for i in 0..levels {
            write!(open, "<l{i}>").unwrap();
        }
        for i in (0..levels).rev() {
            write!(close, "</l{i}>").unwrap();
        }
        let content = format!("{open}secret{close}");
        let profile = FileTypeProfile::new("xml", vec![]);

        let err = XmlProcessor
            .process(content.as_bytes(), &profile, &store)
            .unwrap_err();

        assert!(matches!(
            err,
            crate::error::SanitizeError::RecursionDepthExceeded(_)
        ));
    }
}