sanitize_engine/processor/
xml_proc.rs1use crate::error::{Result, SanitizeError};
15use crate::processor::{find_matching_rule, replace_value, FileTypeProfile, Processor};
16use crate::store::MappingStore;
17use quick_xml::events::{BytesStart, BytesText, Event};
18use quick_xml::{Reader, Writer};
19use std::io::Cursor;
20
/// Maximum element nesting depth accepted by [`XmlProcessor`].
///
/// Input nested deeper than this is rejected with
/// `SanitizeError::RecursionDepthExceeded` instead of growing the element
/// stack without bound.
const MAX_XML_DEPTH: usize = 256;

/// Maximum input size, in bytes, accepted by [`XmlProcessor`] (256 MiB).
///
/// Larger input is rejected with `SanitizeError::InputTooLarge` before any
/// parsing takes place.
const MAX_XML_INPUT_SIZE: usize = 256 * 1024 * 1024;

/// Processor that rewrites XML element text and attribute values selected by
/// the field rules of a `FileTypeProfile`.
pub struct XmlProcessor;
30
31impl Processor for XmlProcessor {
32 fn name(&self) -> &'static str {
33 "xml"
34 }
35
36 fn can_handle(&self, content: &[u8], profile: &FileTypeProfile) -> bool {
37 if profile.processor == "xml" {
38 return true;
39 }
40 let trimmed = content
41 .iter()
42 .copied()
43 .skip_while(|b| b.is_ascii_whitespace())
44 .take(5)
45 .collect::<Vec<u8>>();
46 trimmed.starts_with(b"<?xml") || trimmed.starts_with(b"<")
47 }
48
49 fn process(
50 &self,
51 content: &[u8],
52 profile: &FileTypeProfile,
53 store: &MappingStore,
54 ) -> Result<Vec<u8>> {
55 if content.len() > MAX_XML_INPUT_SIZE {
57 return Err(SanitizeError::InputTooLarge {
58 size: content.len(),
59 limit: MAX_XML_INPUT_SIZE,
60 });
61 }
62
63 let mut reader = Reader::from_reader(content);
66 reader.trim_text(false);
67
68 let mut writer = Writer::new(Cursor::new(Vec::new()));
69 let mut element_stack: Vec<String> = Vec::new();
70 let mut buf = Vec::new();
71
72 loop {
73 match reader.read_event_into(&mut buf) {
74 Ok(Event::Start(ref e)) => {
75 let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
76 element_stack.push(name.clone());
77
78 if element_stack.len() > MAX_XML_DEPTH {
79 return Err(SanitizeError::RecursionDepthExceeded(format!(
80 "XML element depth exceeds limit of {MAX_XML_DEPTH}"
81 )));
82 }
83
84 let current_path = element_stack.join("/");
86 let new_elem = process_attributes(e, ¤t_path, profile, store)?;
87 writer
88 .write_event(Event::Start(new_elem))
89 .map_err(|e| SanitizeError::IoError(format!("XML write error: {}", e)))?;
90 }
91 Ok(Event::End(ref e)) => {
92 writer
93 .write_event(Event::End(e.clone()))
94 .map_err(|e| SanitizeError::IoError(format!("XML write error: {}", e)))?;
95 element_stack.pop();
96 }
97 Ok(Event::Empty(ref e)) => {
98 let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
99 let path = if element_stack.is_empty() {
100 name.clone()
101 } else {
102 format!("{}/{}", element_stack.join("/"), name)
103 };
104 let new_elem = process_attributes(e, &path, profile, store)?;
105 writer
106 .write_event(Event::Empty(new_elem))
107 .map_err(|e| SanitizeError::IoError(format!("XML write error: {}", e)))?;
108 }
109 Ok(Event::Text(ref e)) => {
110 let current_path = element_stack.join("/");
111 if let Some(rule) = find_matching_rule(¤t_path, profile) {
112 let text = e.unescape().map_err(|e| SanitizeError::ParseError {
113 format: "XML".into(),
114 message: format!("XML decode error: {}", e),
115 })?;
116 let replaced = replace_value(&text, rule, store)?;
117 writer
118 .write_event(Event::Text(BytesText::new(&replaced)))
119 .map_err(|e| {
120 SanitizeError::IoError(format!("XML write error: {}", e))
121 })?;
122 } else {
123 writer.write_event(Event::Text(e.clone())).map_err(|e| {
124 SanitizeError::IoError(format!("XML write error: {}", e))
125 })?;
126 }
127 }
128 Ok(Event::Eof) => break,
129 Ok(e) => {
130 writer
131 .write_event(e)
132 .map_err(|er| SanitizeError::IoError(format!("XML write error: {}", er)))?;
133 }
134 Err(e) => {
135 return Err(SanitizeError::ParseError {
136 format: "XML".into(),
137 message: format!("XML parse error: {}", e),
138 });
139 }
140 }
141 buf.clear();
142 }
143
144 let result = writer.into_inner().into_inner();
145 Ok(result)
146 }
147}
148
149fn process_attributes(
151 elem: &BytesStart<'_>,
152 element_path: &str,
153 profile: &FileTypeProfile,
154 store: &MappingStore,
155) -> Result<BytesStart<'static>> {
156 let name = elem.name();
157 let mut new_elem = BytesStart::new(String::from_utf8_lossy(name.as_ref()).to_string());
158
159 for attr_result in elem.attributes() {
160 let attr = attr_result.map_err(|e| SanitizeError::ParseError {
161 format: "XML".into(),
162 message: format!("XML attribute error: {}", e),
163 })?;
164 let attr_key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
165 let attr_path = format!("{}/@{}", element_path, attr_key);
166
167 if let Some(rule) = find_matching_rule(&attr_path, profile) {
168 let attr_value = attr
169 .unescape_value()
170 .map_err(|e| SanitizeError::ParseError {
171 format: "XML".into(),
172 message: format!("XML attr decode error: {}", e),
173 })?;
174 let replaced = replace_value(&attr_value, rule, store)?;
175 new_elem.push_attribute((attr_key.as_str(), replaced.as_str()));
176 } else {
177 let attr_value = attr
178 .unescape_value()
179 .map_err(|e| SanitizeError::ParseError {
180 format: "XML".into(),
181 message: format!("XML attr decode error: {}", e),
182 })?;
183 new_elem.push_attribute((attr_key.as_str(), attr_value.as_ref()));
184 }
185 }
186
187 Ok(new_elem)
188}
189
#[cfg(test)]
mod tests {
    use super::*;
    use crate::category::Category;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::sync::Arc;

    /// Creates a mapping store backed by an HMAC generator with a fixed key.
    fn make_store() -> MappingStore {
        let generator = Arc::new(HmacGenerator::new([42u8; 32]));
        MappingStore::new(generator, None)
    }

    /// Runs `XmlProcessor` over `content` with a fresh store and returns the
    /// output as a UTF-8 string.
    fn sanitize(content: &[u8], profile: &FileTypeProfile) -> String {
        let store = make_store();
        let bytes = XmlProcessor.process(content, profile, &store).unwrap();
        String::from_utf8(bytes).unwrap()
    }

    #[test]
    fn basic_xml_text_replacement() {
        let rules = vec![
            FieldRule::new("config/database/password").with_category(Category::Custom("pw".into())),
        ];
        let profile = FileTypeProfile::new("xml", rules);

        let out = sanitize(
            b"<config><database><password>s3cret</password><port>5432</port></database></config>",
            &profile,
        );

        // The secret is gone while the unmatched sibling is left intact.
        assert!(!out.contains("s3cret"));
        assert!(out.contains("<port>5432</port>"));
    }

    #[test]
    fn xml_attribute_replacement() {
        let rules =
            vec![FieldRule::new("config/connection/@host").with_category(Category::Hostname)];
        let profile = FileTypeProfile::new("xml", rules);

        let out = sanitize(
            b"<config><connection host=\"db.corp.com\" port=\"5432\"/></config>",
            &profile,
        );

        // Only the matched attribute is rewritten.
        assert!(!out.contains("db.corp.com"));
        assert!(out.contains("5432"));
    }
}