sanitize_engine/processor/
xml_proc.rs1use crate::error::{Result, SanitizeError};
15use crate::processor::limits::{DEFAULT_INPUT_SIZE, XML_DEPTH};
16use crate::processor::{find_matching_rule, replace_value, FileTypeProfile, Processor};
17use crate::store::MappingStore;
18use quick_xml::events::{BytesStart, BytesText, Event};
19use quick_xml::{Reader, Writer};
20use std::io::Cursor;
21
/// Stateless processor for XML documents.
///
/// The type carries no data; all behaviour lives in its [`Processor`]
/// implementation. The derives make the unit value printable, copyable and
/// default-constructible.
#[derive(Debug, Clone, Copy, Default)]
pub struct XmlProcessor;
24
25impl Processor for XmlProcessor {
26 fn name(&self) -> &'static str {
27 "xml"
28 }
29
30 fn can_handle(&self, content: &[u8], profile: &FileTypeProfile) -> bool {
31 if profile.processor == "xml" {
32 return true;
33 }
34 let trimmed = content
35 .iter()
36 .copied()
37 .skip_while(|b| b.is_ascii_whitespace())
38 .take(5)
39 .collect::<Vec<u8>>();
40 trimmed.starts_with(b"<?xml") || trimmed.starts_with(b"<")
41 }
42
43 fn process(
44 &self,
45 content: &[u8],
46 profile: &FileTypeProfile,
47 store: &MappingStore,
48 ) -> Result<Vec<u8>> {
49 if content.len() > DEFAULT_INPUT_SIZE {
51 return Err(SanitizeError::InputTooLarge {
52 size: content.len(),
53 limit: DEFAULT_INPUT_SIZE,
54 });
55 }
56
57 let mut reader = Reader::from_reader(content);
60 reader.trim_text(false);
61
62 let mut writer = Writer::new(Cursor::new(Vec::new()));
63 let mut element_stack: Vec<String> = Vec::new();
64 let mut buf = Vec::new();
65
66 loop {
67 match reader.read_event_into(&mut buf) {
68 Ok(Event::Start(ref e)) => {
69 let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
70 element_stack.push(name.clone());
71
72 if element_stack.len() > XML_DEPTH {
73 return Err(SanitizeError::RecursionDepthExceeded(format!(
74 "XML element depth exceeds limit of {XML_DEPTH}"
75 )));
76 }
77
78 let current_path = element_stack.join("/");
80 let new_elem = process_attributes(e, ¤t_path, profile, store)?;
81 writer
82 .write_event(Event::Start(new_elem))
83 .map_err(|e| SanitizeError::IoError(format!("XML write error: {}", e)))?;
84 }
85 Ok(Event::End(ref e)) => {
86 writer
87 .write_event(Event::End(e.clone()))
88 .map_err(|e| SanitizeError::IoError(format!("XML write error: {}", e)))?;
89 element_stack.pop();
90 }
91 Ok(Event::Empty(ref e)) => {
92 let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
93 let path = if element_stack.is_empty() {
94 name.clone()
95 } else {
96 format!("{}/{}", element_stack.join("/"), name)
97 };
98 let new_elem = process_attributes(e, &path, profile, store)?;
99 writer
100 .write_event(Event::Empty(new_elem))
101 .map_err(|e| SanitizeError::IoError(format!("XML write error: {}", e)))?;
102 }
103 Ok(Event::Text(ref e)) => {
104 let current_path = element_stack.join("/");
105 if let Some(rule) = find_matching_rule(¤t_path, profile) {
106 let text = e.unescape().map_err(|e| SanitizeError::ParseError {
107 format: "XML".into(),
108 message: format!("XML decode error: {}", e),
109 })?;
110 let replaced = replace_value(&text, rule, store)?;
111 writer
112 .write_event(Event::Text(BytesText::new(&replaced)))
113 .map_err(|e| {
114 SanitizeError::IoError(format!("XML write error: {}", e))
115 })?;
116 } else {
117 writer.write_event(Event::Text(e.clone())).map_err(|e| {
118 SanitizeError::IoError(format!("XML write error: {}", e))
119 })?;
120 }
121 }
122 Ok(Event::Eof) => break,
123 Ok(e) => {
124 writer
125 .write_event(e)
126 .map_err(|er| SanitizeError::IoError(format!("XML write error: {}", er)))?;
127 }
128 Err(e) => {
129 return Err(SanitizeError::ParseError {
130 format: "XML".into(),
131 message: format!("XML parse error: {}", e),
132 });
133 }
134 }
135 buf.clear();
136 }
137
138 let result = writer.into_inner().into_inner();
139 Ok(result)
140 }
141}
142
143fn process_attributes(
145 elem: &BytesStart<'_>,
146 element_path: &str,
147 profile: &FileTypeProfile,
148 store: &MappingStore,
149) -> Result<BytesStart<'static>> {
150 let name = elem.name();
151 let mut new_elem = BytesStart::new(String::from_utf8_lossy(name.as_ref()).to_string());
152
153 for attr_result in elem.attributes() {
154 let attr = attr_result.map_err(|e| SanitizeError::ParseError {
155 format: "XML".into(),
156 message: format!("XML attribute error: {}", e),
157 })?;
158 let attr_key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
159 let attr_path = format!("{}/@{}", element_path, attr_key);
160
161 if let Some(rule) = find_matching_rule(&attr_path, profile) {
162 let attr_value = attr
163 .unescape_value()
164 .map_err(|e| SanitizeError::ParseError {
165 format: "XML".into(),
166 message: format!("XML attr decode error: {}", e),
167 })?;
168 let replaced = replace_value(&attr_value, rule, store)?;
169 new_elem.push_attribute((attr_key.as_str(), replaced.as_str()));
170 } else {
171 let attr_value = attr
172 .unescape_value()
173 .map_err(|e| SanitizeError::ParseError {
174 format: "XML".into(),
175 message: format!("XML attr decode error: {}", e),
176 })?;
177 new_elem.push_attribute((attr_key.as_str(), attr_value.as_ref()));
178 }
179 }
180
181 Ok(new_elem)
182}
183
#[cfg(test)]
mod tests {
    use super::*;
    use crate::category::Category;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::sync::Arc;

    /// Creates a mapping store driven by an HMAC generator with a fixed key.
    fn make_store() -> MappingStore {
        let generator = Arc::new(HmacGenerator::new([42u8; 32]));
        MappingStore::new(generator, None)
    }

    /// Runs `XmlProcessor` over `content` and returns the output as a string.
    fn sanitize(content: &[u8], profile: &FileTypeProfile) -> String {
        let store = make_store();
        let sanitized = XmlProcessor.process(content, profile, &store).unwrap();
        String::from_utf8(sanitized).unwrap()
    }

    /// Text under a matched element path is replaced; sibling text is kept.
    #[test]
    fn basic_xml_text_replacement() {
        let rules = vec![
            FieldRule::new("config/database/password").with_category(Category::Custom("pw".into())),
        ];
        let profile = FileTypeProfile::new("xml", rules);

        let out = sanitize(
            b"<config><database><password>s3cret</password><port>5432</port></database></config>",
            &profile,
        );

        assert!(!out.contains("s3cret"));
        assert!(out.contains("<port>5432</port>"));
    }

    /// A matched attribute value is replaced; other attributes are kept.
    #[test]
    fn xml_attribute_replacement() {
        let rules =
            vec![FieldRule::new("config/connection/@host").with_category(Category::Hostname)];
        let profile = FileTypeProfile::new("xml", rules);

        let out = sanitize(
            b"<config><connection host=\"db.corp.com\" port=\"5432\"/></config>",
            &profile,
        );

        assert!(!out.contains("db.corp.com"));
        assert!(out.contains("5432"));
    }
}
234}