Skip to main content

sanitize_engine/processor/
xml_proc.rs

1//! XML structured processor.
2//!
3//! Uses `quick-xml` to parse and rewrite XML, preserving the document
4//! structure, attributes, and non-matched content.
5//!
6//! # Key Paths
7//!
8//! Element paths are slash-separated: `database/password`. Attributes
9//! are expressed as `element/@attr` (e.g. `connection/@host`).
10//!
11//! For simplicity this processor tracks the element stack and matches
12//! text content of elements and attribute values against field rules.
13
14use crate::error::{Result, SanitizeError};
15use crate::processor::limits::{DEFAULT_INPUT_SIZE, XML_DEPTH};
16use crate::processor::{find_matching_rule, replace_value, FileTypeProfile, Processor};
17use crate::store::MappingStore;
18use quick_xml::events::{BytesStart, BytesText, Event};
19use quick_xml::{Reader, Writer};
20use std::io::Cursor;
21
/// Structured processor for XML files.
///
/// This is a field-less unit type: all per-call inputs (content, profile,
/// mapping store) are passed to the [`Processor`] methods, so values are
/// trivially copyable and default-constructible.
#[derive(Debug, Clone, Copy, Default)]
pub struct XmlProcessor;
24
25impl Processor for XmlProcessor {
26    fn name(&self) -> &'static str {
27        "xml"
28    }
29
30    fn can_handle(&self, content: &[u8], profile: &FileTypeProfile) -> bool {
31        if profile.processor == "xml" {
32            return true;
33        }
34        let trimmed = content
35            .iter()
36            .copied()
37            .skip_while(|b| b.is_ascii_whitespace())
38            .take(5)
39            .collect::<Vec<u8>>();
40        trimmed.starts_with(b"<?xml") || trimmed.starts_with(b"<")
41    }
42
43    fn process(
44        &self,
45        content: &[u8],
46        profile: &FileTypeProfile,
47        store: &MappingStore,
48    ) -> Result<Vec<u8>> {
49        // F-04 fix: enforce input size limit.
50        if content.len() > DEFAULT_INPUT_SIZE {
51            return Err(SanitizeError::InputTooLarge {
52                size: content.len(),
53                limit: DEFAULT_INPUT_SIZE,
54            });
55        }
56
57        // Security: quick-xml disables external entity expansion by default,
58        // so XXE attacks are not possible with this configuration.
59        let mut reader = Reader::from_reader(content);
60        reader.trim_text(false);
61
62        let mut writer = Writer::new(Cursor::new(Vec::new()));
63        let mut element_stack: Vec<String> = Vec::new();
64        let mut buf = Vec::new();
65
66        loop {
67            match reader.read_event_into(&mut buf) {
68                Ok(Event::Start(ref e)) => {
69                    let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
70                    element_stack.push(name.clone());
71
72                    if element_stack.len() > XML_DEPTH {
73                        return Err(SanitizeError::RecursionDepthExceeded(format!(
74                            "XML element depth exceeds limit of {XML_DEPTH}"
75                        )));
76                    }
77
78                    // Process attributes.
79                    let current_path = element_stack.join("/");
80                    let new_elem = process_attributes(e, &current_path, profile, store)?;
81                    writer
82                        .write_event(Event::Start(new_elem))
83                        .map_err(|e| SanitizeError::IoError(format!("XML write error: {}", e)))?;
84                }
85                Ok(Event::End(ref e)) => {
86                    writer
87                        .write_event(Event::End(e.clone()))
88                        .map_err(|e| SanitizeError::IoError(format!("XML write error: {}", e)))?;
89                    element_stack.pop();
90                }
91                Ok(Event::Empty(ref e)) => {
92                    let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
93                    let path = if element_stack.is_empty() {
94                        name.clone()
95                    } else {
96                        format!("{}/{}", element_stack.join("/"), name)
97                    };
98                    let new_elem = process_attributes(e, &path, profile, store)?;
99                    writer
100                        .write_event(Event::Empty(new_elem))
101                        .map_err(|e| SanitizeError::IoError(format!("XML write error: {}", e)))?;
102                }
103                Ok(Event::Text(ref e)) => {
104                    let current_path = element_stack.join("/");
105                    if let Some(rule) = find_matching_rule(&current_path, profile) {
106                        let text = e.unescape().map_err(|e| SanitizeError::ParseError {
107                            format: "XML".into(),
108                            message: format!("XML decode error: {}", e),
109                        })?;
110                        let replaced = replace_value(&text, rule, store)?;
111                        writer
112                            .write_event(Event::Text(BytesText::new(&replaced)))
113                            .map_err(|e| {
114                                SanitizeError::IoError(format!("XML write error: {}", e))
115                            })?;
116                    } else {
117                        writer.write_event(Event::Text(e.clone())).map_err(|e| {
118                            SanitizeError::IoError(format!("XML write error: {}", e))
119                        })?;
120                    }
121                }
122                Ok(Event::Eof) => break,
123                Ok(e) => {
124                    writer
125                        .write_event(e)
126                        .map_err(|er| SanitizeError::IoError(format!("XML write error: {}", er)))?;
127                }
128                Err(e) => {
129                    return Err(SanitizeError::ParseError {
130                        format: "XML".into(),
131                        message: format!("XML parse error: {}", e),
132                    });
133                }
134            }
135            buf.clear();
136        }
137
138        let result = writer.into_inner().into_inner();
139        Ok(result)
140    }
141}
142
143/// Process attributes of an element, replacing matched ones.
144fn process_attributes(
145    elem: &BytesStart<'_>,
146    element_path: &str,
147    profile: &FileTypeProfile,
148    store: &MappingStore,
149) -> Result<BytesStart<'static>> {
150    let name = elem.name();
151    let mut new_elem = BytesStart::new(String::from_utf8_lossy(name.as_ref()).to_string());
152
153    for attr_result in elem.attributes() {
154        let attr = attr_result.map_err(|e| SanitizeError::ParseError {
155            format: "XML".into(),
156            message: format!("XML attribute error: {}", e),
157        })?;
158        let attr_key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
159        let attr_path = format!("{}/@{}", element_path, attr_key);
160
161        if let Some(rule) = find_matching_rule(&attr_path, profile) {
162            let attr_value = attr
163                .unescape_value()
164                .map_err(|e| SanitizeError::ParseError {
165                    format: "XML".into(),
166                    message: format!("XML attr decode error: {}", e),
167                })?;
168            let replaced = replace_value(&attr_value, rule, store)?;
169            new_elem.push_attribute((attr_key.as_str(), replaced.as_str()));
170        } else {
171            let attr_value = attr
172                .unescape_value()
173                .map_err(|e| SanitizeError::ParseError {
174                    format: "XML".into(),
175                    message: format!("XML attr decode error: {}", e),
176                })?;
177            new_elem.push_attribute((attr_key.as_str(), attr_value.as_ref()));
178        }
179    }
180
181    Ok(new_elem)
182}
183
#[cfg(test)]
mod tests {
    use super::*;
    use crate::category::Category;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::sync::Arc;

    /// Builds a mapping store backed by an HMAC generator with a fixed key.
    fn make_store() -> MappingStore {
        MappingStore::new(Arc::new(HmacGenerator::new([42u8; 32])), None)
    }

    /// Runs `XmlProcessor` over `input` with `profile` and returns the
    /// output decoded as UTF-8.
    fn sanitize(input: &[u8], profile: &FileTypeProfile) -> String {
        let store = make_store();
        let bytes = XmlProcessor.process(input, profile, &store).unwrap();
        String::from_utf8(bytes).unwrap()
    }

    /// Text inside a rule-matched element is replaced; sibling text is kept.
    #[test]
    fn basic_xml_text_replacement() {
        let rule =
            FieldRule::new("config/database/password").with_category(Category::Custom("pw".into()));
        let profile = FileTypeProfile::new("xml", vec![rule]);

        let out = sanitize(
            b"<config><database><password>s3cret</password><port>5432</port></database></config>",
            &profile,
        );

        assert!(!out.contains("s3cret"));
        assert!(out.contains("<port>5432</port>"));
    }

    /// A rule-matched attribute is replaced; other attributes are kept.
    #[test]
    fn xml_attribute_replacement() {
        let rule = FieldRule::new("config/connection/@host").with_category(Category::Hostname);
        let profile = FileTypeProfile::new("xml", vec![rule]);

        let out = sanitize(
            b"<config><connection host=\"db.corp.com\" port=\"5432\"/></config>",
            &profile,
        );

        assert!(!out.contains("db.corp.com"));
        assert!(out.contains("5432"));
    }
}