Skip to main content

rustack_cloudfront_http/xml/
de.rs

//! XML deserialization of request bodies.
//!
//! Parses incoming XML into a generic tree representation that callers then
//! map to domain types. This avoids a separate serde path and matches the
//! trim-whitespace semantics AWS applies to every element.
6
7use quick_xml::{Reader, events::Event};
8
/// A lightweight XML tree node.
///
/// A node records only its element name, its accumulated text, and its child
/// elements; attributes are not represented.
#[derive(Debug, Default, Clone)]
pub struct Node {
    /// Element name.
    pub name: String,
    /// Direct text content.
    ///
    /// Trees built by [`parse`] have whitespace trimmed from text events, but
    /// CDATA sections are appended verbatim, so leading or trailing whitespace
    /// can still be present here.
    pub text: String,
    /// Child nodes in document order.
    pub children: Vec<Node>,
}

impl Node {
    /// Find the first direct child element with the given name.
    ///
    /// Returns `None` when no direct child has that name.
    #[must_use]
    pub fn child(&self, name: &str) -> Option<&Node> {
        self.children.iter().find(|c| c.name == name)
    }

    /// Iterate over the direct children with the given name, in document order.
    pub fn children_named<'a>(&'a self, name: &'a str) -> impl Iterator<Item = &'a Node> + 'a {
        self.children.iter().filter(move |c| c.name == name)
    }

    /// Text content of the first child with the given name.
    ///
    /// Returns the empty string when there is no such child.
    #[must_use]
    pub fn child_text<'a>(&'a self, name: &str) -> &'a str {
        self.child(name).map_or("", |c| c.text.as_str())
    }

    /// Parse the text of the named child as an `i32`.
    ///
    /// Surrounding whitespace is ignored. Returns `0` when the child is
    /// missing or its text is not a valid `i32`.
    #[must_use]
    pub fn child_i32(&self, name: &str) -> i32 {
        self.child_parsed(name)
    }

    /// Parse the text of the named child as an `i64`.
    ///
    /// Surrounding whitespace is ignored. Returns `0` when the child is
    /// missing or its text is not a valid `i64`.
    #[must_use]
    pub fn child_i64(&self, name: &str) -> i64 {
        self.child_parsed(name)
    }

    /// Parse the text of the named child as a boolean.
    ///
    /// Surrounding whitespace is ignored. `true`, `True`, `TRUE` and `1` are
    /// truthy; anything else, including a missing child, is `false`.
    #[must_use]
    pub fn child_bool(&self, name: &str) -> bool {
        matches!(self.child_text(name).trim(), "true" | "True" | "TRUE" | "1")
    }

    /// Collect the `item_name` elements of a
    /// `<Wrapper><Quantity/><Items>...</Items></Wrapper>` list shape, where
    /// `wrapper` names a direct child of this node.
    ///
    /// Returns an empty vector when the wrapper or its `Items` child is absent.
    #[must_use]
    pub fn items_named<'a>(&'a self, wrapper: &str, item_name: &str) -> Vec<&'a Node> {
        self.child(wrapper)
            .map_or_else(Vec::new, |w| w.direct_items(item_name))
    }

    /// Like [`Node::items_named`], but returns a copy of each item's text.
    #[must_use]
    pub fn string_items(&self, wrapper: &str, item_name: &str) -> Vec<String> {
        self.items_named(wrapper, item_name)
            .into_iter()
            .map(|n| n.text.clone())
            .collect()
    }

    /// Parse a `<Items>...<Item/>...</Items>` list where `Items` is a *direct* child.
    ///
    /// Use this when you have already descended into the wrapper node and the
    /// current node looks like `<Wrapper><Quantity/><Items><Item/></Items></Wrapper>`.
    /// Returns an empty vector when there is no `Items` child.
    #[must_use]
    pub fn direct_items<'a>(&'a self, item_name: &str) -> Vec<&'a Node> {
        // Filter inline instead of going through `children_named`: that
        // iterator ties `item_name` to the lifetime of the returned
        // references, which would stop callers passing a temporary name.
        self.child("Items").map_or_else(Vec::new, |items| {
            items
                .children
                .iter()
                .filter(|c| c.name == item_name)
                .collect()
        })
    }

    /// String variant of `direct_items`: returns the text of every matching item.
    #[must_use]
    pub fn direct_string_items(&self, item_name: &str) -> Vec<String> {
        self.direct_items(item_name)
            .into_iter()
            .map(|n| n.text.clone())
            .collect()
    }

    /// Parse the trimmed text of the named child, falling back to
    /// `T::default()` when the child is missing or its text does not parse.
    fn child_parsed<T>(&self, name: &str) -> T
    where
        T: std::str::FromStr + Default,
    {
        // Trim here as `child_bool` does: CDATA text reaches the tree
        // untrimmed, and integer parsing rejects any surrounding whitespace.
        self.child_text(name).trim().parse().unwrap_or_default()
    }
}
94
95/// Parse an XML document into a tree rooted at the first element.
96///
97/// # Errors
98/// Returns a string error on malformed XML.
99pub fn parse(xml: &[u8]) -> Result<Node, String> {
100    if xml.is_empty() {
101        return Ok(Node::default());
102    }
103    let mut reader = Reader::from_reader(xml);
104    reader.config_mut().trim_text(true);
105    reader.config_mut().check_end_names = false;
106
107    let mut stack: Vec<Node> = Vec::with_capacity(16);
108    let mut root: Option<Node> = None;
109
110    loop {
111        match reader.read_event() {
112            Ok(Event::Start(e)) => {
113                let name = std::str::from_utf8(e.name().as_ref())
114                    .map_err(|err| format!("invalid utf8 in name: {err}"))?
115                    .to_owned();
116                stack.push(Node {
117                    name,
118                    ..Node::default()
119                });
120            }
121            Ok(Event::End(_)) => {
122                if let Some(done) = stack.pop() {
123                    if let Some(parent) = stack.last_mut() {
124                        parent.children.push(done);
125                    } else {
126                        root = Some(done);
127                    }
128                }
129            }
130            Ok(Event::Empty(e)) => {
131                let name = std::str::from_utf8(e.name().as_ref())
132                    .map_err(|err| format!("invalid utf8 in name: {err}"))?
133                    .to_owned();
134                let node = Node {
135                    name,
136                    ..Node::default()
137                };
138                if let Some(parent) = stack.last_mut() {
139                    parent.children.push(node);
140                } else {
141                    root = Some(node);
142                }
143            }
144            Ok(Event::Text(t)) => {
145                if let Some(top) = stack.last_mut() {
146                    let raw =
147                        std::str::from_utf8(t.as_ref()).map_err(|e| format!("text utf8: {e}"))?;
148                    top.text.push_str(&unescape_xml(raw));
149                }
150            }
151            Ok(Event::CData(c)) => {
152                if let Some(top) = stack.last_mut() {
153                    let s = std::str::from_utf8(&c).map_err(|e| format!("cdata utf8: {e}"))?;
154                    top.text.push_str(s);
155                }
156            }
157            Ok(Event::Eof) => break,
158            Ok(_) => {}
159            Err(e) => return Err(format!("xml parse error: {e}")),
160        }
161    }
162
163    root.ok_or_else(|| "empty XML document".to_owned())
164}
165
/// Minimal XML entity unescaping (covers the five core entities plus numeric).
///
/// Recognised references are `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`, and
/// decimal (`&#65;`) or hexadecimal (`&#x41;` / `&#X41;`) character references
/// that name a valid Unicode scalar value. Anything else is copied through
/// unchanged, including unknown entity names, malformed numeric references
/// and a bare `&`.
fn unescape_xml(s: &str) -> String {
    /// Decode the text between `&` and `;`, or `None` if it is not a
    /// recognised reference.
    fn decode(entity: &str) -> Option<char> {
        match entity {
            "amp" => Some('&'),
            "lt" => Some('<'),
            "gt" => Some('>'),
            "quot" => Some('"'),
            "apos" => Some('\''),
            _ => {
                let body = entity.strip_prefix('#')?;
                let hex = body.strip_prefix('x').or_else(|| body.strip_prefix('X'));
                let (digits, radix) = match hex {
                    Some(h) => (h, 16),
                    None => (body, 10),
                };
                // `from_str_radix` accepts a leading `+`, which a character
                // reference does not allow, so validate the digits first.
                if !digits.chars().all(|c| c.is_digit(radix)) {
                    return None;
                }
                u32::from_str_radix(digits, radix)
                    .ok()
                    .and_then(char::from_u32)
            }
        }
    }

    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(idx) = rest.find('&') {
        out.push_str(&rest[..idx]);
        let after_amp = &rest[idx + 1..];
        let decoded = after_amp
            .find(';')
            .and_then(|end| decode(&after_amp[..end]).map(|c| (c, end)));
        match decoded {
            Some((c, end)) => {
                out.push(c);
                rest = &after_amp[end + 1..];
            }
            None => {
                // Not a reference: keep the `&` and resume right after it, so a
                // real reference later in the text is still decoded.
                out.push('&');
                rest = after_amp;
            }
        }
    }
    out.push_str(rest);
    out
}
207
#[cfg(test)]
mod tests {
    use super::*;

    /// The root element keeps its name and exposes a nested element's text.
    #[test]
    fn test_parses_basic() {
        let root = parse(b"<Foo><Bar>hello</Bar></Foo>").unwrap();
        assert_eq!(root.name, "Foo");
        assert_eq!(root.child_text("Bar"), "hello");
    }

    /// Repeated `Item` elements under `Items` come back in document order.
    #[test]
    fn test_parses_items() {
        let root = parse(b"<Foo><Items><Item>a</Item><Item>b</Item></Items></Foo>").unwrap();
        let items = root.children_named("Items").next().unwrap();
        let mut texts: Vec<&str> = Vec::new();
        for item in items.children_named("Item") {
            texts.push(item.text.as_str());
        }
        assert_eq!(texts, vec!["a", "b"]);
    }
}