Skip to main content

ppt_rs/oxml/
xmlchemy.rs

//! XML element base classes and utilities for parsing Office XML.
//!
//! Provides XML parsing using xml-rs and a DOM-like structure for OXML elements.
4
5use crate::exc::PptxError;
6use std::collections::HashMap;
7use std::io::Read;
8use xml::reader::{EventReader, XmlEvent};
9
/// Represents an XML element with attributes and children
///
/// An owned tree node: each element owns its attribute map, its child
/// elements (in insertion order) and the text that was attached to it.
/// Two elements compare equal when all of their fields, including the whole
/// subtree below them, are equal.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct XmlElement {
    /// Element tag name (with namespace prefix if present)
    pub tag: String,
    /// Local name without namespace prefix
    pub local_name: String,
    /// Namespace URI
    pub namespace: Option<String>,
    /// Element attributes
    pub attributes: HashMap<String, String>,
    /// Child elements
    pub children: Vec<XmlElement>,
    /// Text content
    pub text: String,
}

impl XmlElement {
    /// Create a new XML element
    ///
    /// `tag` is stored verbatim. The local name is whatever follows the last
    /// `':'` in `tag`, or the whole tag when it contains no `':'`. The element
    /// starts with no namespace, attributes, children or text.
    pub fn new(tag: &str) -> Self {
        // Split once from the right instead of walking every ':'-separated segment.
        let local_name = tag.rsplit_once(':').map_or(tag, |(_, local)| local);
        XmlElement {
            tag: tag.to_string(),
            local_name: local_name.to_string(),
            namespace: None,
            attributes: HashMap::new(),
            children: Vec::new(),
            text: String::new(),
        }
    }

    /// Create element with namespace
    ///
    /// Identical to [`XmlElement::new`] except that `namespace` is recorded
    /// as the element's namespace URI.
    pub fn with_namespace(tag: &str, namespace: &str) -> Self {
        XmlElement {
            namespace: Some(namespace.to_string()),
            ..Self::new(tag)
        }
    }

    /// Get attribute value
    ///
    /// Returns `None` when no attribute is stored under exactly `name`.
    pub fn attr(&self, name: &str) -> Option<&str> {
        self.attributes.get(name).map(String::as_str)
    }

    /// Set attribute
    ///
    /// Inserts `name` with `value`, replacing any value already stored
    /// under that name.
    pub fn set_attr(&mut self, name: &str, value: &str) {
        self.attributes.insert(name.to_string(), value.to_string());
    }

    /// Add child element
    ///
    /// The child is appended after all existing children.
    pub fn add_child(&mut self, child: XmlElement) {
        self.children.push(child);
    }

    /// Find first child by local name
    ///
    /// Only direct children are inspected; the namespace prefix is ignored.
    pub fn find(&self, local_name: &str) -> Option<&XmlElement> {
        self.children.iter().find(|c| c.local_name == local_name)
    }

    /// Find all children by local name
    ///
    /// Only direct children are inspected; matches keep their child order.
    pub fn find_all(&self, local_name: &str) -> Vec<&XmlElement> {
        self.children
            .iter()
            .filter(|c| c.local_name == local_name)
            .collect()
    }

    /// Find first descendant by local name (recursive)
    ///
    /// Children are visited in order and each child is tested before its own
    /// subtree is searched. The element itself is never a candidate.
    pub fn find_descendant(&self, local_name: &str) -> Option<&XmlElement> {
        self.children.iter().find_map(|child| {
            if child.local_name == local_name {
                Some(child)
            } else {
                child.find_descendant(local_name)
            }
        })
    }

    /// Find all descendants by local name (recursive)
    ///
    /// Matches are returned in the same order in which
    /// [`XmlElement::find_descendant`] would encounter them.
    pub fn find_all_descendants(&self, local_name: &str) -> Vec<&XmlElement> {
        let mut results = Vec::new();
        self.collect_descendants(local_name, &mut results);
        results
    }

    /// Push every matching descendant of `self` onto `results`, parents
    /// before their own descendants.
    fn collect_descendants<'a>(&'a self, local_name: &str, results: &mut Vec<&'a XmlElement>) {
        for child in &self.children {
            if child.local_name == local_name {
                results.push(child);
            }
            child.collect_descendants(local_name, results);
        }
    }

    /// Get all text content recursively
    ///
    /// The result is this element's own `text` followed by the text content
    /// of each child, in child order.
    pub fn text_content(&self) -> String {
        // One buffer for the whole subtree rather than a fresh String per node.
        let mut buffer = String::new();
        self.append_text(&mut buffer);
        buffer
    }

    /// Append this element's text, then that of every descendant, to `buffer`.
    fn append_text(&self, buffer: &mut String) {
        buffer.push_str(&self.text);
        for child in &self.children {
            child.append_text(buffer);
        }
    }

    /// Check if element has specific local name
    pub fn is(&self, local_name: &str) -> bool {
        self.local_name == local_name
    }
}
125
126/// XML Parser for Office XML documents
127pub struct XmlParser;
128
129impl XmlParser {
130    /// Parse XML from a string
131    pub fn parse_str(xml: &str) -> Result<XmlElement, PptxError> {
132        Self::parse(xml.as_bytes())
133    }
134
135    /// Parse XML from a reader
136    pub fn parse<R: Read>(reader: R) -> Result<XmlElement, PptxError> {
137        let parser = EventReader::new(reader);
138        let mut stack: Vec<XmlElement> = Vec::new();
139        let mut root: Option<XmlElement> = None;
140
141        for event in parser {
142            match event {
143                Ok(XmlEvent::StartElement {
144                    name,
145                    attributes,
146                    namespace,
147                }) => {
148                    let tag = if let Some(ref prefix) = name.prefix {
149                        format!("{}:{}", prefix, name.local_name)
150                    } else {
151                        name.local_name.clone()
152                    };
153
154                    let mut element = XmlElement::new(&tag);
155                    element.namespace = namespace
156                        .get(&name.prefix.clone().unwrap_or_default())
157                        .map(|s| s.to_string());
158
159                    // Add attributes
160                    for attr in attributes {
161                        let attr_name = if let Some(ref prefix) = attr.name.prefix {
162                            format!("{}:{}", prefix, attr.name.local_name)
163                        } else {
164                            attr.name.local_name
165                        };
166                        element.set_attr(&attr_name, &attr.value);
167                    }
168
169                    stack.push(element);
170                }
171                Ok(XmlEvent::EndElement { .. }) => {
172                    if let Some(element) = stack.pop() {
173                        if let Some(parent) = stack.last_mut() {
174                            parent.add_child(element);
175                        } else {
176                            root = Some(element);
177                        }
178                    }
179                }
180                Ok(XmlEvent::Characters(text)) => {
181                    if let Some(current) = stack.last_mut() {
182                        current.text.push_str(&text);
183                    }
184                }
185                Ok(XmlEvent::CData(text)) => {
186                    if let Some(current) = stack.last_mut() {
187                        current.text.push_str(&text);
188                    }
189                }
190                Err(e) => {
191                    return Err(PptxError::XmlParse(e.to_string()));
192                }
193                _ => {}
194            }
195        }
196
197        root.ok_or_else(|| PptxError::XmlParse("Empty XML document".to_string()))
198    }
199}
200
#[cfg(test)]
mod tests {
    use super::*;

    /// A root with one child that carries an attribute and a text node.
    #[test]
    fn test_parse_simple_xml() {
        let parsed = XmlParser::parse_str(
            r#"<?xml version="1.0"?><root><child attr="value">text</child></root>"#,
        );
        assert!(parsed.is_ok());

        let document = parsed.unwrap();
        assert_eq!(document.local_name, "root");
        assert_eq!(document.children.len(), 1);

        let only_child = document.children.first().unwrap();
        assert_eq!(only_child.local_name, "child");
        assert_eq!(only_child.attr("attr"), Some("value"));
        assert_eq!(only_child.text, "text");
    }

    /// Prefixed tags are reachable through their local names.
    #[test]
    fn test_parse_namespaced_xml() {
        let source = r#"<?xml version="1.0"?>
        <p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main">
            <p:cSld>
                <p:spTree/>
            </p:cSld>
        </p:sld>"#;

        let parsed = XmlParser::parse_str(source);
        assert!(parsed.is_ok());

        let slide = parsed.unwrap();
        assert_eq!(slide.local_name, "sld");
        assert!(slide.find("cSld").is_some());
    }

    /// Matches at different depths are all collected, in document order.
    #[test]
    fn test_find_descendants() {
        let source = r#"<?xml version="1.0"?>
        <root>
            <level1>
                <target>found1</target>
            </level1>
            <level1>
                <level2>
                    <target>found2</target>
                </level2>
            </level1>
        </root>"#;

        let document = XmlParser::parse_str(source).unwrap();
        let texts: Vec<&str> = document
            .find_all_descendants("target")
            .into_iter()
            .map(|hit| hit.text.as_str())
            .collect();
        assert_eq!(texts, ["found1", "found2"]);
    }

    /// Text of an element and of its nested child is concatenated.
    #[test]
    fn test_text_content() {
        let paragraph =
            XmlParser::parse_str(r#"<?xml version="1.0"?><p>Hello <b>World</b></p>"#).unwrap();
        assert_eq!(paragraph.text_content(), "Hello World");
    }
}
265}