Skip to main content

ppt_rs/oxml/
xmlchemy.rs

1//! XML element base classes and utilities for parsing Office XML
2//!
3//! Provides XML parsing using xml-rs and a DOM-like structure for OXML elements.
4
5use std::collections::HashMap;
6use std::io::Read;
7use xml::reader::{EventReader, XmlEvent};
8use crate::exc::PptxError;
9
/// A node of the in-memory XML tree: its tag, attributes, child elements and text
#[derive(Debug, Clone)]
pub struct XmlElement {
    /// Tag name as written, including the `prefix:` part when there is one
    pub tag: String,
    /// Tag name with any prefix removed (the constructors take the part after the last `:`)
    pub local_name: String,
    /// Namespace URI, if one is associated with the element
    pub namespace: Option<String>,
    /// Attribute values keyed by attribute name
    pub attributes: HashMap<String, String>,
    /// Direct children, in the order they were added
    pub children: Vec<XmlElement>,
    /// Text held directly by this element (text of children is stored on the children)
    pub text: String,
}

impl XmlElement {
    /// Build an element for `tag` with no namespace, attributes, children or text.
    ///
    /// The local name is whatever follows the last `:` in `tag`, or the whole
    /// tag when it contains no `:`.
    pub fn new(tag: &str) -> Self {
        Self {
            tag: tag.to_owned(),
            // `rsplit` always yields at least one piece, so the fallback is never taken.
            local_name: tag.rsplit(':').next().unwrap_or(tag).to_owned(),
            namespace: None,
            attributes: HashMap::new(),
            children: Vec::new(),
            text: String::new(),
        }
    }

    /// Build an element exactly like [`XmlElement::new`] but bound to the
    /// given namespace URI.
    pub fn with_namespace(tag: &str, namespace: &str) -> Self {
        Self {
            namespace: Some(namespace.to_owned()),
            ..Self::new(tag)
        }
    }

    /// Look up an attribute by name; `None` when the attribute is absent.
    pub fn attr(&self, name: &str) -> Option<&str> {
        self.attributes.get(name).map(String::as_str)
    }

    /// Insert an attribute, overwriting any existing value stored under `name`.
    pub fn set_attr(&mut self, name: &str, value: &str) {
        self.attributes.insert(name.to_owned(), value.to_owned());
    }

    /// Append `child` after the existing children.
    pub fn add_child(&mut self, child: XmlElement) {
        self.children.push(child);
    }

    /// First direct child whose local name equals `local_name`, if any.
    pub fn find(&self, local_name: &str) -> Option<&XmlElement> {
        self.children.iter().find(|node| node.is(local_name))
    }

    /// Every direct child whose local name equals `local_name`, in child order.
    pub fn find_all(&self, local_name: &str) -> Vec<&XmlElement> {
        self.children
            .iter()
            .filter(|node| node.is(local_name))
            .collect()
    }

    /// First descendant (at any depth) whose local name equals `local_name`.
    ///
    /// The search is depth-first: each child is tested before its own
    /// subtree, and a child's subtree is exhausted before the next sibling.
    pub fn find_descendant(&self, local_name: &str) -> Option<&XmlElement> {
        self.children.iter().find_map(|node| {
            if node.is(local_name) {
                Some(node)
            } else {
                node.find_descendant(local_name)
            }
        })
    }

    /// Every descendant (at any depth) whose local name equals `local_name`.
    ///
    /// Matches are returned in depth-first pre-order; a matching element's
    /// own subtree is still searched for further matches.
    pub fn find_all_descendants(&self, local_name: &str) -> Vec<&XmlElement> {
        let mut matches = Vec::new();
        self.collect_descendants(local_name, &mut matches);
        matches
    }

    /// Recursive worker for [`XmlElement::find_all_descendants`]: appends
    /// matching descendants of `self` to `results`.
    fn collect_descendants<'a>(&'a self, local_name: &str, results: &mut Vec<&'a XmlElement>) {
        for node in self.children.iter() {
            if node.is(local_name) {
                results.push(node);
            }
            // Descend regardless of whether `node` itself matched.
            node.collect_descendants(local_name, results);
        }
    }

    /// Concatenate this element's own text with the text content of each
    /// child, recursively.
    ///
    /// The element's own `text` always comes first, followed by the children
    /// in order, so text that appeared after a child element in the source
    /// document is not interleaved at its original position.
    pub fn text_content(&self) -> String {
        self.children
            .iter()
            .fold(self.text.clone(), |mut combined, node| {
                combined.push_str(&node.text_content());
                combined
            })
    }

    /// `true` when this element's local name equals `local_name`.
    pub fn is(&self, local_name: &str) -> bool {
        local_name == self.local_name
    }
}
122
123/// XML Parser for Office XML documents
124pub struct XmlParser;
125
126impl XmlParser {
127    /// Parse XML from a string
128    pub fn parse_str(xml: &str) -> Result<XmlElement, PptxError> {
129        Self::parse(xml.as_bytes())
130    }
131
132    /// Parse XML from a reader
133    pub fn parse<R: Read>(reader: R) -> Result<XmlElement, PptxError> {
134        let parser = EventReader::new(reader);
135        let mut stack: Vec<XmlElement> = Vec::new();
136        let mut root: Option<XmlElement> = None;
137
138        for event in parser {
139            match event {
140                Ok(XmlEvent::StartElement { name, attributes, namespace }) => {
141                    let tag = if let Some(ref prefix) = name.prefix {
142                        format!("{}:{}", prefix, name.local_name)
143                    } else {
144                        name.local_name.clone()
145                    };
146
147                    let mut element = XmlElement::new(&tag);
148                    element.namespace = namespace.get(&name.prefix.clone().unwrap_or_default())
149                        .map(|s| s.to_string());
150
151                    // Add attributes
152                    for attr in attributes {
153                        let attr_name = if let Some(ref prefix) = attr.name.prefix {
154                            format!("{}:{}", prefix, attr.name.local_name)
155                        } else {
156                            attr.name.local_name
157                        };
158                        element.set_attr(&attr_name, &attr.value);
159                    }
160
161                    stack.push(element);
162                }
163                Ok(XmlEvent::EndElement { .. }) => {
164                    if let Some(element) = stack.pop() {
165                        if let Some(parent) = stack.last_mut() {
166                            parent.add_child(element);
167                        } else {
168                            root = Some(element);
169                        }
170                    }
171                }
172                Ok(XmlEvent::Characters(text)) => {
173                    if let Some(current) = stack.last_mut() {
174                        current.text.push_str(&text);
175                    }
176                }
177                Ok(XmlEvent::CData(text)) => {
178                    if let Some(current) = stack.last_mut() {
179                        current.text.push_str(&text);
180                    }
181                }
182                Err(e) => {
183                    return Err(PptxError::XmlParse(e.to_string()));
184                }
185                _ => {}
186            }
187        }
188
189        root.ok_or_else(|| PptxError::XmlParse("Empty XML document".to_string()))
190    }
191}
192
193/// Legacy base class for Office XML elements (kept for compatibility)
194#[allow(dead_code)]
195pub struct BaseOxmlElement {
196    element: XmlElement,
197}
198
199impl BaseOxmlElement {
200    pub fn new() -> Self {
201        BaseOxmlElement {
202            element: XmlElement::new("element"),
203        }
204    }
205
206    pub fn from_element(element: XmlElement) -> Self {
207        BaseOxmlElement { element }
208    }
209
210    pub fn element(&self) -> &XmlElement {
211        &self.element
212    }
213}
214
215impl Default for BaseOxmlElement {
216    fn default() -> Self {
217        Self::new()
218    }
219}
220
#[cfg(test)]
mod tests {
    use super::*;

    /// A flat document: element names, one attribute and direct text are captured.
    #[test]
    fn test_parse_simple_xml() {
        let source = r#"<?xml version="1.0"?><root><child attr="value">text</child></root>"#;
        let parsed = XmlParser::parse_str(source);
        assert!(parsed.is_ok());

        let document = parsed.unwrap();
        assert_eq!(document.local_name, "root");
        assert_eq!(document.children.len(), 1);

        let only_child = &document.children[0];
        assert_eq!(only_child.local_name, "child");
        assert_eq!(only_child.attr("attr"), Some("value"));
        assert_eq!(only_child.text, "text");
    }

    /// Prefixed elements are reachable through their local (unprefixed) names.
    #[test]
    fn test_parse_namespaced_xml() {
        let source = r#"<?xml version="1.0"?>
        <p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main">
            <p:cSld>
                <p:spTree/>
            </p:cSld>
        </p:sld>"#;

        let parsed = XmlParser::parse_str(source);
        assert!(parsed.is_ok());

        let slide = parsed.unwrap();
        assert_eq!(slide.local_name, "sld");
        assert!(slide.find("cSld").is_some());
    }

    /// Descendant search reaches matches at different depths, in document order.
    #[test]
    fn test_find_descendants() {
        let source = r#"<?xml version="1.0"?>
        <root>
            <level1>
                <target>found1</target>
            </level1>
            <level1>
                <level2>
                    <target>found2</target>
                </level2>
            </level1>
        </root>"#;

        let document = XmlParser::parse_str(source).unwrap();
        let found_texts: Vec<&str> = document
            .find_all_descendants("target")
            .iter()
            .map(|hit| hit.text.as_str())
            .collect();
        assert_eq!(found_texts, ["found1", "found2"]);
    }

    /// Recursive text extraction joins the parent's text with its child's text.
    #[test]
    fn test_text_content() {
        let source = r#"<?xml version="1.0"?><p>Hello <b>World</b></p>"#;
        let paragraph = XmlParser::parse_str(source).unwrap();
        assert_eq!(paragraph.text_content(), "Hello World");
    }
}