Skip to main content

azul_layout/xml/
mod.rs

1#![allow(unused_variables)]
2
3use alloc::{boxed::Box, collections::BTreeMap, string::String, vec::Vec};
4use core::fmt;
5#[cfg(feature = "std")]
6use std::path::Path;
7
8#[cfg(feature = "svg")]
9pub mod svg;
10
11pub use azul_core::xml::*;
12use azul_core::{dom::Dom, impl_from, styled_dom::StyledDom, window::StringPairVec};
13#[cfg(feature = "parser")]
14use azul_css::parser2::CssParseError;
15use azul_css::{css::Css, AzString, OptionString, U8Vec};
16use xmlparser::Tokenizer;
17
/// Parses an XML string and converts it into a styled DOM.
///
/// This function never fails: if either the XML parsing step or the
/// XML-to-DOM conversion step returns an error, the formatted error message
/// is rendered as a text node inside a body node styled with an empty
/// stylesheet.
#[cfg(feature = "xml")]
pub fn domxml_from_str(xml: &str, component_map: &mut XmlComponentMap) -> DomXml {
    let error_css = Css::empty();

    // Fallback DOM shared by both failure paths: a body with the message as text.
    let render_error = |message: String| DomXml {
        parsed_dom: Dom::create_body()
            .with_children(vec![Dom::create_text(message)].into())
            .style(error_css.clone()),
    };

    let nodes = match parse_xml_string(xml) {
        Ok(nodes) => nodes,
        Err(parse_error) => return render_error(format!("{}", parse_error)),
    };

    match str_to_dom(nodes.as_ref(), component_map, None) {
        Ok(parsed_dom) => DomXml { parsed_dom },
        Err(dom_error) => render_error(format!("{}", dom_error)),
    }
}
46
/// Reads an XML file from disk, parses it and builds a DOM from it.
///
/// **Warning**: The file is read from disk again on every call - do not
/// use this in release builds! This function deliberately never fails: if the
/// file cannot be read, the I/O error (together with the file path) is rendered
/// as a text node; parse errors are handled by [`domxml_from_str`].
#[cfg(all(feature = "std", feature = "xml"))]
pub fn domxml_from_file<I: AsRef<Path>>(
    file_path: I,
    component_map: &mut XmlComponentMap,
) -> DomXml {
    let path = file_path.as_ref();

    match std::fs::read_to_string(path) {
        Ok(contents) => domxml_from_str(&contents, component_map),
        Err(io_error) => {
            let message = format!(
                "Error reading: \"{}\": {}",
                path.to_string_lossy(),
                io_error
            );

            DomXml {
                parsed_dom: Dom::create_body()
                    .with_children(vec![Dom::create_text(message)].into())
                    .style(Css::empty()),
            }
        }
    }
}
81
/// Parses an XML / HTML-like string into a tree of XML nodes.
///
/// All parsed nodes are attached to an implicit, default-constructed root
/// node; the children of that root are returned. Since the input may contain
/// multiple top-level nodes, the result is a `Vec<XmlNodeChild>` whose
/// elements contain all of their children recursively.
///
/// Parsing is deliberately lenient ("HTML5-lite"):
///
/// - a leading `<?...?>` declaration and a leading `<!DOCTYPE ...>`
///   (case-insensitive) are stripped before tokenizing; if there is no
///   doctype, one leading `<!-- ... -->` comment is stripped instead
/// - void elements (`<br>`, `<img>`, ...) do not need a closing tag, and a
///   closing tag for a void element is ignored
/// - elements listed in the auto-close rules (`<li>`, `<p>`, `<td>`, ...) are
///   closed implicitly when one of their "closing" siblings starts
/// - a closing tag that matches no open element is ignored
/// - whitespace-only text nodes are dropped
///
/// # Errors
///
/// Returns `XmlError::MalformedHierarchy` if a leading `<?` or `<!DOCTYPE`
/// is never terminated, and `XmlError::ParserError` if the tokenizer
/// rejects the input.
#[cfg(feature = "xml")]
pub fn parse_xml_string(xml: &str) -> Result<Vec<XmlNodeChild>, XmlError> {
    use xmlparser::{ElementEnd::*, Token::*, Tokenizer};

    use self::XmlParseError::*;

    let mut root_node = XmlNode::default();

    // Search for "<?xml" and "?>" tags and delete them from the XML
    let mut xml = xml.trim();
    if xml.starts_with("<?") {
        let pos = xml.find("?>").ok_or_else(|| {
            XmlError::MalformedHierarchy(azul_core::xml::MalformedHierarchyError {
                expected: "<?xml".into(),
                got: "?>".into(),
            })
        })?;
        xml = &xml[(pos + 2)..];
    }

    // Delete <!DOCTYPE ...> if necessary (case-insensitive).
    //
    // The prefix is compared on the raw bytes: slicing the `&str` at a fixed
    // byte offset (`xml[..9]`) would panic whenever byte 9 is not a UTF-8
    // char boundary, e.g. for input that starts with multi-byte characters.
    let mut xml = xml.trim();
    if xml.len() > 9 && xml.as_bytes()[..9].eq_ignore_ascii_case(b"<!doctype") {
        let pos = xml.find(">").ok_or_else(|| {
            XmlError::MalformedHierarchy(azul_core::xml::MalformedHierarchyError {
                expected: "<!DOCTYPE".into(),
                got: ">".into(),
            })
        })?;
        xml = &xml[(pos + 1)..];
    } else if xml.starts_with("<!--") {
        // Skip HTML comments at the start
        if let Some(end) = xml.find("-->") {
            xml = &xml[(end + 3)..];
            xml = xml.trim();
        }
    }

    let tokenizer = Tokenizer::from_fragment(xml, 0..xml.len());

    // Path from the root node to the currently open element: each entry is
    // the index of the element inside its parent's `children`, e.g.
    // [0 -> 1st element, 5th-element -> node]
    let mut current_hierarchy: Vec<usize> = Vec::new();

    // HTML5-lite parser: List of void elements that should auto-close
    // See: https://developer.mozilla.org/en-US/docs/Glossary/Void_element
    const VOID_ELEMENTS: &[&str] = &[
        "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param",
        "source", "track", "wbr",
    ];

    // HTML5-lite parser: Elements that auto-close when certain other elements are encountered
    // Format: (element_name, closes_when_encountering)
    const AUTO_CLOSE_RULES: &[(&str, &[&str])] = &[
        // List items close when encountering another list item or when parent closes
        ("li", &["li"]),
        // Table cells/rows have complex closing rules
        ("td", &["td", "th", "tr"]),
        ("th", &["td", "th", "tr"]),
        ("tr", &["tr"]),
        // Paragraphs close on block-level elements
        (
            "p",
            &[
                "address",
                "article",
                "aside",
                "blockquote",
                "div",
                "dl",
                "fieldset",
                "footer",
                "form",
                "h1",
                "h2",
                "h3",
                "h4",
                "h5",
                "h6",
                "header",
                "hr",
                "main",
                "nav",
                "ol",
                "p",
                "pre",
                "section",
                "table",
                "ul",
            ],
        ),
        // Option closes on another option or optgroup
        ("option", &["option", "optgroup"]),
        ("optgroup", &["optgroup"]),
        // DD/DT close on each other
        ("dd", &["dd", "dt"]),
        ("dt", &["dd", "dt"]),
    ];

    // True while the innermost entry of `current_hierarchy` is a void element
    // that still has to be popped before the next sibling / text / close tag.
    let mut last_was_void = false;

    for token in tokenizer {
        let token = token.map_err(|e| XmlError::ParserError(translate_xmlparser_error(e)))?;
        match token {
            ElementStart { local, .. } => {
                let tag_name = local.to_string();
                let is_void_element = VOID_ELEMENTS.contains(&tag_name.as_str());

                // HTML5-lite: If last element was a void element (like <img src="...">),
                // pop it from hierarchy before processing the new element
                if last_was_void {
                    current_hierarchy.pop();
                    last_was_void = false;
                }

                // HTML5-lite: Check if we need to auto-close the current element
                if !current_hierarchy.is_empty() {
                    if let Some(current_element) = get_item(&current_hierarchy, &mut root_node) {
                        let current_tag = current_element.node_type.as_str();

                        // Check if current element should auto-close when encountering this new tag
                        for (element, closes_on) in AUTO_CLOSE_RULES {
                            if current_tag == *element && closes_on.contains(&tag_name.as_str()) {
                                // Auto-close the current element
                                current_hierarchy.pop();
                                break;
                            }
                        }
                    }
                }

                if let Some(current_parent) = get_item(&current_hierarchy, &mut root_node) {
                    let children_len = current_parent.children.len();

                    current_parent.children.push(XmlNodeChild::Element(XmlNode {
                        node_type: tag_name.into(),
                        attributes: StringPairVec::new().into(),
                        children: Vec::new().into(),
                    }));

                    // Always push to hierarchy so attributes get assigned correctly.
                    // Void elements are popped lazily via `last_was_void`.
                    current_hierarchy.push(children_len);
                    last_was_void = is_void_element;
                }
            }
            ElementEnd { end: Empty, .. } => {
                // Self-closing tag (`<tag />`): the element is complete, pop it
                current_hierarchy.pop();
                last_was_void = false;
            }
            ElementEnd {
                end: Close(_, close_value),
                ..
            } => {
                // HTML5-lite: If last element was a void element, pop it first
                if last_was_void {
                    current_hierarchy.pop();
                    last_was_void = false;
                }

                // HTML5-lite: Void elements shouldn't have closing tags, but tolerate them
                if VOID_ELEMENTS.contains(&close_value.as_str()) {
                    continue;
                }

                let close_value_str = close_value.as_str();

                // Walk from the innermost open element outwards until an element
                // with the same name as the closing tag is found.
                for i in (0..current_hierarchy.len()).rev() {
                    if let Some(node) = get_item(&current_hierarchy[..=i], &mut root_node) {
                        if node.node_type.as_str() == close_value_str {
                            // Closing the match also auto-closes everything nested in it
                            current_hierarchy.truncate(i);
                            break;
                        }
                    }
                }

                // HTML5-lite: If no open element matched, the closing tag is
                // silently ignored (lenient parsing) instead of raising an error.

                last_was_void = false;
            }
            Attribute { local, value, .. } => {
                if let Some(last) = get_item(&current_hierarchy, &mut root_node) {
                    // NOTE: Key and value are stored exactly as written,
                    // no case normalization is applied here.
                    last.attributes.push(azul_core::window::AzStringPair {
                        key: local.to_string().into(),
                        value: value.as_str().to_string().into(),
                    });
                }
            }
            Text { text } => {
                // HTML5-lite: If last element was a void element, pop it before adding text
                if last_was_void {
                    current_hierarchy.pop();
                    last_was_void = false;
                }

                // Whitespace-only text nodes are dropped entirely
                let is_whitespace_only = text.trim().is_empty();

                if !is_whitespace_only {
                    if let Some(current_parent) = get_item(&current_hierarchy, &mut root_node) {
                        // Add text as a child node
                        current_parent
                            .children
                            .push(XmlNodeChild::Text(text.to_string().into()));
                    }
                }
            }
            // Every other token kind (including the `>` that ends an opening
            // tag) carries nothing that is stored in the tree.
            _ => {}
        }
    }

    // Clean up: if we ended with a void element, pop it
    if last_was_void {
        current_hierarchy.pop();
    }

    Ok(root_node.children.into())
}
326
/// Parses the string with [`parse_xml_string`] and wraps the resulting
/// top-level nodes in an [`Xml`] value.
///
/// Any error from `parse_xml_string` is returned unchanged.
#[cfg(feature = "xml")]
pub fn parse_xml(s: &str) -> Result<Xml, XmlError> {
    parse_xml_string(s).map(|root_nodes| Xml {
        root: root_nodes.into(),
    })
}
333
334#[cfg(not(feature = "xml"))]
335pub fn parse_xml(s: &str) -> Result<Xml, XmlError> {
336    Err(XmlError::NoParserAvailable)
337}
338
339// to_string(&self) -> String
340
/// Converts a `roxmltree::ExpandedName` into an owned [`XmlQualifiedName`]
/// by copying its local name and its optional namespace.
#[cfg(feature = "xml")]
pub fn translate_roxmltree_expandedname<'a, 'b>(
    e: roxmltree::ExpandedName<'a, 'b>,
) -> XmlQualifiedName {
    let namespace: Option<AzString> = e.namespace().map(|uri| uri.to_string().into());
    let local = e.name().to_string();

    XmlQualifiedName {
        local_name: local.into(),
        namespace: namespace.into(),
    }
}
351
/// Converts the name of a `roxmltree::Attribute` into an owned
/// [`XmlQualifiedName`] by copying its local name and its optional namespace.
#[cfg(feature = "xml")]
fn translate_roxmltree_attribute(e: roxmltree::Attribute) -> XmlQualifiedName {
    let namespace: Option<AzString> = e.namespace().map(|uri| uri.to_string().into());
    let local = e.name().to_string();

    XmlQualifiedName {
        local_name: local.into(),
        namespace: namespace.into(),
    }
}
359
/// Maps an `xmlparser::StreamError` onto the equivalent [`XmlStreamError`]
/// variant, converting payloads and text positions into their owned
/// counterparts.
#[cfg(feature = "xml")]
fn translate_xmlparser_streamerror(e: xmlparser::StreamError) -> XmlStreamError {
    use xmlparser::StreamError as Src;

    match e {
        // Variants without payload map one-to-one
        Src::UnexpectedEndOfStream => XmlStreamError::UnexpectedEndOfStream,
        Src::InvalidName => XmlStreamError::InvalidName,
        Src::InvalidReference => XmlStreamError::InvalidReference,
        Src::InvalidExternalID => XmlStreamError::InvalidExternalID,
        Src::InvalidCommentData => XmlStreamError::InvalidCommentData,
        Src::InvalidCommentEnd => XmlStreamError::InvalidCommentEnd,
        Src::InvalidCharacterData => XmlStreamError::InvalidCharacterData,

        // Variants carrying the offending data plus its position
        Src::NonXmlChar(ch, at) => {
            let pos = translate_xmlparser_textpos(at);
            XmlStreamError::NonXmlChar(NonXmlCharError { ch: ch.into(), pos })
        }
        Src::InvalidChar(expected, got, at) => {
            let pos = translate_xmlparser_textpos(at);
            XmlStreamError::InvalidChar(InvalidCharError { expected, got, pos })
        }
        Src::InvalidCharMultiple(expected, got, at) => {
            let pos = translate_xmlparser_textpos(at);
            XmlStreamError::InvalidCharMultiple(InvalidCharMultipleError {
                expected,
                got: got.to_vec().into(),
                pos,
            })
        }
        Src::InvalidQuote(got, at) => {
            let pos = translate_xmlparser_textpos(at);
            XmlStreamError::InvalidQuote(InvalidQuoteError {
                got: got.into(),
                pos,
            })
        }
        Src::InvalidSpace(got, at) => {
            let pos = translate_xmlparser_textpos(at);
            XmlStreamError::InvalidSpace(InvalidSpaceError {
                got: got.into(),
                pos,
            })
        }
        Src::InvalidString(got, at) => {
            let pos = translate_xmlparser_textpos(at);
            XmlStreamError::InvalidString(InvalidStringError {
                got: got.to_string().into(),
                pos,
            })
        }
    }
}
408
/// Maps an `xmlparser::Error` onto the equivalent [`XmlParseError`] variant.
///
/// Every variant except `UnknownToken` carries a stream error plus a text
/// position, which are translated into an [`XmlTextError`].
#[cfg(feature = "xml")]
fn translate_xmlparser_error(e: xmlparser::Error) -> XmlParseError {
    use xmlparser::Error as Src;

    // Shared payload conversion for all (stream error, position) variants
    let text_error = |cause: xmlparser::StreamError, at: xmlparser::TextPos| XmlTextError {
        stream_error: translate_xmlparser_streamerror(cause),
        pos: translate_xmlparser_textpos(at),
    };

    match e {
        Src::InvalidDeclaration(cause, at) => {
            XmlParseError::InvalidDeclaration(text_error(cause, at))
        }
        Src::InvalidComment(cause, at) => XmlParseError::InvalidComment(text_error(cause, at)),
        Src::InvalidPI(cause, at) => XmlParseError::InvalidPI(text_error(cause, at)),
        Src::InvalidDoctype(cause, at) => XmlParseError::InvalidDoctype(text_error(cause, at)),
        Src::InvalidEntity(cause, at) => XmlParseError::InvalidEntity(text_error(cause, at)),
        Src::InvalidElement(cause, at) => XmlParseError::InvalidElement(text_error(cause, at)),
        Src::InvalidAttribute(cause, at) => {
            XmlParseError::InvalidAttribute(text_error(cause, at))
        }
        Src::InvalidCdata(cause, at) => XmlParseError::InvalidCdata(text_error(cause, at)),
        Src::InvalidCharData(cause, at) => XmlParseError::InvalidCharData(text_error(cause, at)),
        Src::UnknownToken(at) => XmlParseError::UnknownToken(translate_xmlparser_textpos(at)),
    }
}
457
/// Maps a `roxmltree::Error` onto the equivalent [`XmlError`] variant.
///
/// Text positions are translated via `translate_roxml_textpos`. For several
/// variants only the position is carried over and the remaining payload of
/// the source error is discarded. `roxmltree::Error::EntityResolver` has no
/// dedicated counterpart and is reported as
/// `XmlError::UnknownEntityReference`.
#[cfg(feature = "xml")]
pub fn translate_roxmltree_error(e: roxmltree::Error) -> XmlError {
    use roxmltree::Error as Src;

    match e {
        // --- variants without any payload ---
        Src::NoRootNode => XmlError::NoRootNode,
        Src::DtdDetected => XmlError::DtdDetected,
        Src::UnclosedRootNode => XmlError::UnclosedRootNode,
        Src::NodesLimitReached => XmlError::NodesLimitReached,
        Src::AttributesLimitReached => XmlError::AttributesLimitReached,
        Src::NamespacesLimitReached => XmlError::NamespacesLimitReached,
        Src::UnexpectedEndOfStream => XmlError::UnexpectedEndOfStream,

        // --- variants carrying only a text position ---
        Src::InvalidXmlPrefixUri(at) => XmlError::InvalidXmlPrefixUri(translate_roxml_textpos(at)),
        Src::UnexpectedXmlUri(at) => XmlError::UnexpectedXmlUri(translate_roxml_textpos(at)),
        Src::UnexpectedXmlnsUri(at) => XmlError::UnexpectedXmlnsUri(translate_roxml_textpos(at)),
        Src::InvalidElementNamePrefix(at) => {
            XmlError::InvalidElementNamePrefix(translate_roxml_textpos(at))
        }
        Src::UnexpectedEntityCloseTag(at) => {
            XmlError::UnexpectedEntityCloseTag(translate_roxml_textpos(at))
        }
        Src::MalformedEntityReference(at) => {
            XmlError::MalformedEntityReference(translate_roxml_textpos(at))
        }
        Src::EntityReferenceLoop(at) => XmlError::EntityReferenceLoop(translate_roxml_textpos(at)),
        Src::InvalidAttributeValue(at) => {
            XmlError::InvalidAttributeValue(translate_roxml_textpos(at))
        }
        Src::UnexpectedDeclaration(at) => {
            XmlError::UnexpectedDeclaration(translate_roxml_textpos(at))
        }
        Src::InvalidName(at) => XmlError::InvalidName(translate_roxml_textpos(at)),
        Src::InvalidExternalID(at) => XmlError::InvalidExternalID(translate_roxml_textpos(at)),
        Src::InvalidComment(at) => XmlError::InvalidComment(translate_roxml_textpos(at)),
        Src::InvalidCharacterData(at) => {
            XmlError::InvalidCharacterData(translate_roxml_textpos(at))
        }
        Src::UnknownToken(at) => XmlError::UnknownToken(translate_roxml_textpos(at)),

        // --- variants whose extra payload is dropped, only the position is kept ---
        Src::NonXmlChar(_, at) => XmlError::NonXmlChar(translate_roxml_textpos(at)),
        Src::InvalidChar(_, _, at) => XmlError::InvalidChar(translate_roxml_textpos(at)),
        Src::InvalidChar2(_, _, at) => XmlError::InvalidChar2(translate_roxml_textpos(at)),
        Src::InvalidString(_, at) => XmlError::InvalidString(translate_roxml_textpos(at)),

        // --- variants carrying a name plus a text position ---
        Src::DuplicatedNamespace(name, at) => {
            XmlError::DuplicatedNamespace(DuplicatedNamespaceError {
                ns: name.into(),
                pos: translate_roxml_textpos(at),
            })
        }
        Src::UnknownNamespace(name, at) => XmlError::UnknownNamespace(UnknownNamespaceError {
            ns: name.into(),
            pos: translate_roxml_textpos(at),
        }),
        Src::UnexpectedCloseTag(expected_tag, actual_tag, at) => {
            XmlError::UnexpectedCloseTag(UnexpectedCloseTagError {
                expected: expected_tag.into(),
                actual: actual_tag.into(),
                pos: translate_roxml_textpos(at),
            })
        }
        Src::UnknownEntityReference(name, at) => {
            XmlError::UnknownEntityReference(UnknownEntityReferenceError {
                entity: name.into(),
                pos: translate_roxml_textpos(at),
            })
        }
        Src::DuplicatedAttribute(name, at) => {
            XmlError::DuplicatedAttribute(DuplicatedAttributeError {
                attribute: name.into(),
                pos: translate_roxml_textpos(at),
            })
        }
        Src::EntityResolver(at, name) => {
            // New in roxmltree 0.21: EntityResolver error variant
            // For now, treat as a generic entity reference error
            XmlError::UnknownEntityReference(UnknownEntityReferenceError {
                entity: name.into(),
                pos: translate_roxml_textpos(at),
            })
        }
    }
}
557
/// Copies the row / column of an `xmlparser::TextPos` into an [`XmlTextPos`].
#[cfg(feature = "xml")]
#[inline(always)]
const fn translate_xmlparser_textpos(o: xmlparser::TextPos) -> XmlTextPos {
    let row = o.row;
    let col = o.col;
    XmlTextPos { row, col }
}
566
/// Copies the row / column of a `roxmltree::TextPos` into an [`XmlTextPos`].
#[cfg(feature = "xml")]
#[inline(always)]
const fn translate_roxml_textpos(o: roxmltree::TextPos) -> XmlTextPos {
    let row = o.row;
    let col = o.col;
    XmlTextPos { row, col }
}
575
/// Extension trait that adds an XML constructor to `Dom`.
///
/// The trait offers a way to turn an XML/XHTML string directly into an Azul
/// `StyledDom`. It is a trait (rather than an inherent method) to avoid
/// circular dependencies between azul-core and azul-layout.
#[cfg(feature = "xml")]
pub trait DomXmlExt {
    /// Builds a `StyledDom` from an XML/XHTML string.
    ///
    /// # Arguments
    /// * `xml` - Anything that can be viewed as a `&str` containing the
    ///   XML/XHTML source
    ///
    /// # Returns
    /// The `StyledDom` for the parsed input. Implementations are expected not
    /// to fail: on a parse error, a `StyledDom` displaying the error message
    /// is returned instead.
    fn from_xml_string<S>(xml: S) -> StyledDom
    where
        S: AsRef<str>;
}
595
/// `Dom` implementation of [`DomXmlExt`], backed by [`domxml_from_str`].
#[cfg(feature = "xml")]
impl DomXmlExt for Dom {
    /// Parses `xml` with a fresh, default `XmlComponentMap` and returns the
    /// DOM produced by `domxml_from_str`.
    fn from_xml_string<S: AsRef<str>>(xml: S) -> StyledDom {
        let source = xml.as_ref();
        let mut components = XmlComponentMap::default();
        domxml_from_str(source, &mut components).parsed_dom
    }
}
603}