typst_library/loading/
xml.rs

1use roxmltree::ParsingOptions;
2use typst_syntax::Spanned;
3
4use crate::diag::{LoadError, LoadedWithin, SourceResult, format_xml_like_error};
5use crate::engine::Engine;
6use crate::foundations::{Array, Dict, IntoValue, Str, Value, dict, func, scope};
7use crate::loading::{DataSource, Load, Readable};
8
9/// Reads structured data from an XML file.
10///
11/// The XML file is parsed into an array of dictionaries and strings. XML nodes
12/// can be elements or strings. Elements are represented as dictionaries with
13/// the following keys:
14///
15/// - `tag`: The name of the element as a string.
16/// - `attrs`: A dictionary of the element's attributes as strings.
17/// - `children`: An array of the element's child nodes.
18///
19/// The XML file in the example contains a root `news` tag with multiple
20/// `article` tags. Each article has a `title`, `author`, and `content` tag. The
21/// `content` tag contains one or more paragraphs, which are represented as `p`
22/// tags.
23///
24/// # Example
25/// ```example
26/// #let find-child(elem, tag) = {
27///   elem.children
28///     .find(e => "tag" in e and e.tag == tag)
29/// }
30///
31/// #let article(elem) = {
32///   let title = find-child(elem, "title")
33///   let author = find-child(elem, "author")
34///   let pars = find-child(elem, "content")
35///
36///   [= #title.children.first()]
37///   text(10pt, weight: "medium")[
38///     Published by
39///     #author.children.first()
40///   ]
41///
42///   for p in pars.children {
43///     if type(p) == dictionary {
44///       parbreak()
45///       p.children.first()
46///     }
47///   }
48/// }
49///
50/// #let data = xml("example.xml")
51/// #for elem in data.first().children {
52///   if type(elem) == dictionary {
53///     article(elem)
54///   }
55/// }
56/// ```
57#[func(scope, title = "XML")]
58pub fn xml(
59    engine: &mut Engine,
60    /// A [path]($syntax/#paths) to an XML file or raw XML bytes.
61    source: Spanned<DataSource>,
62) -> SourceResult<Value> {
63    let loaded = source.load(engine.world)?;
64    let text = loaded.data.as_str().within(&loaded)?;
65    let document = roxmltree::Document::parse_with_options(
66        text,
67        ParsingOptions { allow_dtd: true, ..Default::default() },
68    )
69    .map_err(format_xml_error)
70    .within(&loaded)?;
71    Ok(convert_xml(document.root()))
72}
73
74#[scope]
75impl xml {
76    /// Reads structured data from an XML string/bytes.
77    #[func(title = "Decode XML")]
78    #[deprecated(
79        message = "`xml.decode` is deprecated, directly pass bytes to `xml` instead",
80        until = "0.15.0"
81    )]
82    pub fn decode(
83        engine: &mut Engine,
84        /// XML data.
85        data: Spanned<Readable>,
86    ) -> SourceResult<Value> {
87        xml(engine, data.map(Readable::into_source))
88    }
89}
90
91/// Convert an XML node to a Typst value.
92fn convert_xml(node: roxmltree::Node) -> Value {
93    if node.is_text() {
94        return node.text().unwrap_or_default().into_value();
95    }
96
97    let children: Array = node.children().map(convert_xml).collect();
98    if node.is_root() {
99        return Value::Array(children);
100    }
101
102    let tag: Str = node.tag_name().name().into();
103    let attrs: Dict = node
104        .attributes()
105        .map(|attr| (attr.name().into(), attr.value().into_value()))
106        .collect();
107
108    Value::Dict(dict! {
109        "tag" => tag,
110        "attrs" => attrs,
111        "children" => children,
112    })
113}
114
115/// Format the user-facing XML error message.
116fn format_xml_error(error: roxmltree::Error) -> LoadError {
117    format_xml_like_error("XML", error)
118}