typst_library/loading/
xml.rs

1use ecow::EcoString;
2use roxmltree::ParsingOptions;
3use typst_syntax::Spanned;
4
5use crate::diag::{format_xml_like_error, At, FileError, SourceResult};
6use crate::engine::Engine;
7use crate::foundations::{dict, func, scope, Array, Dict, IntoValue, Str, Value};
8use crate::loading::{DataSource, Load, Readable};
9
10/// Reads structured data from an XML file.
11///
12/// The XML file is parsed into an array of dictionaries and strings. XML nodes
13/// can be elements or strings. Elements are represented as dictionaries with
14/// the following keys:
15///
16/// - `tag`: The name of the element as a string.
17/// - `attrs`: A dictionary of the element's attributes as strings.
18/// - `children`: An array of the element's child nodes.
19///
20/// The XML file in the example contains a root `news` tag with multiple
21/// `article` tags. Each article has a `title`, `author`, and `content` tag. The
22/// `content` tag contains one or more paragraphs, which are represented as `p`
23/// tags.
24///
25/// # Example
26/// ```example
27/// #let find-child(elem, tag) = {
28///   elem.children
29///     .find(e => "tag" in e and e.tag == tag)
30/// }
31///
32/// #let article(elem) = {
33///   let title = find-child(elem, "title")
34///   let author = find-child(elem, "author")
35///   let pars = find-child(elem, "content")
36///
37///   [= #title.children.first()]
38///   text(10pt, weight: "medium")[
39///     Published by
40///     #author.children.first()
41///   ]
42///
43///   for p in pars.children {
44///     if type(p) == dictionary {
45///       parbreak()
46///       p.children.first()
47///     }
48///   }
49/// }
50///
51/// #let data = xml("example.xml")
52/// #for elem in data.first().children {
53///   if type(elem) == dictionary {
54///     article(elem)
55///   }
56/// }
57/// ```
58#[func(scope, title = "XML")]
59pub fn xml(
60    engine: &mut Engine,
61    /// A [path]($syntax/#paths) to an XML file or raw XML bytes.
62    source: Spanned<DataSource>,
63) -> SourceResult<Value> {
64    let data = source.load(engine.world)?;
65    let text = data.as_str().map_err(FileError::from).at(source.span)?;
66    let document = roxmltree::Document::parse_with_options(
67        text,
68        ParsingOptions { allow_dtd: true, ..Default::default() },
69    )
70    .map_err(format_xml_error)
71    .at(source.span)?;
72    Ok(convert_xml(document.root()))
73}
74
75#[scope]
76impl xml {
77    /// Reads structured data from an XML string/bytes.
78    #[func(title = "Decode XML")]
79    #[deprecated = "`xml.decode` is deprecated, directly pass bytes to `xml` instead"]
80    pub fn decode(
81        engine: &mut Engine,
82        /// XML data.
83        data: Spanned<Readable>,
84    ) -> SourceResult<Value> {
85        xml(engine, data.map(Readable::into_source))
86    }
87}
88
89/// Convert an XML node to a Typst value.
90fn convert_xml(node: roxmltree::Node) -> Value {
91    if node.is_text() {
92        return node.text().unwrap_or_default().into_value();
93    }
94
95    let children: Array = node.children().map(convert_xml).collect();
96    if node.is_root() {
97        return Value::Array(children);
98    }
99
100    let tag: Str = node.tag_name().name().into();
101    let attrs: Dict = node
102        .attributes()
103        .map(|attr| (attr.name().into(), attr.value().into_value()))
104        .collect();
105
106    Value::Dict(dict! {
107        "tag" => tag,
108        "attrs" => attrs,
109        "children" => children,
110    })
111}
112
113/// Format the user-facing XML error message.
114fn format_xml_error(error: roxmltree::Error) -> EcoString {
115    format_xml_like_error("XML", error)
116}