Skip to main content

oak_xml/ast/
mod.rs

1#![doc = include_str!("readme.md")]
2
3use crate::{XmlElementType, XmlLanguage, XmlTokenType};
4use core::range::Range;
5use oak_core::{source::Source, tree::RedNode};
6use std::borrow::Cow;
7
8/// Root node of the XML AST.
9#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
10#[derive(Clone, Debug, PartialEq)]
11pub struct XmlRoot {
12    /// The root value.
13    pub value: XmlValue,
14}
15
16/// A node in the XML red tree.
17pub type XmlNode<'a> = RedNode<'a, XmlLanguage>;
18
19/// Represents a value in XML.
20#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
21#[derive(Clone, Debug, PartialEq)]
22pub enum XmlValue {
23    /// An XML element.
24    Element(XmlElement),
25    /// Text content.
26    Text(String),
27    /// A comment.
28    Comment(String),
29    /// CDATA section.
30    CData(String),
31    /// Processing instruction.
32    ProcessingInstruction(XmlPI),
33    /// A fragment of multiple values.
34    Fragment(Vec<XmlValue>),
35}
36
37/// Represents an XML element.
38#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
39#[derive(Clone, Debug, PartialEq)]
40pub struct XmlElement {
41    /// The tag name.
42    pub name: String,
43    /// Attributes of the element.
44    pub attributes: Vec<XmlAttribute>,
45    /// Children of the element.
46    pub children: Vec<XmlValue>,
47    /// Source range of the element.
48    #[cfg_attr(feature = "serde", serde(with = "oak_core::serde_range"))]
49    pub span: Range<usize>,
50}
51
52/// Represents an XML attribute.
53#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
54#[derive(Clone, Debug, PartialEq)]
55pub struct XmlAttribute {
56    /// The attribute name.
57    pub name: String,
58    /// The attribute value.
59    pub value: String,
60    /// Source range of the attribute.
61    #[cfg_attr(feature = "serde", serde(with = "oak_core::serde_range"))]
62    pub span: Range<usize>,
63}
64
65/// Represents an XML processing instruction.
66#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
67#[derive(Clone, Debug, PartialEq)]
68pub struct XmlPI {
69    /// The PI target.
70    pub target: String,
71    /// The PI data.
72    pub data: Option<String>,
73    /// Source range of the PI.
74    #[cfg_attr(feature = "serde", serde(with = "oak_core::serde_range"))]
75    pub span: Range<usize>,
76}
77
78impl XmlValue {
79    /// Returns the element if the value is an element.
80    pub fn as_element(&self) -> Option<&XmlElement> {
81        match self {
82            XmlValue::Element(e) => Some(e),
83            _ => None,
84        }
85    }
86
87    /// Returns the text if the value is text.
88    pub fn as_str(&self) -> Option<&str> {
89        match self {
90            XmlValue::Text(s) => Some(s),
91            _ => None,
92        }
93    }
94
95    /// Converts the value to an XML string representation.
96    pub fn to_string(&self) -> String {
97        match self {
98            XmlValue::Text(t) => t.clone(),
99            XmlValue::Comment(c) => format!("<!--{}-->", c),
100            XmlValue::CData(d) => format!("<![CDATA[{}]]>", d),
101            XmlValue::ProcessingInstruction(pi) => {
102                if let Some(ref data) = pi.data {
103                    format!("<?{} {}?>", pi.target, data)
104                }
105                else {
106                    format!("<?{}?>", pi.target)
107                }
108            }
109            XmlValue::Fragment(fs) => {
110                let mut s = String::new();
111                for f in fs {
112                    s.push_str(&f.to_string());
113                }
114                s
115            }
116            XmlValue::Element(e) => {
117                let mut s = format!("<{}", e.name);
118                for attr in &e.attributes {
119                    s.push_str(&format!(" {}=\"{}\"", attr.name, attr.value));
120                }
121                if e.children.is_empty() {
122                    s.push_str("/>");
123                }
124                else {
125                    s.push('>');
126                    for child in &e.children {
127                        s.push_str(&child.to_string());
128                    }
129                    s.push_str(&format!("</{}>", e.name));
130                }
131                s
132            }
133        }
134    }
135}
136
137/// Extension trait for XML red nodes.
138pub trait XmlNodeExt<'a> {
139    /// Returns the tag name of the element.
140    fn tag_name<'s, S: Source + ?Sized>(&self, source: &'s S) -> Option<Cow<'s, str>>;
141    /// Returns the attributes of the element.
142    fn attributes<S: Source + ?Sized>(&self, source: &S) -> Vec<(String, String)>;
143    /// Returns an iterator over the element's children that are elements.
144    fn xml_children(&self) -> impl Iterator<Item = RedNode<'a, XmlLanguage>>;
145    /// Returns a recursive iterator over all element descendants.
146    fn xml_children_recursive(&self) -> impl Iterator<Item = RedNode<'a, XmlLanguage>>;
147    /// Returns the text content of the node.
148    fn text<S: Source + ?Sized>(&self, source: &S) -> String;
149    /// Reads an attribute value by name.
150    fn read_attr<S: Source + ?Sized>(&self, source: &S, name: &str) -> Option<String>;
151}
152
153impl<'a> XmlNodeExt<'a> for RedNode<'a, XmlLanguage> {
154    fn tag_name<'s, S: Source + ?Sized>(&self, source: &'s S) -> Option<Cow<'s, str>> {
155        if self.green.kind != XmlElementType::Element {
156            return None;
157        }
158        for child in self.children() {
159            if let Some(node) = child.as_node() {
160                if node.green.kind == XmlElementType::StartTag || node.green.kind == XmlElementType::SelfClosingTag {
161                    for gc in node.children() {
162                        if let Some(leaf) = gc.as_token() {
163                            if leaf.kind == XmlTokenType::Identifier {
164                                return Some(source.get_text_in(leaf.span));
165                            }
166                        }
167                    }
168                }
169            }
170        }
171        None
172    }
173
174    fn attributes<S: Source + ?Sized>(&self, source: &S) -> Vec<(String, String)> {
175        let mut attrs = Vec::new();
176        if self.green.kind != XmlElementType::Element {
177            return attrs;
178        }
179        for child in self.children() {
180            if let Some(node) = child.as_node() {
181                if node.green.kind == XmlElementType::StartTag || node.green.kind == XmlElementType::SelfClosingTag {
182                    for gc in node.children() {
183                        if let Some(n) = gc.as_node() {
184                            if n.green.kind == XmlElementType::Attribute {
185                                let mut name = String::new();
186                                let mut value = String::new();
187                                for ggc in n.children() {
188                                    if let Some(leaf) = ggc.as_token() {
189                                        if leaf.kind == XmlTokenType::Identifier {
190                                            name = source.get_text_in(leaf.span).into_owned();
191                                        }
192                                        else if leaf.kind == XmlTokenType::AttributeValue {
193                                            let v = source.get_text_in(leaf.span);
194                                            value = v.trim_matches('"').trim_matches('\'').to_string();
195                                        }
196                                    }
197                                }
198                                if !name.is_empty() {
199                                    attrs.push((name, value));
200                                }
201                            }
202                        }
203                    }
204                }
205            }
206        }
207        attrs
208    }
209
210    fn xml_children(&self) -> impl Iterator<Item = RedNode<'a, XmlLanguage>> {
211        self.children().filter_map(|c| c.as_node().filter(|node| node.green.kind == XmlElementType::Element))
212    }
213
214    fn xml_children_recursive(&self) -> impl Iterator<Item = RedNode<'a, XmlLanguage>> {
215        let mut stack = Vec::new();
216        for child in self.xml_children() {
217            stack.push(child);
218        }
219
220        std::iter::from_fn(move || {
221            let next = stack.pop()?;
222            let children = next.xml_children().collect::<Vec<_>>();
223            for child in children.into_iter().rev() {
224                stack.push(child);
225            }
226            Some(next)
227        })
228    }
229
230    fn text<S: Source + ?Sized>(&self, source: &S) -> String {
231        let mut text = String::new();
232        for child in self.children() {
233            if let Some(leaf) = child.as_token() {
234                if leaf.kind == XmlTokenType::Text {
235                    text.push_str(&source.get_text_in(leaf.span));
236                }
237            }
238            else if let Some(node) = child.as_node() {
239                if node.green.kind == XmlElementType::Element {
240                    text.push_str(&node.text(source));
241                }
242            }
243        }
244        text
245    }
246
247    fn read_attr<S: Source + ?Sized>(&self, source: &S, name: &str) -> Option<String> {
248        self.attributes(source).into_iter().find(|(n, _)| n == name).map(|(_, v)| v)
249    }
250}