Skip to main content

xdoc/core/
tree.rs

1use std::fmt;
2
3use super::{
4    validate_xml_name, ErrorKind, NamespaceDeclaration, NamespaceTable, QName, XmlError, XmlResult,
5};
6
7/// Opaque identifier for a node stored in a document arena.
8#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
9pub struct NodeId(usize);
10
11impl NodeId {
12    pub fn index(self) -> usize {
13        self.0
14    }
15}
16
17/// XML document represented as a typed tree backed by node IDs.
18#[derive(Debug, Clone, PartialEq, Eq)]
19pub struct Document {
20    root: Option<NodeId>,
21    nodes: Vec<Node>,
22    namespaces: NamespaceTable,
23}
24
25impl Document {
26    pub fn new() -> Self {
27        Self {
28            root: None,
29            nodes: Vec::new(),
30            namespaces: NamespaceTable::new(),
31        }
32    }
33
34    pub fn root(&self) -> Option<NodeId> {
35        self.root
36    }
37
38    pub fn namespaces(&self) -> &NamespaceTable {
39        &self.namespaces
40    }
41
42    pub fn namespaces_mut(&mut self) -> &mut NamespaceTable {
43        &mut self.namespaces
44    }
45
46    pub fn node(&self, id: NodeId) -> XmlResult<&Node> {
47        self.nodes
48            .get(id.index())
49            .filter(|node| node.id == id)
50            .ok_or_else(|| invalid_node_id(id))
51    }
52
53    pub fn parent(&self, id: NodeId) -> XmlResult<Option<NodeId>> {
54        Ok(self.node(id)?.parent)
55    }
56
57    pub fn children(&self, id: NodeId) -> XmlResult<&[NodeId]> {
58        match &self.node(id)?.kind {
59            NodeKind::Element(element) => Ok(&element.children),
60            _ => Err(XmlError::new(
61                ErrorKind::InvalidOperation,
62                "only element nodes can have children",
63            )),
64        }
65    }
66
67    pub fn add_root_element(&mut self, name: QName) -> XmlResult<NodeId> {
68        if self.root.is_some() {
69            return Err(XmlError::new(
70                ErrorKind::InvalidOperation,
71                "document already has a root element",
72            ));
73        }
74
75        let root = self.push_node(None, NodeKind::Element(ElementData::new(name)));
76        self.root = Some(root);
77        Ok(root)
78    }
79
80    pub fn add_element(&mut self, parent: NodeId, name: QName) -> XmlResult<NodeId> {
81        self.add_child(parent, NodeKind::Element(ElementData::new(name)))
82    }
83
84    pub fn add_text(&mut self, parent: NodeId, text: impl Into<String>) -> XmlResult<NodeId> {
85        self.add_child(parent, NodeKind::Text(text.into()))
86    }
87
88    pub fn add_comment(&mut self, parent: NodeId, text: impl Into<String>) -> XmlResult<NodeId> {
89        self.add_child(parent, NodeKind::Comment(text.into()))
90    }
91
92    pub fn add_cdata(&mut self, parent: NodeId, text: impl Into<String>) -> XmlResult<NodeId> {
93        self.add_child(parent, NodeKind::CData(text.into()))
94    }
95
96    pub fn add_processing_instruction(
97        &mut self,
98        parent: NodeId,
99        target: impl Into<String>,
100        data: Option<impl Into<String>>,
101    ) -> XmlResult<NodeId> {
102        let target = target.into();
103        validate_xml_name(&target)?;
104        self.add_child(
105            parent,
106            NodeKind::ProcessingInstruction {
107                target,
108                data: data.map(Into::into),
109            },
110        )
111    }
112
113    pub fn add_attribute(&mut self, element: NodeId, attribute: Attribute) -> XmlResult<()> {
114        self.element_mut(element)?.attributes.push(attribute);
115        Ok(())
116    }
117
118    pub fn add_namespace_declaration(
119        &mut self,
120        element: NodeId,
121        declaration: NamespaceDeclaration,
122    ) -> XmlResult<()> {
123        self.element_mut(element)?
124            .namespace_declarations
125            .push(declaration);
126        Ok(())
127    }
128
129    pub fn path(&self, id: NodeId) -> XmlResult<XmlPath> {
130        self.node(id)?;
131
132        let mut segments = Vec::new();
133        let mut current = Some(id);
134        while let Some(current_id) = current {
135            let node = self.node(current_id)?;
136            segments.push(path_segment(node));
137            current = node.parent;
138        }
139        segments.reverse();
140
141        Ok(XmlPath::new(segments))
142    }
143
144    fn add_child(&mut self, parent: NodeId, kind: NodeKind) -> XmlResult<NodeId> {
145        self.node(parent)?;
146        if !matches!(self.node(parent)?.kind, NodeKind::Element(_)) {
147            return Err(XmlError::new(
148                ErrorKind::InvalidOperation,
149                "only element nodes can have children",
150            ));
151        }
152
153        let child = self.push_node(Some(parent), kind);
154        self.element_mut(parent)?.children.push(child);
155        Ok(child)
156    }
157
158    fn push_node(&mut self, parent: Option<NodeId>, kind: NodeKind) -> NodeId {
159        let id = NodeId(self.nodes.len());
160        self.nodes.push(Node { id, parent, kind });
161        id
162    }
163
164    fn element_mut(&mut self, id: NodeId) -> XmlResult<&mut ElementData> {
165        match &mut self
166            .nodes
167            .get_mut(id.index())
168            .filter(|node| node.id == id)
169            .ok_or_else(|| invalid_node_id(id))?
170            .kind
171        {
172            NodeKind::Element(element) => Ok(element),
173            _ => Err(XmlError::new(
174                ErrorKind::InvalidOperation,
175                "operation requires an element node",
176            )),
177        }
178    }
179}
180
181impl Default for Document {
182    fn default() -> Self {
183        Self::new()
184    }
185}
186
187/// Node stored in a document arena.
188#[derive(Debug, Clone, PartialEq, Eq)]
189pub struct Node {
190    id: NodeId,
191    parent: Option<NodeId>,
192    kind: NodeKind,
193}
194
195impl Node {
196    pub fn id(&self) -> NodeId {
197        self.id
198    }
199
200    pub fn parent(&self) -> Option<NodeId> {
201        self.parent
202    }
203
204    pub fn kind(&self) -> &NodeKind {
205        &self.kind
206    }
207}
208
209/// Supported XML node kinds in the core tree model.
210#[derive(Debug, Clone, PartialEq, Eq)]
211pub enum NodeKind {
212    Element(ElementData),
213    Text(String),
214    Comment(String),
215    CData(String),
216    ProcessingInstruction {
217        target: String,
218        data: Option<String>,
219    },
220}
221
222/// Data carried by an XML element node.
223#[derive(Debug, Clone, PartialEq, Eq)]
224pub struct ElementData {
225    name: QName,
226    attributes: Vec<Attribute>,
227    children: Vec<NodeId>,
228    namespace_declarations: Vec<NamespaceDeclaration>,
229}
230
231impl ElementData {
232    pub fn new(name: QName) -> Self {
233        Self {
234            name,
235            attributes: Vec::new(),
236            children: Vec::new(),
237            namespace_declarations: Vec::new(),
238        }
239    }
240
241    pub fn name(&self) -> &QName {
242        &self.name
243    }
244
245    pub fn attributes(&self) -> &[Attribute] {
246        &self.attributes
247    }
248
249    pub fn children(&self) -> &[NodeId] {
250        &self.children
251    }
252
253    pub fn namespace_declarations(&self) -> &[NamespaceDeclaration] {
254        &self.namespace_declarations
255    }
256}
257
258/// XML attribute stored on an element.
259#[derive(Debug, Clone, PartialEq, Eq)]
260pub struct Attribute {
261    name: QName,
262    value: String,
263}
264
265impl Attribute {
266    pub fn new(name: QName, value: impl Into<String>) -> Self {
267        Self {
268            name,
269            value: value.into(),
270        }
271    }
272
273    pub fn name(&self) -> &QName {
274        &self.name
275    }
276
277    pub fn value(&self) -> &str {
278        &self.value
279    }
280}
281
282/// Logical path to a node in a document tree.
283#[derive(Debug, Clone, PartialEq, Eq)]
284pub struct XmlPath {
285    segments: Vec<String>,
286}
287
288impl XmlPath {
289    pub fn new(segments: Vec<String>) -> Self {
290        Self { segments }
291    }
292
293    pub fn segments(&self) -> &[String] {
294        &self.segments
295    }
296}
297
298impl fmt::Display for XmlPath {
299    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
300        if self.segments.is_empty() {
301            return f.write_str("/");
302        }
303
304        write!(f, "/{}", self.segments.join("/"))
305    }
306}
307
308fn invalid_node_id(id: NodeId) -> XmlError {
309    XmlError::new(
310        ErrorKind::NotFound,
311        format!("node id {} does not exist in this document", id.index()),
312    )
313}
314
315fn path_segment(node: &Node) -> String {
316    match &node.kind {
317        NodeKind::Element(element) => element.name().lexical_name(),
318        NodeKind::Text(_) => "text()".to_owned(),
319        NodeKind::Comment(_) => "comment()".to_owned(),
320        NodeKind::CData(_) => "cdata()".to_owned(),
321        NodeKind::ProcessingInstruction { target, .. } => {
322            format!("processing-instruction({target})")
323        }
324    }
325}
326
327#[cfg(test)]
328mod tests {
329    use super::*;
330    use crate::core::{NamespacePrefix, NamespaceUri};
331
332    fn qname(local: &str) -> QName {
333        QName::new(local).expect("valid name")
334    }
335
336    #[test]
337    fn document_new_creates_empty_document() {
338        let document = Document::new();
339
340        assert_eq!(document.root(), None);
341        assert!(document.namespaces().is_empty());
342    }
343
344    #[test]
345    fn document_tree_creates_three_levels() {
346        let mut document = Document::new();
347        let root = document
348            .add_root_element(qname("Root"))
349            .expect("root element");
350        let child = document
351            .add_element(root, qname("Child"))
352            .expect("child element");
353        let grandchild = document
354            .add_element(child, qname("Grandchild"))
355            .expect("grandchild element");
356        let text = document.add_text(grandchild, "value").expect("text node");
357
358        assert_eq!(document.root(), Some(root));
359        assert_eq!(document.parent(child).expect("parent"), Some(root));
360        assert_eq!(document.parent(grandchild).expect("parent"), Some(child));
361        assert_eq!(document.parent(text).expect("parent"), Some(grandchild));
362    }
363
364    #[test]
365    fn navigation_lists_children_and_parent() {
366        let mut document = Document::new();
367        let root = document
368            .add_root_element(qname("Root"))
369            .expect("root element");
370        let first = document.add_element(root, qname("First")).expect("child");
371        let second = document.add_element(root, qname("Second")).expect("child");
372
373        assert_eq!(document.children(root).expect("children"), &[first, second]);
374        assert_eq!(document.parent(first).expect("parent"), Some(root));
375        assert_eq!(document.parent(root).expect("parent"), None);
376    }
377
378    #[test]
379    fn node_kinds_support_text_comment_cdata_and_processing_instruction() {
380        let mut document = Document::new();
381        let root = document
382            .add_root_element(qname("Root"))
383            .expect("root element");
384
385        let text = document.add_text(root, "text").expect("text");
386        let comment = document.add_comment(root, "comment").expect("comment");
387        let cdata = document.add_cdata(root, "raw <xml>").expect("cdata");
388        let instruction = document
389            .add_processing_instruction(root, "xml-stylesheet", Some("href=\"style.xsl\""))
390            .expect("processing instruction");
391
392        assert!(
393            matches!(document.node(text).expect("node").kind(), NodeKind::Text(value) if value == "text")
394        );
395        assert!(
396            matches!(document.node(comment).expect("node").kind(), NodeKind::Comment(value) if value == "comment")
397        );
398        assert!(
399            matches!(document.node(cdata).expect("node").kind(), NodeKind::CData(value) if value == "raw <xml>")
400        );
401        assert!(matches!(
402            document.node(instruction).expect("node").kind(),
403            NodeKind::ProcessingInstruction { target, data }
404                if target == "xml-stylesheet" && data.as_deref() == Some("href=\"style.xsl\"")
405        ));
406    }
407
408    #[test]
409    fn document_adds_attributes_and_namespaces_to_elements() {
410        let mut document = Document::new();
411        let root = document
412            .add_root_element(QName::qualified("doc", "Root", "urn:doc").expect("valid qname"))
413            .expect("root element");
414
415        document
416            .add_attribute(root, Attribute::new(qname("id"), "123"))
417            .expect("attribute");
418        document
419            .add_namespace_declaration(
420                root,
421                NamespaceDeclaration::prefixed("doc", "urn:doc").expect("namespace"),
422            )
423            .expect("namespace declaration");
424
425        let root_node = document.node(root).expect("root node");
426        let NodeKind::Element(element) = root_node.kind() else {
427            panic!("root must be element");
428        };
429
430        assert_eq!(element.attributes()[0].name().local(), "id");
431        assert_eq!(element.attributes()[0].value(), "123");
432        assert_eq!(
433            element.namespace_declarations()[0]
434                .prefix()
435                .map(NamespacePrefix::as_str),
436            Some("doc")
437        );
438        assert_eq!(
439            element.namespace_declarations()[0].uri().as_str(),
440            "urn:doc"
441        );
442    }
443
444    #[test]
445    fn document_rejects_second_root() {
446        let mut document = Document::new();
447        document
448            .add_root_element(qname("Root"))
449            .expect("root element");
450
451        let error = document
452            .add_root_element(qname("OtherRoot"))
453            .expect_err("second root must fail");
454
455        assert_eq!(error.kind(), &ErrorKind::InvalidOperation);
456    }
457
458    #[test]
459    fn document_rejects_invalid_node_id() {
460        let document = Document::new();
461        let error = document
462            .node(NodeId(99))
463            .expect_err("invalid node id must fail");
464
465        assert_eq!(error.kind(), &ErrorKind::NotFound);
466    }
467
468    #[test]
469    fn node_rejects_children_on_non_element_nodes() {
470        let mut document = Document::new();
471        let root = document
472            .add_root_element(qname("Root"))
473            .expect("root element");
474        let text = document.add_text(root, "text").expect("text node");
475
476        let error = document
477            .add_text(text, "nested")
478            .expect_err("text cannot have child nodes");
479
480        assert_eq!(error.kind(), &ErrorKind::InvalidOperation);
481    }
482
483    #[test]
484    fn xml_path_identifies_logical_location() {
485        let mut document = Document::new();
486        let root = document
487            .add_root_element(qname("Root"))
488            .expect("root element");
489        let child = document.add_element(root, qname("Child")).expect("child");
490        let text = document.add_text(child, "value").expect("text");
491
492        let path = document.path(text).expect("path");
493
494        assert_eq!(
495            path.segments(),
496            &["Root".to_owned(), "Child".to_owned(), "text()".to_owned()]
497        );
498        assert_eq!(path.to_string(), "/Root/Child/text()");
499    }
500
501    #[test]
502    fn document_namespace_table_is_available() {
503        let mut document = Document::new();
504        document
505            .namespaces_mut()
506            .declare_default("urn:default")
507            .expect("default namespace");
508
509        assert_eq!(
510            document
511                .namespaces()
512                .default_namespace()
513                .map(NamespaceUri::as_str),
514            Some("urn:default")
515        );
516    }
517}