Skip to main content

xml_3dm/node/
mod.rs

1//! Node structures for XML tree representation.
2//!
3//! This module provides the core node types used to represent XML documents
4//! as trees. The design mirrors the Java implementation with BaseNode and
5//! BranchNode, unified into a single `NodeInner` with different `NodeKind`s.
6
7pub mod base;
8pub mod branch;
9mod match_area;
10mod matched_nodes;
11pub mod namespace;
12mod xml_content;
13
14pub use base::BaseNode;
15pub use branch::BranchNode;
16pub use match_area::{MatchArea, WeakNodeRef};
17pub use matched_nodes::MatchedNodes;
18pub use namespace::{is_xmlns_attr, split_qname, ExpandedName, NamespaceContext};
19pub use xml_content::{
20    java_string_hash, XmlComment, XmlContent, XmlElement, XmlProcessingInstruction, XmlText,
21};
22
23use bitflags::bitflags;
24use std::cell::RefCell;
25use std::rc::{Rc, Weak};
26use std::sync::atomic::{AtomicU64, Ordering};
27
28/// Global counter for generating unique node IDs.
29static NODE_ID_COUNTER: AtomicU64 = AtomicU64::new(1);
30
31/// Generates a unique node ID.
32fn next_node_id() -> u64 {
33    NODE_ID_COUNTER.fetch_add(1, Ordering::Relaxed)
34}
35
36bitflags! {
37    /// Match type flags indicating how a branch node matches its base node.
38    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
39    pub struct MatchType: u8 {
40        /// No match.
41        const NONE = 0;
42        /// Content (tag name and attributes, or text) matches.
43        const CONTENT = 1;
44        /// Children structure matches.
45        const CHILDREN = 2;
46        /// Both content and children match.
47        const FULL = Self::CONTENT.bits() | Self::CHILDREN.bits();
48    }
49}
50
51/// A reference-counted pointer to a node.
52pub type NodeRef = Rc<RefCell<NodeInner>>;
53
54/// Creates a new node reference.
55pub fn new_node_ref(inner: NodeInner) -> NodeRef {
56    Rc::new(RefCell::new(inner))
57}
58
59/// The inner data of a node in the parse tree.
60///
61/// Each node has:
62/// - 0 or more children
63/// - XML content (element or text)
64/// - A parent (except for root)
65/// - A position among siblings
66/// - An optional match area tag
67/// - Kind-specific data (Base or Branch)
68#[derive(Debug)]
69pub struct NodeInner {
70    /// Unique identifier for this node.
71    id: u64,
72    /// Child nodes.
73    children: Vec<NodeRef>,
74    /// XML content of this node.
75    content: Option<XmlContent>,
76    /// Weak reference to parent node.
77    parent: Weak<RefCell<NodeInner>>,
78    /// Zero-based position among siblings (-1 for root).
79    child_pos: i32,
80    /// Match area tag for grouping matched subtrees.
81    match_area: Option<Rc<MatchArea>>,
82    /// Kind-specific data (Base or Branch).
83    kind: NodeKind,
84}
85
86/// Kind-specific data for nodes.
87#[derive(Debug)]
88pub enum NodeKind {
89    /// A base node matched to two branches.
90    Base {
91        /// Matches from the left branch.
92        left: Rc<RefCell<MatchedNodes>>,
93        /// Matches from the right branch.
94        right: Rc<RefCell<MatchedNodes>>,
95    },
96    /// A branch node (left or right) matched to a base tree.
97    Branch {
98        /// The base node this branch node is matched to.
99        base_match: Weak<RefCell<NodeInner>>,
100        /// Partner nodes in the other branch tree.
101        partners: Option<Rc<RefCell<MatchedNodes>>>,
102        /// How this node matches its base node.
103        match_type: MatchType,
104        /// Whether this node is in the left tree.
105        is_left_tree: Option<bool>,
106    },
107}
108
109impl NodeInner {
110    /// Creates a new base node with the given content.
111    pub fn new_base(content: Option<XmlContent>) -> Self {
112        let id = next_node_id();
113        // Create a placeholder weak ref - will be set properly after node is wrapped in Rc
114        let weak_self = Weak::new();
115        let left = Rc::new(RefCell::new(MatchedNodes::new(weak_self.clone())));
116        let right = Rc::new(RefCell::new(MatchedNodes::new(weak_self)));
117
118        NodeInner {
119            id,
120            children: Vec::new(),
121            content,
122            parent: Weak::new(),
123            child_pos: -1,
124            match_area: None,
125            kind: NodeKind::Base { left, right },
126        }
127    }
128
129    /// Creates a new branch node with the given content.
130    pub fn new_branch(content: Option<XmlContent>) -> Self {
131        NodeInner {
132            id: next_node_id(),
133            children: Vec::new(),
134            content,
135            parent: Weak::new(),
136            child_pos: -1,
137            match_area: None,
138            kind: NodeKind::Branch {
139                base_match: Weak::new(),
140                partners: None,
141                match_type: MatchType::NONE,
142                is_left_tree: None,
143            },
144        }
145    }
146
147    /// Returns the unique ID of this node.
148    pub fn id(&self) -> u64 {
149        self.id
150    }
151
152    /// Returns true if this is a base node.
153    pub fn is_base(&self) -> bool {
154        matches!(self.kind, NodeKind::Base { .. })
155    }
156
157    /// Returns true if this is a branch node.
158    pub fn is_branch(&self) -> bool {
159        matches!(self.kind, NodeKind::Branch { .. })
160    }
161
162    /// Returns the content of this node.
163    pub fn content(&self) -> Option<&XmlContent> {
164        self.content.as_ref()
165    }
166
167    /// Returns a mutable reference to the content.
168    pub fn content_mut(&mut self) -> Option<&mut XmlContent> {
169        self.content.as_mut()
170    }
171
172    /// Sets the content of this node.
173    pub fn set_content(&mut self, content: Option<XmlContent>) {
174        self.content = content;
175    }
176
177    /// Returns the number of children.
178    pub fn child_count(&self) -> usize {
179        self.children.len()
180    }
181
182    /// Returns a reference to the child at the given index.
183    pub fn child(&self, index: usize) -> Option<&NodeRef> {
184        self.children.get(index)
185    }
186
187    /// Returns the children as a slice.
188    pub fn children(&self) -> &[NodeRef] {
189        &self.children
190    }
191
192    /// Returns a weak reference to the parent.
193    pub fn parent(&self) -> &Weak<RefCell<NodeInner>> {
194        &self.parent
195    }
196
197    /// Returns the child position (0-based index among siblings, -1 for root).
198    pub fn child_pos(&self) -> i32 {
199        self.child_pos
200    }
201
202    /// Returns the match area, if set.
203    pub fn match_area(&self) -> Option<&Rc<MatchArea>> {
204        self.match_area.as_ref()
205    }
206
207    /// Sets the match area.
208    pub fn set_match_area(&mut self, area: Option<Rc<MatchArea>>) {
209        self.match_area = area;
210    }
211
212    /// Returns the node kind.
213    pub fn kind(&self) -> &NodeKind {
214        &self.kind
215    }
216
217    /// Returns a mutable reference to the node kind.
218    pub fn kind_mut(&mut self) -> &mut NodeKind {
219        &mut self.kind
220    }
221
222    /// Returns true if this node has a left sibling.
223    pub fn has_left_sibling(&self) -> bool {
224        self.child_pos > 0
225    }
226
227    /// Returns true if this node has a right sibling.
228    pub fn has_right_sibling(&self) -> bool {
229        if let Some(parent) = self.parent.upgrade() {
230            let parent_borrowed = parent.borrow();
231            self.child_pos >= 0 && (self.child_pos as usize) < parent_borrowed.children.len() - 1
232        } else {
233            false
234        }
235    }
236
237    /// Returns the match type for branch nodes, None for base nodes.
238    pub fn match_type(&self) -> Option<MatchType> {
239        match &self.kind {
240            NodeKind::Branch { match_type, .. } => Some(*match_type),
241            NodeKind::Base { .. } => None,
242        }
243    }
244
245    /// Returns the base match for branch nodes (a weak reference to the matched base node).
246    /// Returns None for base nodes or if no match has been set.
247    pub fn get_base_match(&self) -> Option<&Weak<RefCell<NodeInner>>> {
248        match &self.kind {
249            NodeKind::Branch { base_match, .. } => {
250                // Return Some only if the weak ref is non-empty (can be upgraded)
251                if base_match.strong_count() > 0 {
252                    Some(base_match)
253                } else {
254                    None
255                }
256            }
257            NodeKind::Base { .. } => None,
258        }
259    }
260
261    /// Sets the base match for a branch node.
262    /// Does nothing for base nodes.
263    pub fn set_base_match(&mut self, base: &NodeRef) {
264        if let NodeKind::Branch { base_match, .. } = &mut self.kind {
265            *base_match = Rc::downgrade(base);
266        }
267    }
268
269    /// Sets the match type for a branch node.
270    /// Does nothing for base nodes.
271    pub fn set_match_type(&mut self, mt: MatchType) {
272        if let NodeKind::Branch { match_type, .. } = &mut self.kind {
273            *match_type = mt;
274        }
275    }
276
277    /// Clears the base match for a branch node.
278    pub fn del_base_match(&mut self) {
279        if let NodeKind::Branch { base_match, .. } = &mut self.kind {
280            *base_match = Weak::new();
281        }
282    }
283}
284
285/// Helper functions that work with NodeRef.
286impl NodeInner {
287    /// Adds a child node. Must be called on the NodeRef wrapper.
288    pub fn add_child_to_ref(parent_ref: &NodeRef, child_ref: NodeRef) {
289        {
290            let mut child = child_ref.borrow_mut();
291            child.parent = Rc::downgrade(parent_ref);
292            child.child_pos = parent_ref.borrow().children.len() as i32;
293        }
294        parent_ref.borrow_mut().children.push(child_ref);
295    }
296
297    /// Inserts a child at the given index.
298    pub fn add_child_at_to_ref(parent_ref: &NodeRef, index: usize, child_ref: NodeRef) {
299        {
300            let mut child = child_ref.borrow_mut();
301            child.parent = Rc::downgrade(parent_ref);
302            child.child_pos = index as i32;
303        }
304        {
305            let mut parent = parent_ref.borrow_mut();
306            parent.children.insert(index, child_ref);
307            // Update child positions for siblings after the insertion point
308            for i in (index + 1)..parent.children.len() {
309                parent.children[i].borrow_mut().child_pos = i as i32;
310            }
311        }
312    }
313
314    /// Replaces a child at the given index.
315    pub fn replace_child_to_ref(parent_ref: &NodeRef, index: usize, child_ref: NodeRef) {
316        let mut parent = parent_ref.borrow_mut();
317        if index < parent.children.len() {
318            parent.children[index] = child_ref;
319        }
320    }
321
322    /// Removes the child at the given index.
323    pub fn remove_child_to_ref(parent_ref: &NodeRef, index: usize) {
324        let mut parent = parent_ref.borrow_mut();
325        if index < parent.children.len() {
326            parent.children.remove(index);
327            // Update child positions for siblings after the removal point
328            for i in index..parent.children.len() {
329                parent.children[i].borrow_mut().child_pos = i as i32;
330            }
331        }
332    }
333
334    /// Removes all children.
335    pub fn remove_children_to_ref(parent_ref: &NodeRef) {
336        parent_ref.borrow_mut().children.clear();
337    }
338
339    /// Gets the left sibling of a node.
340    pub fn left_sibling_of_ref(node_ref: &NodeRef) -> Option<NodeRef> {
341        let node = node_ref.borrow();
342        if node.child_pos <= 0 {
343            return None;
344        }
345        if let Some(parent) = node.parent.upgrade() {
346            let parent_borrowed = parent.borrow();
347            let sibling_index = (node.child_pos - 1) as usize;
348            parent_borrowed.children.get(sibling_index).cloned()
349        } else {
350            None
351        }
352    }
353
354    /// Gets the right sibling of a node.
355    pub fn right_sibling_of_ref(node_ref: &NodeRef) -> Option<NodeRef> {
356        let node = node_ref.borrow();
357        if node.child_pos < 0 {
358            return None;
359        }
360        if let Some(parent) = node.parent.upgrade() {
361            let parent_borrowed = parent.borrow();
362            let sibling_index = (node.child_pos + 1) as usize;
363            if sibling_index < parent_borrowed.children.len() {
364                parent_borrowed.children.get(sibling_index).cloned()
365            } else {
366                None
367            }
368        } else {
369            None
370        }
371    }
372
373    /// Initializes the owner weak refs for a base node's MatchedNodes.
374    /// Must be called after the node is wrapped in Rc.
375    pub fn init_base_node_owner(node_ref: &NodeRef) {
376        let is_base = {
377            let node = node_ref.borrow();
378            matches!(node.kind, NodeKind::Base { .. })
379        };
380
381        if is_base {
382            let weak_self = Rc::downgrade(node_ref);
383            // We need to recreate the MatchedNodes with the proper owner
384            let new_left = Rc::new(RefCell::new(MatchedNodes::new(weak_self.clone())));
385            let new_right = Rc::new(RefCell::new(MatchedNodes::new(weak_self)));
386
387            let mut node = node_ref.borrow_mut();
388            if let NodeKind::Base { left, right } = &mut node.kind {
389                *left = new_left;
390                *right = new_right;
391            }
392        }
393    }
394}
395
396/// Creates a new base node wrapped in NodeRef with properly initialized owner refs.
397pub fn new_base_node(content: Option<XmlContent>) -> NodeRef {
398    let node_ref = new_node_ref(NodeInner::new_base(content));
399    NodeInner::init_base_node_owner(&node_ref);
400    node_ref
401}
402
403/// Creates a new branch node wrapped in NodeRef.
404pub fn new_branch_node(content: Option<XmlContent>) -> NodeRef {
405    new_node_ref(NodeInner::new_branch(content))
406}
407
408#[cfg(test)]
409mod tests {
410    use super::*;
411    use std::collections::HashMap;
412
413    #[test]
414    fn test_node_creation() {
415        let base = new_base_node(Some(XmlContent::Element(XmlElement::new(
416            "root".to_string(),
417            HashMap::new(),
418        ))));
419        assert!(base.borrow().is_base());
420        assert!(!base.borrow().is_branch());
421
422        let branch = new_branch_node(Some(XmlContent::Text(XmlText::new("hello"))));
423        assert!(!branch.borrow().is_base());
424        assert!(branch.borrow().is_branch());
425    }
426
427    #[test]
428    fn test_add_child() {
429        let parent = new_base_node(Some(XmlContent::Element(XmlElement::new(
430            "parent".to_string(),
431            HashMap::new(),
432        ))));
433        let child1 = new_base_node(Some(XmlContent::Element(XmlElement::new(
434            "child1".to_string(),
435            HashMap::new(),
436        ))));
437        let child2 = new_base_node(Some(XmlContent::Element(XmlElement::new(
438            "child2".to_string(),
439            HashMap::new(),
440        ))));
441
442        NodeInner::add_child_to_ref(&parent, child1.clone());
443        NodeInner::add_child_to_ref(&parent, child2.clone());
444
445        assert_eq!(parent.borrow().child_count(), 2);
446        assert_eq!(child1.borrow().child_pos(), 0);
447        assert_eq!(child2.borrow().child_pos(), 1);
448    }
449
450    #[test]
451    fn test_siblings() {
452        let parent = new_base_node(None);
453        let child1 = new_base_node(None);
454        let child2 = new_base_node(None);
455        let child3 = new_base_node(None);
456
457        NodeInner::add_child_to_ref(&parent, child1.clone());
458        NodeInner::add_child_to_ref(&parent, child2.clone());
459        NodeInner::add_child_to_ref(&parent, child3.clone());
460
461        // child1 has no left sibling, has right sibling
462        assert!(!child1.borrow().has_left_sibling());
463        assert!(child1.borrow().has_right_sibling());
464        assert!(NodeInner::left_sibling_of_ref(&child1).is_none());
465        assert!(NodeInner::right_sibling_of_ref(&child1).is_some());
466
467        // child2 has both siblings
468        assert!(child2.borrow().has_left_sibling());
469        assert!(child2.borrow().has_right_sibling());
470        let left = NodeInner::left_sibling_of_ref(&child2).unwrap();
471        let right = NodeInner::right_sibling_of_ref(&child2).unwrap();
472        assert_eq!(left.borrow().id(), child1.borrow().id());
473        assert_eq!(right.borrow().id(), child3.borrow().id());
474
475        // child3 has left sibling, no right sibling
476        assert!(child3.borrow().has_left_sibling());
477        assert!(!child3.borrow().has_right_sibling());
478    }
479
480    #[test]
481    fn test_remove_child() {
482        let parent = new_base_node(None);
483        let child1 = new_base_node(None);
484        let child2 = new_base_node(None);
485        let child3 = new_base_node(None);
486
487        NodeInner::add_child_to_ref(&parent, child1.clone());
488        NodeInner::add_child_to_ref(&parent, child2.clone());
489        NodeInner::add_child_to_ref(&parent, child3.clone());
490
491        // Remove middle child
492        NodeInner::remove_child_to_ref(&parent, 1);
493
494        assert_eq!(parent.borrow().child_count(), 2);
495        assert_eq!(child1.borrow().child_pos(), 0);
496        assert_eq!(child3.borrow().child_pos(), 1);
497    }
498
499    #[test]
500    fn test_insert_child() {
501        let parent = new_base_node(None);
502        let child1 = new_base_node(None);
503        let child2 = new_base_node(None);
504        let child3 = new_base_node(None);
505
506        NodeInner::add_child_to_ref(&parent, child1.clone());
507        NodeInner::add_child_to_ref(&parent, child3.clone());
508
509        // Insert child2 in the middle
510        NodeInner::add_child_at_to_ref(&parent, 1, child2.clone());
511
512        assert_eq!(parent.borrow().child_count(), 3);
513        assert_eq!(child1.borrow().child_pos(), 0);
514        assert_eq!(child2.borrow().child_pos(), 1);
515        assert_eq!(child3.borrow().child_pos(), 2);
516    }
517
518    #[test]
519    fn test_match_type_flags() {
520        assert_eq!(MatchType::FULL, MatchType::CONTENT | MatchType::CHILDREN);
521        assert!(MatchType::FULL.contains(MatchType::CONTENT));
522        assert!(MatchType::FULL.contains(MatchType::CHILDREN));
523        assert!(!MatchType::CONTENT.contains(MatchType::CHILDREN));
524    }
525
526    #[test]
527    fn test_unique_node_ids() {
528        let node1 = new_base_node(None);
529        let node2 = new_base_node(None);
530        let node3 = new_branch_node(None);
531
532        assert_ne!(node1.borrow().id(), node2.borrow().id());
533        assert_ne!(node2.borrow().id(), node3.borrow().id());
534        assert_ne!(node1.borrow().id(), node3.borrow().id());
535    }
536}