Skip to main content

xml_3dm/node/
mod.rs

1//! Node structures for XML tree representation.
2//!
3//! This module provides the core node types used to represent XML documents
4//! as trees. The design mirrors the Java implementation with BaseNode and
5//! BranchNode, unified into a single `NodeInner` with different `NodeKind`s.
6
7pub mod base;
8pub mod branch;
9mod match_area;
10mod matched_nodes;
11mod xml_content;
12
13pub use base::BaseNode;
14pub use branch::BranchNode;
15pub use match_area::{MatchArea, WeakNodeRef};
16pub use matched_nodes::MatchedNodes;
17pub use xml_content::{java_string_hash, XmlComment, XmlContent, XmlElement, XmlText};
18
19use bitflags::bitflags;
20use std::cell::RefCell;
21use std::rc::{Rc, Weak};
22use std::sync::atomic::{AtomicU64, Ordering};
23
24/// Global counter for generating unique node IDs.
25static NODE_ID_COUNTER: AtomicU64 = AtomicU64::new(1);
26
27/// Generates a unique node ID.
28fn next_node_id() -> u64 {
29    NODE_ID_COUNTER.fetch_add(1, Ordering::Relaxed)
30}
31
32bitflags! {
33    /// Match type flags indicating how a branch node matches its base node.
34    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
35    pub struct MatchType: u8 {
36        /// No match.
37        const NONE = 0;
38        /// Content (tag name and attributes, or text) matches.
39        const CONTENT = 1;
40        /// Children structure matches.
41        const CHILDREN = 2;
42        /// Both content and children match.
43        const FULL = Self::CONTENT.bits() | Self::CHILDREN.bits();
44    }
45}
46
47/// A reference-counted pointer to a node.
48pub type NodeRef = Rc<RefCell<NodeInner>>;
49
50/// Creates a new node reference.
51pub fn new_node_ref(inner: NodeInner) -> NodeRef {
52    Rc::new(RefCell::new(inner))
53}
54
55/// The inner data of a node in the parse tree.
56///
57/// Each node has:
58/// - 0 or more children
59/// - XML content (element or text)
60/// - A parent (except for root)
61/// - A position among siblings
62/// - An optional match area tag
63/// - Kind-specific data (Base or Branch)
64#[derive(Debug)]
65pub struct NodeInner {
66    /// Unique identifier for this node.
67    id: u64,
68    /// Child nodes.
69    children: Vec<NodeRef>,
70    /// XML content of this node.
71    content: Option<XmlContent>,
72    /// Weak reference to parent node.
73    parent: Weak<RefCell<NodeInner>>,
74    /// Zero-based position among siblings (-1 for root).
75    child_pos: i32,
76    /// Match area tag for grouping matched subtrees.
77    match_area: Option<Rc<MatchArea>>,
78    /// Kind-specific data (Base or Branch).
79    kind: NodeKind,
80}
81
82/// Kind-specific data for nodes.
83#[derive(Debug)]
84pub enum NodeKind {
85    /// A base node matched to two branches.
86    Base {
87        /// Matches from the left branch.
88        left: Rc<RefCell<MatchedNodes>>,
89        /// Matches from the right branch.
90        right: Rc<RefCell<MatchedNodes>>,
91    },
92    /// A branch node (left or right) matched to a base tree.
93    Branch {
94        /// The base node this branch node is matched to.
95        base_match: Weak<RefCell<NodeInner>>,
96        /// Partner nodes in the other branch tree.
97        partners: Option<Rc<RefCell<MatchedNodes>>>,
98        /// How this node matches its base node.
99        match_type: MatchType,
100        /// Whether this node is in the left tree.
101        is_left_tree: Option<bool>,
102    },
103}
104
105impl NodeInner {
106    /// Creates a new base node with the given content.
107    pub fn new_base(content: Option<XmlContent>) -> Self {
108        let id = next_node_id();
109        // Create a placeholder weak ref - will be set properly after node is wrapped in Rc
110        let weak_self = Weak::new();
111        let left = Rc::new(RefCell::new(MatchedNodes::new(weak_self.clone())));
112        let right = Rc::new(RefCell::new(MatchedNodes::new(weak_self)));
113
114        NodeInner {
115            id,
116            children: Vec::new(),
117            content,
118            parent: Weak::new(),
119            child_pos: -1,
120            match_area: None,
121            kind: NodeKind::Base { left, right },
122        }
123    }
124
125    /// Creates a new branch node with the given content.
126    pub fn new_branch(content: Option<XmlContent>) -> Self {
127        NodeInner {
128            id: next_node_id(),
129            children: Vec::new(),
130            content,
131            parent: Weak::new(),
132            child_pos: -1,
133            match_area: None,
134            kind: NodeKind::Branch {
135                base_match: Weak::new(),
136                partners: None,
137                match_type: MatchType::NONE,
138                is_left_tree: None,
139            },
140        }
141    }
142
143    /// Returns the unique ID of this node.
144    pub fn id(&self) -> u64 {
145        self.id
146    }
147
148    /// Returns true if this is a base node.
149    pub fn is_base(&self) -> bool {
150        matches!(self.kind, NodeKind::Base { .. })
151    }
152
153    /// Returns true if this is a branch node.
154    pub fn is_branch(&self) -> bool {
155        matches!(self.kind, NodeKind::Branch { .. })
156    }
157
158    /// Returns the content of this node.
159    pub fn content(&self) -> Option<&XmlContent> {
160        self.content.as_ref()
161    }
162
163    /// Returns a mutable reference to the content.
164    pub fn content_mut(&mut self) -> Option<&mut XmlContent> {
165        self.content.as_mut()
166    }
167
168    /// Sets the content of this node.
169    pub fn set_content(&mut self, content: Option<XmlContent>) {
170        self.content = content;
171    }
172
173    /// Returns the number of children.
174    pub fn child_count(&self) -> usize {
175        self.children.len()
176    }
177
178    /// Returns a reference to the child at the given index.
179    pub fn child(&self, index: usize) -> Option<&NodeRef> {
180        self.children.get(index)
181    }
182
183    /// Returns the children as a slice.
184    pub fn children(&self) -> &[NodeRef] {
185        &self.children
186    }
187
188    /// Returns a weak reference to the parent.
189    pub fn parent(&self) -> &Weak<RefCell<NodeInner>> {
190        &self.parent
191    }
192
193    /// Returns the child position (0-based index among siblings, -1 for root).
194    pub fn child_pos(&self) -> i32 {
195        self.child_pos
196    }
197
198    /// Returns the match area, if set.
199    pub fn match_area(&self) -> Option<&Rc<MatchArea>> {
200        self.match_area.as_ref()
201    }
202
203    /// Sets the match area.
204    pub fn set_match_area(&mut self, area: Option<Rc<MatchArea>>) {
205        self.match_area = area;
206    }
207
208    /// Returns the node kind.
209    pub fn kind(&self) -> &NodeKind {
210        &self.kind
211    }
212
213    /// Returns a mutable reference to the node kind.
214    pub fn kind_mut(&mut self) -> &mut NodeKind {
215        &mut self.kind
216    }
217
218    /// Returns true if this node has a left sibling.
219    pub fn has_left_sibling(&self) -> bool {
220        self.child_pos > 0
221    }
222
223    /// Returns true if this node has a right sibling.
224    pub fn has_right_sibling(&self) -> bool {
225        if let Some(parent) = self.parent.upgrade() {
226            let parent_borrowed = parent.borrow();
227            self.child_pos >= 0 && (self.child_pos as usize) < parent_borrowed.children.len() - 1
228        } else {
229            false
230        }
231    }
232
233    /// Returns the match type for branch nodes, None for base nodes.
234    pub fn match_type(&self) -> Option<MatchType> {
235        match &self.kind {
236            NodeKind::Branch { match_type, .. } => Some(*match_type),
237            NodeKind::Base { .. } => None,
238        }
239    }
240
241    /// Returns the base match for branch nodes (a weak reference to the matched base node).
242    /// Returns None for base nodes or if no match has been set.
243    pub fn get_base_match(&self) -> Option<&Weak<RefCell<NodeInner>>> {
244        match &self.kind {
245            NodeKind::Branch { base_match, .. } => {
246                // Return Some only if the weak ref is non-empty (can be upgraded)
247                if base_match.strong_count() > 0 {
248                    Some(base_match)
249                } else {
250                    None
251                }
252            }
253            NodeKind::Base { .. } => None,
254        }
255    }
256
257    /// Sets the base match for a branch node.
258    /// Does nothing for base nodes.
259    pub fn set_base_match(&mut self, base: &NodeRef) {
260        if let NodeKind::Branch { base_match, .. } = &mut self.kind {
261            *base_match = Rc::downgrade(base);
262        }
263    }
264
265    /// Sets the match type for a branch node.
266    /// Does nothing for base nodes.
267    pub fn set_match_type(&mut self, mt: MatchType) {
268        if let NodeKind::Branch { match_type, .. } = &mut self.kind {
269            *match_type = mt;
270        }
271    }
272
273    /// Clears the base match for a branch node.
274    pub fn del_base_match(&mut self) {
275        if let NodeKind::Branch { base_match, .. } = &mut self.kind {
276            *base_match = Weak::new();
277        }
278    }
279}
280
281/// Helper functions that work with NodeRef.
282impl NodeInner {
283    /// Adds a child node. Must be called on the NodeRef wrapper.
284    pub fn add_child_to_ref(parent_ref: &NodeRef, child_ref: NodeRef) {
285        {
286            let mut child = child_ref.borrow_mut();
287            child.parent = Rc::downgrade(parent_ref);
288            child.child_pos = parent_ref.borrow().children.len() as i32;
289        }
290        parent_ref.borrow_mut().children.push(child_ref);
291    }
292
293    /// Inserts a child at the given index.
294    pub fn add_child_at_to_ref(parent_ref: &NodeRef, index: usize, child_ref: NodeRef) {
295        {
296            let mut child = child_ref.borrow_mut();
297            child.parent = Rc::downgrade(parent_ref);
298            child.child_pos = index as i32;
299        }
300        {
301            let mut parent = parent_ref.borrow_mut();
302            parent.children.insert(index, child_ref);
303            // Update child positions for siblings after the insertion point
304            for i in (index + 1)..parent.children.len() {
305                parent.children[i].borrow_mut().child_pos = i as i32;
306            }
307        }
308    }
309
310    /// Replaces a child at the given index.
311    pub fn replace_child_to_ref(parent_ref: &NodeRef, index: usize, child_ref: NodeRef) {
312        let mut parent = parent_ref.borrow_mut();
313        if index < parent.children.len() {
314            parent.children[index] = child_ref;
315        }
316    }
317
318    /// Removes the child at the given index.
319    pub fn remove_child_to_ref(parent_ref: &NodeRef, index: usize) {
320        let mut parent = parent_ref.borrow_mut();
321        if index < parent.children.len() {
322            parent.children.remove(index);
323            // Update child positions for siblings after the removal point
324            for i in index..parent.children.len() {
325                parent.children[i].borrow_mut().child_pos = i as i32;
326            }
327        }
328    }
329
330    /// Removes all children.
331    pub fn remove_children_to_ref(parent_ref: &NodeRef) {
332        parent_ref.borrow_mut().children.clear();
333    }
334
335    /// Gets the left sibling of a node.
336    pub fn left_sibling_of_ref(node_ref: &NodeRef) -> Option<NodeRef> {
337        let node = node_ref.borrow();
338        if node.child_pos <= 0 {
339            return None;
340        }
341        if let Some(parent) = node.parent.upgrade() {
342            let parent_borrowed = parent.borrow();
343            let sibling_index = (node.child_pos - 1) as usize;
344            parent_borrowed.children.get(sibling_index).cloned()
345        } else {
346            None
347        }
348    }
349
350    /// Gets the right sibling of a node.
351    pub fn right_sibling_of_ref(node_ref: &NodeRef) -> Option<NodeRef> {
352        let node = node_ref.borrow();
353        if node.child_pos < 0 {
354            return None;
355        }
356        if let Some(parent) = node.parent.upgrade() {
357            let parent_borrowed = parent.borrow();
358            let sibling_index = (node.child_pos + 1) as usize;
359            if sibling_index < parent_borrowed.children.len() {
360                parent_borrowed.children.get(sibling_index).cloned()
361            } else {
362                None
363            }
364        } else {
365            None
366        }
367    }
368
369    /// Initializes the owner weak refs for a base node's MatchedNodes.
370    /// Must be called after the node is wrapped in Rc.
371    pub fn init_base_node_owner(node_ref: &NodeRef) {
372        let is_base = {
373            let node = node_ref.borrow();
374            matches!(node.kind, NodeKind::Base { .. })
375        };
376
377        if is_base {
378            let weak_self = Rc::downgrade(node_ref);
379            // We need to recreate the MatchedNodes with the proper owner
380            let new_left = Rc::new(RefCell::new(MatchedNodes::new(weak_self.clone())));
381            let new_right = Rc::new(RefCell::new(MatchedNodes::new(weak_self)));
382
383            let mut node = node_ref.borrow_mut();
384            if let NodeKind::Base { left, right } = &mut node.kind {
385                *left = new_left;
386                *right = new_right;
387            }
388        }
389    }
390}
391
392/// Creates a new base node wrapped in NodeRef with properly initialized owner refs.
393pub fn new_base_node(content: Option<XmlContent>) -> NodeRef {
394    let node_ref = new_node_ref(NodeInner::new_base(content));
395    NodeInner::init_base_node_owner(&node_ref);
396    node_ref
397}
398
399/// Creates a new branch node wrapped in NodeRef.
400pub fn new_branch_node(content: Option<XmlContent>) -> NodeRef {
401    new_node_ref(NodeInner::new_branch(content))
402}
403
404#[cfg(test)]
405mod tests {
406    use super::*;
407    use std::collections::HashMap;
408
409    #[test]
410    fn test_node_creation() {
411        let base = new_base_node(Some(XmlContent::Element(XmlElement::new(
412            "root".to_string(),
413            HashMap::new(),
414        ))));
415        assert!(base.borrow().is_base());
416        assert!(!base.borrow().is_branch());
417
418        let branch = new_branch_node(Some(XmlContent::Text(XmlText::new("hello"))));
419        assert!(!branch.borrow().is_base());
420        assert!(branch.borrow().is_branch());
421    }
422
423    #[test]
424    fn test_add_child() {
425        let parent = new_base_node(Some(XmlContent::Element(XmlElement::new(
426            "parent".to_string(),
427            HashMap::new(),
428        ))));
429        let child1 = new_base_node(Some(XmlContent::Element(XmlElement::new(
430            "child1".to_string(),
431            HashMap::new(),
432        ))));
433        let child2 = new_base_node(Some(XmlContent::Element(XmlElement::new(
434            "child2".to_string(),
435            HashMap::new(),
436        ))));
437
438        NodeInner::add_child_to_ref(&parent, child1.clone());
439        NodeInner::add_child_to_ref(&parent, child2.clone());
440
441        assert_eq!(parent.borrow().child_count(), 2);
442        assert_eq!(child1.borrow().child_pos(), 0);
443        assert_eq!(child2.borrow().child_pos(), 1);
444    }
445
446    #[test]
447    fn test_siblings() {
448        let parent = new_base_node(None);
449        let child1 = new_base_node(None);
450        let child2 = new_base_node(None);
451        let child3 = new_base_node(None);
452
453        NodeInner::add_child_to_ref(&parent, child1.clone());
454        NodeInner::add_child_to_ref(&parent, child2.clone());
455        NodeInner::add_child_to_ref(&parent, child3.clone());
456
457        // child1 has no left sibling, has right sibling
458        assert!(!child1.borrow().has_left_sibling());
459        assert!(child1.borrow().has_right_sibling());
460        assert!(NodeInner::left_sibling_of_ref(&child1).is_none());
461        assert!(NodeInner::right_sibling_of_ref(&child1).is_some());
462
463        // child2 has both siblings
464        assert!(child2.borrow().has_left_sibling());
465        assert!(child2.borrow().has_right_sibling());
466        let left = NodeInner::left_sibling_of_ref(&child2).unwrap();
467        let right = NodeInner::right_sibling_of_ref(&child2).unwrap();
468        assert_eq!(left.borrow().id(), child1.borrow().id());
469        assert_eq!(right.borrow().id(), child3.borrow().id());
470
471        // child3 has left sibling, no right sibling
472        assert!(child3.borrow().has_left_sibling());
473        assert!(!child3.borrow().has_right_sibling());
474    }
475
476    #[test]
477    fn test_remove_child() {
478        let parent = new_base_node(None);
479        let child1 = new_base_node(None);
480        let child2 = new_base_node(None);
481        let child3 = new_base_node(None);
482
483        NodeInner::add_child_to_ref(&parent, child1.clone());
484        NodeInner::add_child_to_ref(&parent, child2.clone());
485        NodeInner::add_child_to_ref(&parent, child3.clone());
486
487        // Remove middle child
488        NodeInner::remove_child_to_ref(&parent, 1);
489
490        assert_eq!(parent.borrow().child_count(), 2);
491        assert_eq!(child1.borrow().child_pos(), 0);
492        assert_eq!(child3.borrow().child_pos(), 1);
493    }
494
495    #[test]
496    fn test_insert_child() {
497        let parent = new_base_node(None);
498        let child1 = new_base_node(None);
499        let child2 = new_base_node(None);
500        let child3 = new_base_node(None);
501
502        NodeInner::add_child_to_ref(&parent, child1.clone());
503        NodeInner::add_child_to_ref(&parent, child3.clone());
504
505        // Insert child2 in the middle
506        NodeInner::add_child_at_to_ref(&parent, 1, child2.clone());
507
508        assert_eq!(parent.borrow().child_count(), 3);
509        assert_eq!(child1.borrow().child_pos(), 0);
510        assert_eq!(child2.borrow().child_pos(), 1);
511        assert_eq!(child3.borrow().child_pos(), 2);
512    }
513
514    #[test]
515    fn test_match_type_flags() {
516        assert_eq!(MatchType::FULL, MatchType::CONTENT | MatchType::CHILDREN);
517        assert!(MatchType::FULL.contains(MatchType::CONTENT));
518        assert!(MatchType::FULL.contains(MatchType::CHILDREN));
519        assert!(!MatchType::CONTENT.contains(MatchType::CHILDREN));
520    }
521
522    #[test]
523    fn test_unique_node_ids() {
524        let node1 = new_base_node(None);
525        let node2 = new_base_node(None);
526        let node3 = new_branch_node(None);
527
528        assert_ne!(node1.borrow().id(), node2.borrow().id());
529        assert_ne!(node2.borrow().id(), node3.borrow().id());
530        assert_ne!(node1.borrow().id(), node3.borrow().id());
531    }
532}