rpds_pathtree/
tree.rs

1// SPDX-FileCopyrightText: The rpds-pathtree authors
2// SPDX-License-Identifier: MPL-2.0
3
4use std::{borrow::Borrow, fmt, hash::Hash, marker::PhantomData, num::NonZeroUsize, sync::Arc};
5
6use derive_more::{Display, Error};
7
8use crate::{
9    new_hash_map, HalfEdge, HalfEdgeOwned, HalfEdgeTreeNode, HashMap, InnerNode, LeafNode, Node,
10    NodeValue, PathSegment, RootPath, SegmentedPath as _,
11};
12
13pub trait NewNodeId<T> {
14    fn new_node_id(&mut self) -> T;
15}
16
17/// Type system for [`PathTree`].
18pub trait PathTreeTypes: Clone + Default + fmt::Debug {
19    type NodeId: Clone + Copy + Eq + Hash + fmt::Debug + fmt::Display;
20    type NewNodeId: NewNodeId<Self::NodeId> + Clone + fmt::Debug;
21    type InnerValue: Clone + fmt::Debug;
22    type LeafValue: Clone + fmt::Debug;
23    type PathSegmentOwned: Clone + Eq + Hash + fmt::Debug + Borrow<Self::PathSegment>;
24    type PathSegment: PathSegment + ?Sized;
25    type RootPath: RootPath<Self::PathSegment> + ?Sized;
26
27    fn path_segment_to_owned(path_segment: &Self::PathSegment) -> Self::PathSegmentOwned;
28}
29
30/// A conflicting path from a parent to a child node.
31#[derive(Debug)]
32pub struct TreeNodeParentChildPathConflict<T>
33where
34    T: PathTreeTypes,
35{
36    pub parent_node: Arc<TreeNode<T>>,
37    pub child_path_segment: T::PathSegmentOwned,
38}
39
40#[derive(Debug, Display, Error)]
41pub enum InsertOrUpdateNodeValueError<T>
42where
43    T: PathTreeTypes,
44{
45    #[display("path conflict")]
46    PathConflict {
47        conflict: TreeNodeParentChildPathConflict<T>,
48        value: NodeValue<T>,
49    },
50    #[display("value type mismatch")]
51    ValueTypeMismatch { value: NodeValue<T> },
52}
53
54#[derive(Debug, Display, Error)]
55pub enum UpdateNodeValueError<T>
56where
57    T: PathTreeTypes,
58{
59    #[display("value type mismatch")]
60    ValueTypeMismatch { value: NodeValue<T> },
61}
62
63impl<T> From<UpdateNodeValueError<T>> for InsertOrUpdateNodeValueError<T>
64where
65    T: PathTreeTypes,
66{
67    fn from(from: UpdateNodeValueError<T>) -> Self {
68        let UpdateNodeValueError::ValueTypeMismatch { value } = from;
69        Self::ValueTypeMismatch { value }
70    }
71}
72
73#[derive(Debug, Clone)]
74pub struct NodeInsertedOrUpdated<T>
75where
76    T: PathTreeTypes,
77{
78    /// The new child node.
79    ///
80    /// The inserted or updated child node.
81    pub node: Arc<TreeNode<T>>,
82
83    /// The changes of the parent node.
84    ///
85    /// `None` if the node has no parent (i.e. is the root node of the tree)
86    /// or if the parent node has not been updated.
87    pub parent: Option<ParentNodeUpdated<T>>,
88}
89
90#[derive(Debug, Clone)]
91pub struct ParentNodeUpdated<T>
92where
93    T: PathTreeTypes,
94{
95    /// The new parent node.
96    pub node: Arc<TreeNode<T>>,
97
98    /// The removed subtree.
99    pub removed_subtree: Option<PathTree<T>>,
100}
101
102/// Return type when removing a node from the tree.
103#[derive(Debug, Clone)]
104pub struct SubtreeRemoved<T>
105where
106    T: PathTreeTypes,
107{
108    /// New parent node.
109    ///
110    /// Updated parent node of the removed node that remains in the tree.
111    pub parent_node: Arc<TreeNode<T>>,
112
113    /// Child path segment.
114    ///
115    /// Path segment between the parent node and its former child node,
116    /// which has become the root node of the removed subtree.
117    pub child_path_segment: T::PathSegmentOwned,
118
119    /// Removed subtree.
120    ///
121    /// A new tree built from the removed node and all its descendants.
122    pub removed_subtree: PathTree<T>,
123}
124
125/// Return type when inserting or replacing a subtree.
126#[derive(Debug, Clone)]
127pub struct SubtreeInsertedOrReplaced<T>
128where
129    T: PathTreeTypes,
130{
131    /// The root node of the inserted subtree.
132    pub child_node_id: T::NodeId,
133
134    /// New parent node.
135    ///
136    /// Updated parent node that contains the subtree as child.
137    pub parent: ParentNodeUpdated<T>,
138}
139
140impl<T> InsertOrUpdateNodeValueError<T>
141where
142    T: PathTreeTypes,
143{
144    pub fn into_value(self) -> NodeValue<T> {
145        match self {
146            Self::PathConflict { value, .. } | Self::ValueTypeMismatch { value } => value,
147        }
148    }
149}
150
151/// Cheaply clonable path tree structure.
152///
153/// Could be shared safely between multiple threads.
154#[derive(Debug, Clone)]
155pub struct PathTree<T>
156where
157    T: PathTreeTypes,
158{
159    root_node_id: T::NodeId,
160    nodes: HashMap<T::NodeId, Arc<TreeNode<T>>>,
161    new_node_id: T::NewNodeId,
162    _types: PhantomData<T>,
163}
164
165#[derive(Debug, Default)]
166struct TreeNodeParentChildContext<'a, T>
167where
168    T: PathTreeTypes,
169{
170    parent_node: Option<Arc<TreeNode<T>>>,
171    child_path_segment: Option<&'a T::PathSegment>,
172}
173
174#[derive(Debug, Clone, Copy, PartialEq, Eq)]
175pub enum MatchNodePath {
176    Full,
177    PartialOrFull,
178}
179
180#[derive(Debug, Clone, Copy, PartialEq, Eq)]
181pub enum NodePathMatched {
182    Full {
183        /// Number of path segments.
184        ///
185        /// Both the total and matched number of path segments are equals.
186        number_of_segments: usize,
187    },
188    Partial {
189        /// Number of matched path segments.
190        ///
191        /// Strictly less than the total number of path segments.
192        number_of_matched_segments: NonZeroUsize,
193    },
194}
195
196#[derive(Debug, Clone)]
197pub struct NodePathResolved<'a, T>
198where
199    T: PathTreeTypes,
200{
201    pub node: &'a Arc<TreeNode<T>>,
202    pub matched_path: NodePathMatched,
203}
204
205impl<T: PathTreeTypes> PathTree<T> {
206    /// Create a new path tree with the given root node.
207    #[must_use]
208    pub fn new(mut new_node_id: T::NewNodeId, root_node_value: NodeValue<T>) -> Self {
209        let root_node_id = new_node_id.new_node_id();
210        let root_node = TreeNode {
211            id: root_node_id,
212            parent: None,
213            node: Node::from_value_without_children(root_node_value),
214        };
215        let mut nodes = new_hash_map();
216        nodes.insert_mut(root_node_id, Arc::new(root_node));
217        Self {
218            root_node_id,
219            new_node_id,
220            nodes,
221            _types: PhantomData,
222        }
223    }
224
225    fn new_node_id(&mut self) -> T::NodeId {
226        self.new_node_id.new_node_id()
227    }
228
229    #[must_use]
230    pub const fn root_node_id(&self) -> T::NodeId {
231        self.root_node_id
232    }
233
234    #[must_use]
235    pub fn root_node(&self) -> &Arc<TreeNode<T>> {
236        self.get_node(self.root_node_id)
237    }
238
239    #[must_use]
240    pub fn lookup_node(&self, id: T::NodeId) -> Option<&Arc<TreeNode<T>>> {
241        self.nodes.get(&id)
242    }
243
244    /// Get an existing node by its id.
245    ///
246    /// Only used internally for node ids that must exist. If the node does not exist
247    /// the tree is probably in an inconsistent state!
248    ///
249    /// # Panics
250    ///
251    /// Panics if the node does not exist.
252    #[must_use]
253    fn get_node(&self, id: T::NodeId) -> &Arc<TreeNode<T>> {
254        self.nodes.get(&id).expect("node exists")
255    }
256
257    /// Find a node by its path.
258    #[must_use]
259    #[cfg_attr(debug_assertions, allow(clippy::missing_panics_doc))] // Never panics
260    pub fn find_node(&self, path: &T::RootPath) -> Option<&Arc<TreeNode<T>>> {
261        self.resolve_node_path(path, MatchNodePath::Full).map(
262            |NodePathResolved { node, matched_path }| {
263                debug_assert_eq!(
264                    matched_path,
265                    NodePathMatched::Full {
266                        number_of_segments: path.segments().count()
267                    }
268                );
269                node
270            },
271        )
272    }
273
274    #[must_use]
275    pub fn contains_node(&self, node: &Arc<TreeNode<T>>) -> bool {
276        self.lookup_node(node.id)
277            .is_some_and(|tree_node| Arc::ptr_eq(tree_node, node))
278    }
279
280    /// Find a node by its path.
281    ///
282    /// Returns the found node and the number of resolved path segments.
283    #[must_use]
284    #[cfg_attr(debug_assertions, allow(clippy::missing_panics_doc))] // Never panics
285    pub fn resolve_node_path<'a>(
286        &'a self,
287        path: &T::RootPath,
288        match_path: MatchNodePath,
289    ) -> Option<NodePathResolved<'a, T>> {
290        // TODO: Use a trie data structure and Aho-Corasick algo for faster lookup?
291        let root_node = self.get_node(self.root_node_id);
292        let mut last_visited_node = root_node;
293        let mut number_of_matched_path_segments = 0;
294        let mut partial_path_match = false;
295        for path_segment in path.segments() {
296            debug_assert!(!path_segment.is_empty());
297            match &last_visited_node.node {
298                Node::Leaf(_) => {
299                    // Path is too long, i.e. the remaining path segments could not be resolved.
300                    match match_path {
301                        MatchNodePath::Full => {
302                            return None;
303                        }
304                        MatchNodePath::PartialOrFull => {
305                            partial_path_match = true;
306                            break;
307                        }
308                    }
309                }
310                Node::Inner(inner_node) => {
311                    let child_node = inner_node
312                        .children
313                        .get(path_segment)
314                        .map(|node_id| self.get_node(*node_id));
315                    if let Some(child_node) = child_node {
316                        last_visited_node = child_node;
317                        number_of_matched_path_segments += 1;
318                    } else {
319                        // Path segment mismatch.
320                        match match_path {
321                            MatchNodePath::Full => {
322                                return None;
323                            }
324                            MatchNodePath::PartialOrFull => {
325                                partial_path_match = true;
326                                break;
327                            }
328                        }
329                    }
330                }
331            }
332            debug_assert_eq!(
333                path_segment,
334                last_visited_node
335                    .parent
336                    .as_ref()
337                    .expect("has parent")
338                    .path_segment
339                    .borrow()
340            );
341        }
342        let matched_path = if partial_path_match {
343            // At least 1 segment must match for a partial match.
344            let number_of_matched_segments = NonZeroUsize::new(number_of_matched_path_segments)?;
345            debug_assert!(number_of_matched_segments.get() < path.segments().count());
346            NodePathMatched::Partial {
347                number_of_matched_segments,
348            }
349        } else {
350            debug_assert_eq!(number_of_matched_path_segments, path.segments().count());
351            NodePathMatched::Full {
352                number_of_segments: number_of_matched_path_segments,
353            }
354        };
355        Some(NodePathResolved {
356            node: last_visited_node,
357            matched_path,
358        })
359    }
360
361    fn create_missing_ancestor_nodes<'a>(
362        &mut self,
363        child_path: &'a T::RootPath,
364        mut new_inner_value: impl FnMut() -> T::InnerValue,
365        try_clone_leaf_into_inner_value: impl FnOnce(&T::LeafValue) -> Option<T::InnerValue>,
366    ) -> Result<TreeNodeParentChildContext<'a, T>, TreeNodeParentChildPathConflict<T>> {
367        if child_path.is_root() {
368            return Ok(TreeNodeParentChildContext {
369                parent_node: None,
370                child_path_segment: None,
371            });
372        }
373        let mut try_clone_leaf_into_inner_value = Some(try_clone_leaf_into_inner_value);
374        let mut next_parent_node = Arc::clone(self.root_node());
375        let (parent_path_segments, child_path_segment) = child_path.parent_child_segments();
376        debug_assert!(child_path_segment.is_some());
377        for path_segment in parent_path_segments {
378            next_parent_node = match try_replace_leaf_with_inner_node(
379                &mut self.nodes,
380                next_parent_node,
381                &mut try_clone_leaf_into_inner_value,
382            ) {
383                Ok(next_parent_node) => next_parent_node,
384                Err(parent_node) => {
385                    return Err(TreeNodeParentChildPathConflict {
386                        parent_node,
387                        child_path_segment: T::path_segment_to_owned(path_segment),
388                    });
389                }
390            };
391            let Node::Inner(inner_node) = &next_parent_node.node else {
392                break;
393            };
394            let child_node = inner_node
395                .children
396                .get(path_segment)
397                .map(|node_id| self.get_node(*node_id));
398            if let Some(child_node) = child_node {
399                log::debug!("Found child node {child_node:?} for path segment {path_segment:?}");
400                next_parent_node = Arc::clone(child_node);
401            } else {
402                // Add new, empty inner node
403                let child_node_id = self.new_node_id();
404                debug_assert_ne!(child_node_id, next_parent_node.id);
405                let child_node = TreeNode {
406                    id: child_node_id,
407                    parent: Some(HalfEdgeOwned {
408                        path_segment: T::path_segment_to_owned(path_segment),
409                        node_id: next_parent_node.id,
410                    }),
411                    node: Node::Inner(InnerNode::new(new_inner_value())),
412                };
413                log::debug!(
414                    "Inserting new child node {child_node:?} for path segment {path_segment:?}"
415                );
416                let child_node = Arc::new(child_node);
417                let new_next_parent_node = Arc::clone(&child_node);
418                self.nodes.insert_mut(child_node.id, child_node);
419                let mut inner_node = inner_node.clone();
420                inner_node
421                    .children
422                    .insert_mut(T::path_segment_to_owned(path_segment), child_node_id);
423                // Replace the parent node with the modified one.
424                update_parent_node(
425                    &mut self.nodes,
426                    TreeNode {
427                        id: next_parent_node.id,
428                        parent: next_parent_node.parent.clone(),
429                        node: inner_node.into(),
430                    },
431                );
432                next_parent_node = new_next_parent_node;
433            }
434            debug_assert_eq!(
435                path_segment,
436                next_parent_node
437                    .parent
438                    .as_ref()
439                    .expect("has parent")
440                    .path_segment
441                    .borrow()
442            );
443        }
444        let next_parent_node = match try_replace_leaf_with_inner_node(
445            &mut self.nodes,
446            next_parent_node,
447            &mut try_clone_leaf_into_inner_value,
448        ) {
449            Ok(next_parent_node) => next_parent_node,
450            Err(parent_node) => {
451                return Err(TreeNodeParentChildPathConflict {
452                    parent_node,
453                    child_path_segment: child_path_segment
454                        .map(T::path_segment_to_owned)
455                        .expect("child path segment should exist"),
456                });
457            }
458        };
459        let parent_node = match next_parent_node.node {
460            Node::Inner(_) => Some(next_parent_node),
461            Node::Leaf(_) => None,
462        };
463        Ok(TreeNodeParentChildContext {
464            parent_node,
465            child_path_segment,
466        })
467    }
468
469    /// Insert or update a node in the tree.
470    ///
471    /// All missing parent nodes are created recursively and initialized
472    /// with the value returned by `new_inner_value`.
473    ///
474    /// If the parent node exists and is a leaf node then it is replaced
475    /// with an inner node by calling `try_clone_leaf_into_inner_value`.
476    ///
477    /// Returns the updated parent node and the inserted/updated child node.
478    /// The parent node is `None` if the root node has been updated.
479    ///
480    /// In case of an error, the new value is returned back to the caller.
481    #[allow(clippy::missing_panics_doc)] // Never panics
482    pub fn insert_or_update_node_value(
483        &mut self,
484        path: &T::RootPath,
485        new_value: NodeValue<T>,
486        new_inner_value: &mut impl FnMut() -> T::InnerValue,
487        try_clone_leaf_into_inner_value: impl FnOnce(&T::LeafValue) -> Option<T::InnerValue>,
488    ) -> Result<NodeInsertedOrUpdated<T>, InsertOrUpdateNodeValueError<T>> {
489        let TreeNodeParentChildContext {
490            parent_node,
491            child_path_segment,
492        } = match self.create_missing_ancestor_nodes(
493            path,
494            new_inner_value,
495            try_clone_leaf_into_inner_value,
496        ) {
497            Ok(context) => context,
498            Err(conflict) => {
499                return Err(InsertOrUpdateNodeValueError::PathConflict {
500                    conflict,
501                    value: new_value,
502                });
503            }
504        };
505        let Some(parent_node) = parent_node else {
506            // Update the root node.
507            let old_root_node = Arc::clone(self.root_node());
508            let new_root_node = self.update_node_value(&old_root_node, new_value)?;
509            return Ok(NodeInsertedOrUpdated {
510                node: new_root_node,
511                parent: None,
512            });
513        };
514        debug_assert!(matches!(parent_node.node, Node::Inner(_)));
515        let child_path_segment = child_path_segment.expect("should never be empty");
516        self.insert_or_update_child_node_value(&parent_node, child_path_segment, None, new_value)
517    }
518
519    /// Insert or update a child node in the tree.
520    ///
521    /// The parent node must exist and it must be an inner node.
522    ///
523    /// By providing `old_child_path_segment` an existing node could
524    /// be renamed and updated. This will retain its `NodeId`.
525    ///
526    /// Returns the updated parent node and the inserted/updated child node.
527    ///
528    /// In case of an error, the new value is returned back to the caller.
529    #[allow(clippy::missing_panics_doc)] // Never panics
530    #[allow(clippy::too_many_lines)] // TODO
531    pub fn insert_or_update_child_node_value(
532        &mut self,
533        parent_node: &Arc<TreeNode<T>>,
534        child_path_segment: &T::PathSegment,
535        old_child_path_segment: Option<&T::PathSegment>,
536        new_value: NodeValue<T>,
537    ) -> Result<NodeInsertedOrUpdated<T>, InsertOrUpdateNodeValueError<T>> {
538        debug_assert!(self.contains_node(parent_node));
539        debug_assert!(matches!(parent_node.node, Node::Inner(_)));
540        let Node::Inner(inner_node) = &parent_node.node else {
541            return Err(InsertOrUpdateNodeValueError::PathConflict {
542                conflict: TreeNodeParentChildPathConflict {
543                    parent_node: Arc::clone(parent_node),
544                    child_path_segment: T::path_segment_to_owned(child_path_segment),
545                },
546                value: new_value,
547            });
548        };
549        let old_child_path_segment = old_child_path_segment.unwrap_or(child_path_segment);
550        let (child_node, inner_node_and_removed_subtree) = if let Some(child_node) = inner_node
551            .children
552            .get(old_child_path_segment)
553            .map(|node_id| self.get_node(*node_id))
554        {
555            let child_node_id = child_node.id;
556            log::debug!("Updating value of existing child node {child_node_id}");
557            let old_child_node = Arc::clone(child_node);
558            if old_child_path_segment == child_path_segment {
559                // No renaming.
560                let new_child_node = self.update_node_value(&old_child_node, new_value)?;
561                (new_child_node, None)
562            } else {
563                let new_parent = HalfEdgeOwned {
564                    path_segment: T::path_segment_to_owned(child_path_segment),
565                    node_id: parent_node.id,
566                };
567                let updated_child_node =
568                    old_child_node.try_clone_with_parent_and_value(Some(new_parent), new_value)?;
569                let (mut inner_node, removed_subtree) = if let Some(subtree_root_node_id) =
570                    parent_node.node.find_child(child_path_segment)
571                {
572                    log::debug!("Removing child node {child_node_id} with subtree from {child_path_segment:?}");
573                    let removed_subtree = self.remove_subtree_by_id(subtree_root_node_id);
574                    debug_assert!(removed_subtree.is_some());
575                    let SubtreeRemoved {
576                        parent_node,
577                        child_path_segment: removed_child_path_segment,
578                        removed_subtree,
579                    } = removed_subtree.expect("subtree has been removed");
580                    debug_assert_eq!(removed_child_path_segment.borrow(), child_path_segment);
581                    let Node::Inner(inner_node) = &parent_node.node else {
582                        unreachable!();
583                    };
584                    (inner_node.clone(), Some(removed_subtree))
585                } else {
586                    // The (new) child path segment is not occupied by a node/subtree.
587                    (inner_node.clone(), None)
588                };
589                // Move the updated node to the new, empty location.
590                log::debug!("Moving child node {child_node_id} from {old_child_path_segment:?} to {child_path_segment:?}");
591                inner_node.children.remove_mut(old_child_path_segment);
592                debug_assert!(self.nodes.contains_key(&child_node_id));
593                let child_node_id = updated_child_node.id;
594                let new_child_node = Arc::new(updated_child_node);
595                self.nodes
596                    .insert_mut(child_node_id, Arc::clone(&new_child_node));
597                debug_assert!(!inner_node.children.contains_key(child_path_segment));
598                inner_node
599                    .children
600                    .insert_mut(T::path_segment_to_owned(child_path_segment), child_node_id);
601                (new_child_node, Some((inner_node, removed_subtree)))
602            }
603        } else {
604            let child_node_id = self.new_node_id();
605            log::debug!("Adding new child node {child_node_id}");
606            debug_assert!(!self.nodes.contains_key(&child_node_id));
607            let new_child_node = TreeNode {
608                id: child_node_id,
609                parent: Some(HalfEdgeOwned {
610                    path_segment: T::path_segment_to_owned(child_path_segment),
611                    node_id: parent_node.id,
612                }),
613                node: Node::from_value_without_children(new_value),
614            };
615            let child_node_id = new_child_node.id;
616            let new_child_node = Arc::new(new_child_node);
617            self.nodes
618                .insert_mut(child_node_id, Arc::clone(&new_child_node));
619            log::debug!(
620                "Inserted new child node {new_child_node:?}",
621                new_child_node = *new_child_node,
622            );
623            let mut inner_node = inner_node.clone();
624            if let Some(child_node_id_mut) = inner_node.children.get_mut(child_path_segment) {
625                *child_node_id_mut = child_node_id;
626            } else {
627                inner_node
628                    .children
629                    .insert_mut(T::path_segment_to_owned(child_path_segment), child_node_id);
630            }
631            (new_child_node, Some((inner_node, None)))
632        };
633        let parent = inner_node_and_removed_subtree.map(|(inner_node, removed_subtree)| {
634            let new_parent_node = update_parent_node(
635                &mut self.nodes,
636                TreeNode {
637                    id: parent_node.id,
638                    parent: parent_node.parent.clone(),
639                    node: Node::Inner(inner_node),
640                },
641            );
642            ParentNodeUpdated {
643                node: new_parent_node,
644                removed_subtree,
645            }
646        });
647        Ok(NodeInsertedOrUpdated {
648            node: child_node,
649            parent,
650        })
651    }
652
653    /// Update a node value in the tree.
654    ///
655    /// Inner nodes with children could only be updated with an inner value.
656    ///
657    /// Returns the updated node with the new value.
658    ///
659    /// In case of an error, the new value is returned back to the caller.
660    ///
661    /// Undefined behavior if the given node does not belong to the tree.
662    /// This precondition is only checked by debug assertions.
663    #[allow(clippy::missing_panics_doc)] // Never panics
664    pub fn update_node_value(
665        &mut self,
666        node: &Arc<TreeNode<T>>,
667        new_value: NodeValue<T>,
668    ) -> Result<Arc<TreeNode<T>>, UpdateNodeValueError<T>> {
669        debug_assert!(self.contains_node(node));
670        let new_node = Arc::new(node.try_clone_with_value(new_value)?);
671        self.nodes.insert_mut(node.id, Arc::clone(&new_node));
672        log::debug!("Updated node value: {node:?} -> {new_node:?}");
673        Ok(new_node)
674    }
675
676    /// Remove a node and its children from the tree.
677    ///
678    /// Removes and returns the entire subtree rooted at the given node.
679    ///
680    /// The root node cannot be removed and the tree remains unchanged.
681    ///
682    /// Returns the removed subtree or `None` if unchanged.
683    /// The node ids in the removed subtree remain unchanged.
684    #[allow(clippy::missing_panics_doc)] // Never panics
685    pub fn remove_subtree_by_id(&mut self, node_id: T::NodeId) -> Option<SubtreeRemoved<T>> {
686        if node_id == self.root_node_id {
687            // Cannot remove the root node.
688            return None;
689        }
690        let nodes_count_before = self.nodes_count();
691        let node = self.nodes.get(&node_id).map(Arc::clone)?;
692        let removed = self.nodes.remove_mut(&node_id);
693        debug_assert!(removed);
694        // The descendants of the removed node could still be collected,
695        // even though the tree is already incomplete.
696        let descendant_node_ids = node
697            .node
698            .descendants(self)
699            .map(
700                |HalfEdge {
701                     path_segment: _,
702                     node_id,
703                 }| node_id,
704            )
705            .collect::<Vec<_>>();
706        // Split off the nodes of the subtree from the remaining nodes.
707        let mut subtree_nodes: HashMap<_, _> = descendant_node_ids
708            .into_iter()
709            .filter_map(|node_id| {
710                let node = self.nodes.get(&node_id).map(Arc::clone)?;
711                let removed = self.nodes.remove_mut(&node_id);
712                debug_assert!(removed);
713                Some((node_id, node))
714            })
715            .collect();
716        // Disconnect the subtree from the parent node. The old parent node
717        // still references the root node of the removed subtree as a child.
718        let new_parent_node = {
719            debug_assert!(node.parent.is_some());
720            let HalfEdgeOwned {
721                path_segment: parent_path_segment,
722                node_id: parent_node_id,
723            } = node.parent.as_ref().expect("has parent");
724            let parent_node = self.nodes.get(parent_node_id).expect("has a parent");
725            debug_assert!(matches!(parent_node.node, Node::Inner(_)));
726            let Node::Inner(inner_node) = &parent_node.node else {
727                unreachable!();
728            };
729            let mut inner_node = inner_node.clone();
730            inner_node.children.remove_mut(parent_path_segment.borrow());
731            TreeNode {
732                id: parent_node.id,
733                parent: parent_node.parent.clone(),
734                node: Node::Inner(inner_node),
735            }
736        };
737        let new_parent_node = update_parent_node(&mut self.nodes, new_parent_node);
738        // The tree is now back in a consistent state and we can use the public API again.
739        let nodes_count_after = self.nodes_count();
740        debug_assert!(nodes_count_before >= nodes_count_after);
741        let removed_nodes_count = nodes_count_before.get() - nodes_count_after.get();
742        let TreeNode { id, parent, node } = Arc::unwrap_or_clone(node);
743        let parent = parent.expect("has a parent");
744        debug_assert_eq!(parent.node_id, new_parent_node.id);
745        let child_path_segment = parent.path_segment;
746        let subtree_root_node = Arc::new(TreeNode {
747            id,
748            parent: None,
749            node,
750        });
751        subtree_nodes.insert_mut(node_id, subtree_root_node);
752        let removed_subtree = Self {
753            root_node_id: node_id,
754            nodes: subtree_nodes,
755            new_node_id: self.new_node_id.clone(),
756            _types: PhantomData,
757        };
758        debug_assert_eq!(removed_nodes_count, removed_subtree.nodes_count().get());
759        Some(SubtreeRemoved {
760            parent_node: new_parent_node,
761            child_path_segment,
762            removed_subtree,
763        })
764    }
765
766    /// Insert a subtree.
767    ///
768    /// The root node of the subtree will replace an existing node.
769    /// The existing node must node have any children, otherwise the
770    /// insertion will fail.
771    ///
772    /// The inserted nodes from the subtree will be assigned new ids
773    /// that are generated by this tree.
774    ///
775    /// By providing `old_child_path_segment` an existing node could
776    /// be renamed and replaced by the subtree. This will retain its
777    /// `NodeId`.
778    #[allow(clippy::missing_panics_doc)] // Never panics
779    pub fn insert_or_replace_subtree(
780        &mut self,
781        parent_node: &Arc<TreeNode<T>>,
782        child_path_segment: &T::PathSegment,
783        old_child_path_segment: Option<&T::PathSegment>,
784        mut subtree: Self,
785    ) -> Result<SubtreeInsertedOrReplaced<T>, InsertOrUpdateNodeValueError<T>> {
786        debug_assert!(self.contains_node(parent_node));
787        // Initialized with the old node id, which will be replaced with the new node id
788        // after the root node of the subtree has been inserted/replaced.
789        let mut subtree_root_node_id = subtree.root_node_id();
790        let mut subtree_root_parent_updated = None;
791        {
792            let subtree_node_ids = std::iter::once(subtree.root_node_id())
793                .chain(subtree.root_node().node.descendants(&subtree).map(
794                    |HalfEdge {
795                         path_segment: _,
796                         node_id,
797                     }| node_id,
798                ))
799                .collect::<Vec<_>>();
800            let mut old_to_new_node_id =
801                std::collections::HashMap::<T::NodeId, T::NodeId>::with_capacity(
802                    subtree_node_ids.len(),
803                );
804            for old_node_id in subtree_node_ids {
805                let old_node = subtree
806                    .nodes
807                    .get(&old_node_id)
808                    .map(Arc::clone)
809                    .expect("node exists");
810                let removed = subtree.nodes.remove_mut(&old_node_id);
811                debug_assert!(removed);
812                // Ideally, the nodes in the subtree are not referenced in the outer
813                // context to avoid cloning them. For most use cases this assumption
814                // should be valid.
815                let TreeNode {
816                    id: _,
817                    parent,
818                    node,
819                } = Arc::unwrap_or_clone(old_node);
820                // TODO: This could be optimized when not reusing insert_or_update_child_node_value()
821                // and instead inserting or replacing the node directly.
822                let (parent_node, child_path_segment, old_child_path_segment) =
823                    if let Some(parent) = parent {
824                        debug_assert!(old_to_new_node_id.contains_key(&parent.node_id));
825                        let parent_node_id = old_to_new_node_id
826                            .get(&parent.node_id)
827                            .copied()
828                            .expect("parent node has already been inserted");
829                        let parent_node = self
830                            .nodes
831                            .get(&parent_node_id)
832                            .expect("parent node has already been inserted");
833                        (parent_node, parent.path_segment, None)
834                    } else {
835                        // Root node.
836                        debug_assert_eq!(old_node_id, subtree.root_node_id());
837                        (
838                            parent_node,
839                            T::path_segment_to_owned(child_path_segment),
840                            old_child_path_segment,
841                        )
842                    };
843                let node_value = match node {
844                    Node::Inner(inner) => NodeValue::Inner(inner.value),
845                    Node::Leaf(leaf) => NodeValue::Leaf(leaf.value),
846                };
847                let NodeInsertedOrUpdated {
848                    node: child_node,
849                    parent,
850                } = self
851                    .insert_or_update_child_node_value(
852                        &Arc::clone(parent_node),
853                        child_path_segment.borrow(),
854                        old_child_path_segment,
855                        node_value,
856                    )
857                    .inspect_err(|_| {
858                        // Insertion could only fail for the first node,
859                        // which is the root node of the subtree. This ensures
860                        // that `self` remains unchanged on error.
861                        debug_assert_eq!(old_node_id, subtree.root_node_id());
862                    })?;
863                let child_node_id = child_node.id;
864                if old_node_id == subtree.root_node_id() {
865                    // Subtree root node inserted/updated.
866                    subtree_root_node_id = child_node_id;
867                    subtree_root_parent_updated = parent;
868                }
869                debug_assert!(!old_to_new_node_id.contains_key(&old_node_id));
870                old_to_new_node_id.insert(old_node_id, child_node_id);
871            }
872        }
873        Ok(SubtreeInsertedOrReplaced {
874            child_node_id: subtree_root_node_id,
875            parent: subtree_root_parent_updated.expect("parent node has been updated"),
876        })
877    }
878
879    /// Retain only the nodes that match the given predicate.
880    ///
881    /// The root node is always retained and cannot be removed.
882    ///
883    /// Returns the number of nodes that have been removed.
884    #[allow(clippy::missing_panics_doc)] // Never panics
885    pub fn retain_nodes(&mut self, mut predicate: impl FnMut(&TreeNode<T>) -> bool) {
886        // TODO: Optimize by traversing the tree structure instead of iterating over
887        // all nodes in no particular order. If a subtree is removed then all its
888        // children don't need to be visited.
889        let mut node_ids_to_remove = Vec::new();
890        for node in self.nodes() {
891            if !predicate(node) && node.id != self.root_node_id() {
892                node_ids_to_remove.push(node.id);
893            }
894        }
895        // Remove the subtrees in reverse order of the depth of their root node.
896        node_ids_to_remove.sort_by(|lhs_id, rhs_id| {
897            let lhs_node = self.get_node(*lhs_id);
898            let rhs_node = self.get_node(*rhs_id);
899            let lhs_depth = self.ancestor_nodes_count(lhs_node);
900            let rhs_depth = self.ancestor_nodes_count(rhs_node);
901            lhs_depth.cmp(&rhs_depth)
902        });
903        for node_id in node_ids_to_remove {
904            self.remove_subtree_by_id(node_id);
905        }
906    }
907
908    /// All nodes in no particular order.
909    pub fn nodes(&self) -> impl ExactSizeIterator<Item = &Arc<TreeNode<T>>> {
910        self.nodes.values()
911    }
912
913    /// Total number of nodes in the tree.
914    ///
915    /// Executed in constant time, i.e. O(1). But only if not both
916    /// debug assertions and the feature "expensive-debug-assertions"
917    /// are enabled.
918    #[must_use]
919    pub fn nodes_count(&self) -> NonZeroUsize {
920        debug_assert!(!self.nodes.is_empty());
921        let nodes_count = self.nodes.size();
922        #[cfg(feature = "expensive-debug-assertions")]
923        {
924            // Verify invariants
925            debug_assert_eq!(
926                nodes_count,
927                1 + self.root_node().node.descendants_count(self)
928            );
929            debug_assert_eq!(nodes_count, self.nodes().count());
930        }
931        // SAFETY: A tree always contains at least a root node.
932        debug_assert!(nodes_count > 0);
933        #[allow(unsafe_code)]
934        unsafe {
935            NonZeroUsize::new_unchecked(nodes_count)
936        }
937    }
938
939    /// Iterator over all ancestor nodes of the given node.
940    ///
941    /// Returns the parent node and the respective path segment from the child node
942    /// in bottom-up order.
943    pub fn ancestor_nodes<'a>(
944        &'a self,
945        node: &'a Arc<TreeNode<T>>,
946    ) -> impl Iterator<Item = HalfEdgeTreeNode<'a, T>> + Clone {
947        AncestorTreeNodeIter::new(self, node)
948    }
949
950    /// The number of parent nodes of the given node up to the root node.
951    #[must_use]
952    pub fn ancestor_nodes_count(&self, node: &Arc<TreeNode<T>>) -> usize {
953        self.ancestor_nodes(node).count()
954    }
955
956    /// Returns an iterator over all descendants of this node
957    ///
958    /// Recursively traverses the subtree.
959    ///
960    /// The ordering of nodes is undefined and an implementation detail. Only parent
961    /// nodes are guaranteed to be visited before their children.
962    pub fn descendant_nodes<'a>(
963        &'a self,
964        node: &'a Arc<TreeNode<T>>,
965    ) -> impl Iterator<Item = HalfEdge<'a, T>> {
966        debug_assert!(self.contains_node(node));
967        node.node.descendants(self)
968    }
969
970    /// Number of child nodes of the given node (recursively).
971    #[must_use]
972    pub fn descendant_nodes_count(&self, node: &Arc<TreeNode<T>>) -> usize {
973        debug_assert!(self.contains_node(node));
974        node.node.descendants_count(self)
975    }
976}
977
978/// Immutable node in the tree.
979#[derive(Debug, Clone)]
980pub struct TreeNode<T: PathTreeTypes> {
981    /// Identifier for direct lookup.
982    pub id: T::NodeId,
983
984    /// Link to the parent node.
985    ///
986    /// Must be `None` for the root node and `Some` for all other nodes.
987    pub parent: Option<HalfEdgeOwned<T>>,
988
989    /// The actual content of this node.
990    pub node: Node<T>,
991}
992
993impl<T: PathTreeTypes> TreeNode<T> {
994    fn try_clone_with_value(
995        &self,
996        new_value: NodeValue<T>,
997    ) -> Result<Self, UpdateNodeValueError<T>> {
998        self.try_clone_with_parent_and_value(None, new_value)
999    }
1000
1001    /// Clones the node with a new parent and value.
1002    ///
1003    /// Leaf values could be replaced by both leaf and inner values.
1004    /// An inner value could only be replaced by a leaf value, if the
1005    /// node does not have any children.
1006    ///
1007    /// Fails if the type of the new value is incompatible with the
1008    /// current value type of the node, depending on its children.
1009    fn try_clone_with_parent_and_value(
1010        &self,
1011        new_parent: Option<HalfEdgeOwned<T>>,
1012        new_value: NodeValue<T>,
1013    ) -> Result<Self, UpdateNodeValueError<T>> {
1014        let new_node = match &self.node {
1015            Node::Inner(InnerNode { children, .. }) => {
1016                match new_value {
1017                    NodeValue::Inner(new_value) => {
1018                        // Remains an inner node with the current children and the new value.
1019                        Self {
1020                            id: self.id,
1021                            parent: new_parent.or_else(|| self.parent.clone()),
1022                            node: Node::Inner(InnerNode {
1023                                children: children.clone(),
1024                                value: new_value,
1025                            }),
1026                        }
1027                    }
1028                    new_value @ NodeValue::Leaf(_) => {
1029                        if !children.is_empty() {
1030                            return Err(UpdateNodeValueError::ValueTypeMismatch {
1031                                value: new_value,
1032                            });
1033                        }
1034                        Self {
1035                            id: self.id,
1036                            parent: new_parent.or_else(|| self.parent.clone()),
1037                            node: Node::from_value_without_children(new_value),
1038                        }
1039                    }
1040                }
1041            }
1042            Node::Leaf(_) => {
1043                // Leaf node values could be replaced by both leaf and inner node values.
1044                Self {
1045                    id: self.id,
1046                    parent: new_parent.or_else(|| self.parent.clone()),
1047                    node: Node::from_value_without_children(new_value),
1048                }
1049            }
1050        };
1051        debug_assert_eq!(self.id, new_node.id);
1052        debug_assert_eq!(self.node.children_count(), new_node.node.children_count());
1053        debug_assert!(self
1054            .node
1055            .children()
1056            .zip(new_node.node.children())
1057            .all(|(old, new)| old == new));
1058        Ok(new_node)
1059    }
1060}
1061
1062fn try_replace_leaf_with_inner_node<T: PathTreeTypes>(
1063    nodes: &mut HashMap<T::NodeId, Arc<TreeNode<T>>>,
1064    node: Arc<TreeNode<T>>,
1065    try_clone_leaf_into_inner_value: &mut Option<
1066        impl FnOnce(&T::LeafValue) -> Option<T::InnerValue>,
1067    >,
1068) -> Result<Arc<TreeNode<T>>, Arc<TreeNode<T>>> {
1069    let TreeNode {
1070        id,
1071        parent,
1072        node: Node::Leaf(LeafNode { value: leaf_value }),
1073    } = &*node
1074    else {
1075        return Ok(node);
1076    };
1077    let try_clone_leaf_into_inner_value = try_clone_leaf_into_inner_value
1078        .take()
1079        .expect("consumed at most once");
1080    let Some(inner_value) = try_clone_leaf_into_inner_value(leaf_value) else {
1081        // Keep this leaf node
1082        return Err(node);
1083    };
1084    // Replace leaf node with empty inner node
1085    let inner_node = TreeNode {
1086        id: *id,
1087        parent: parent.clone(),
1088        node: InnerNode::new(inner_value).into(),
1089    };
1090    log::debug!(
1091        "Replacing leaf node {leaf_node:?} with inner node {inner_node:?}",
1092        leaf_node = *node
1093    );
1094    let inner_node = Arc::new(inner_node);
1095    nodes.insert_mut(inner_node.id, Arc::clone(&inner_node));
1096    Ok(inner_node)
1097}
1098
1099/// Iterator over all ancestor nodes of the given node.
1100///
1101/// Returns the node and the respective path segment from the child node.
1102#[derive(Debug, Clone)]
1103pub struct AncestorTreeNodeIter<'a, T: PathTreeTypes> {
1104    tree: &'a PathTree<T>,
1105    next_node: Option<&'a Arc<TreeNode<T>>>,
1106}
1107
1108impl<'a, T: PathTreeTypes> AncestorTreeNodeIter<'a, T> {
1109    /// Create a new iterator over all ancestor nodes of the given node.
1110    ///
1111    /// The given node must exist in the tree. This is only checked in
1112    /// debug builds. Otherwise the iterator will be empty.
1113    #[must_use]
1114    pub fn new(tree: &'a PathTree<T>, node: &'a Arc<TreeNode<T>>) -> Self {
1115        debug_assert!(tree.contains_node(node));
1116        Self {
1117            tree,
1118            next_node: Some(node),
1119        }
1120    }
1121}
1122
1123impl<'a, T: PathTreeTypes> Iterator for AncestorTreeNodeIter<'a, T> {
1124    type Item = HalfEdgeTreeNode<'a, T>;
1125
1126    fn next(&mut self) -> Option<Self::Item> {
1127        let parent = self.next_node.as_ref()?.parent.as_ref()?;
1128        self.next_node = self.tree.lookup_node(parent.node_id);
1129        self.next_node.map(|node| HalfEdgeTreeNode {
1130            path_segment: parent.path_segment.borrow(),
1131            node,
1132        })
1133    }
1134}
1135
1136fn update_parent_node<T: PathTreeTypes>(
1137    nodes: &mut HashMap<T::NodeId, Arc<TreeNode<T>>>,
1138    parent_node: TreeNode<T>,
1139) -> Arc<TreeNode<T>> {
1140    debug_assert!(matches!(parent_node.node, Node::Inner(_)));
1141    let parent_node_id = parent_node.id;
1142    let new_parent_node = Arc::new(parent_node);
1143    debug_assert!(nodes.contains_key(&parent_node_id));
1144    nodes.insert_mut(parent_node_id, Arc::clone(&new_parent_node));
1145    log::debug!(
1146        "Updated parent node {new_parent_node:?}",
1147        new_parent_node = *new_parent_node,
1148    );
1149    new_parent_node
1150}