Skip to main content

litcheck_core/fs/
path_tree.rs

1use std::{
2    borrow::Cow,
3    path::{Path, PathBuf},
4};
5
6use either::Either::{self, Left, Right};
7
8/// Represents an entry in a [PathPrefixTree]
9pub struct Entry<'a, V> {
10    pub path: PathBuf,
11    pub data: &'a V,
12}
13impl<'a, V> Entry<'a, V> {
14    #[inline(always)]
15    pub fn into_path(self) -> PathBuf {
16        self.path
17    }
18
19    #[inline(always)]
20    pub fn into_value(self) -> &'a V {
21        self.data
22    }
23}
24impl<'a, V> AsRef<Path> for Entry<'a, V> {
25    #[inline(always)]
26    fn as_ref(&self) -> &Path {
27        self.path.as_path()
28    }
29}
30impl<'a, V> AsRef<V> for Entry<'a, V> {
31    #[inline(always)]
32    fn as_ref(&self) -> &V {
33        self.data
34    }
35}
36
37#[derive(Debug, thiserror::Error)]
38pub enum TryInsertError {
39    #[error("unable to insert path due to existing entry with conflicting file type")]
40    PathTypeConflict,
41    #[error("the path '{}' is unreachable: an ancestor of this path is a file", .0.display())]
42    Unreachable(PathBuf),
43}
44
45/// A [PathPrefixTree] is a tree data structure optimized for indexing/searching
46/// a directory hierarchy in various ways. In particular, it is efficient to ask:
47///
48/// * If a path is contained in the tree
49/// * The nearest ancestor contained in the tree for a given path
50///
51/// Furthermore, the structure of the tree is very compact, only requiring branching
52/// where multiple paths with the same prefix diverge. As a result, the depth of the
53/// tree is typically much less than the depth of the corresponding directory hierarchy,
54/// and is always bounded by the depth of the directory structure itself.
55///
56/// This data structure is lexicographically ordered, and can be reified as a flat set
57/// of paths, or traversed like a tree.
58pub struct PathPrefixTree<V> {
59    tree: PrefixTree,
60    /// Storage for the data associated with nodes in the tree
61    data: Vec<V>,
62}
63impl<V> Default for PathPrefixTree<V> {
64    fn default() -> Self {
65        Self {
66            tree: PrefixTree::default(),
67            data: vec![],
68        }
69    }
70}
71impl<V: Clone> Clone for PathPrefixTree<V> {
72    fn clone(&self) -> Self {
73        let tree = self.tree.clone();
74        let data = self.data.clone();
75        Self { tree, data }
76    }
77}
78impl<'a, V> FromIterator<&'a str> for PathPrefixTree<V>
79where
80    V: Default,
81{
82    fn from_iter<T>(iter: T) -> Self
83    where
84        T: IntoIterator<Item = &'a str>,
85    {
86        let mut tree = Self::default();
87        for path in iter {
88            tree.insert(path, V::default());
89        }
90        tree
91    }
92}
93impl<'a, V> FromIterator<&'a Path> for PathPrefixTree<V>
94where
95    V: Default,
96{
97    fn from_iter<T>(iter: T) -> Self
98    where
99        T: IntoIterator<Item = &'a Path>,
100    {
101        let mut tree = Self::default();
102        for path in iter {
103            tree.insert(path, V::default());
104        }
105        tree
106    }
107}
108impl<V, P> FromIterator<(P, V)> for PathPrefixTree<V>
109where
110    P: AsRef<Path>,
111{
112    fn from_iter<T>(iter: T) -> Self
113    where
114        T: IntoIterator<Item = (P, V)>,
115    {
116        let mut tree = Self::default();
117        for (path, value) in iter {
118            tree.insert(path.as_ref(), value);
119        }
120        tree
121    }
122}
123impl<V> PathPrefixTree<V> {
124    /// Get a new, empty [PathPrefixTree]
125    pub fn new() -> Self {
126        Self::default()
127    }
128
129    /// Insert `path` in this tree with the given value associated with the resulting node.
130    ///
131    /// If `path` is already contained in this tree (using the definition of containment
132    /// as described in `contains`), then this operation will replace the value for that node.
133    pub fn insert<P: AsRef<Path>>(&mut self, path: P, value: V) {
134        self.try_insert(path, value).expect("insert failed")
135    }
136
137    /// Try to insert `path` in this tree with the given value associated with the resulting node.
138    ///
139    /// If `path` is already contained in this tree (using the definition of containment
140    /// as described in `contains`), then this operation will replace the value for that node,
141    /// unless the file type for that node is in conflict, in which case an error will be returned.
142    pub fn try_insert<P: AsRef<Path>>(&mut self, path: P, value: V) -> Result<(), TryInsertError> {
143        let data = DataKey(self.data.len());
144        self.data.push(value);
145        self.tree.insert(path.as_ref(), data)
146    }
147
148    /// Reset this tree to its default, empty state.
149    pub fn clear(&mut self) {
150        self.tree.clear();
151        self.data.clear();
152    }
153
154    /// Returns true if `path` has been inserted in this tree.
155    pub fn contains<P: AsRef<Path>>(&self, path: P) -> bool {
156        self.tree.contains(path.as_ref())
157    }
158
159    /// Get a reference to the data associated with `path`, if present in the tree
160    pub fn get<P: AsRef<Path>>(&self, path: P) -> Option<&V> {
161        let key = self.tree.get(path.as_ref())?;
162        Some(&self.data[key.as_usize()])
163    }
164
165    /// Get a mutable reference to the data associated with `path`, if present in the tree
166    pub fn get_mut<P: AsRef<Path>>(&mut self, path: P) -> Option<&mut V> {
167        let key = self.tree.get(path.as_ref())?;
168        Some(&mut self.data[key.as_usize()])
169    }
170
171    /// Get the nearest entry in the tree which is an ancestor of `path`
172    pub fn nearest_ancestor<P: AsRef<Path>>(&self, path: P) -> Option<Entry<'_, V>> {
173        self.tree
174            .nearest_ancestor(path.as_ref())
175            .map(|(path, key)| Entry {
176                path,
177                data: &self.data[key.as_usize()],
178            })
179    }
180
181    /// Iterate over all of the entries inserted in this tree.
182    pub fn iter(&self) -> impl Iterator<Item = Entry<'_, V>> + '_ {
183        Dfs::new(self, /*only_leaves=*/ false, None)
184    }
185
186    /// Iterate over all of the entries inserted in this tree,
187    /// with a path that refers to a file.
188    ///
189    /// This is like `iter`, but does not emit entries for directories.
190    pub fn files(&self) -> impl Iterator<Item = Entry<'_, V>> + '_ {
191        Dfs::new(self, /*only_leaves=*/ true, None)
192    }
193
194    /// Iterate over all entries in the tree which are descendants of `path`.
195    ///
196    /// If `max_depth` is set, the search is restricted to entries with a path
197    /// containing no more than `max_depth` additional path components than
198    /// `path` itself.
199    ///
200    /// For example, with a tree containing `/foo/bar`, `/foo/bar/baz` and
201    /// `/foo/bar/baz/qux`, calling this function with `/foo/bar` and a max depth
202    /// of 2, will only return `/foo/bar/baz`, not `foo/bar/baz/qux`, since
203    /// the latter is 2 levels deeper in the hierarchy.
204    pub fn children<P: AsRef<Path>>(
205        &self,
206        path: P,
207        max_depth: Option<usize>,
208    ) -> impl Iterator<Item = Entry<'_, V>> + '_ {
209        Dfs::at(self, path.as_ref(), /*only_leaves=*/ false, max_depth)
210    }
211}
212
213#[derive(Debug, Copy, Clone, PartialEq, Eq)]
214enum PathType {
215    /// The path does not exist, and we've observed no descendant paths yet
216    Unknown,
217    /// The path is known to exist, and is not a directory
218    File,
219    /// The path is either known to exist and is a directory, or was previously
220    /// Unknown, but one or more descendant paths have been observed, which
221    /// dictates that this path must be a directory.
222    Dir,
223}
224
225/// This is the type-erased implementation backing [PathPrefixTree]
226#[derive(Default, Clone)]
227struct PrefixTree {
228    /// We permit multiple top-level root nodes in this tree,
229    /// making it actually a forest, but you could also think
230    /// of this field as representing children of a theoretical
231    /// "top" node that is the parent of all possible paths.
232    ///
233    /// On a Unix system, that would implicitly be `/`, but on
234    /// Windows there can be multiple prefixes, e.g. `C:\`, etc.
235    roots: Vec<Node>,
236}
237impl PrefixTree {
238    /// Insert `path` into this tree with `data`
239    pub fn insert(&mut self, path: &Path, data: DataKey) -> Result<(), TryInsertError> {
240        let (path, ty) = canonicalize_and_type(path);
241
242        // Try to find existing root to insert into
243        if let Some(sort) = self.try_insert_existing(&path, ty, data)? {
244            // Found root with common prefix
245            if sort {
246                self.roots.sort();
247            }
248            return Ok(());
249        }
250
251        self.insert_new(path.into_owned(), ty, data);
252
253        Ok(())
254    }
255
256    fn try_insert_existing(
257        &mut self,
258        path: &Path,
259        ty: PathType,
260        data: DataKey,
261    ) -> Result<Option<bool>, TryInsertError> {
262        for root in self.roots.iter_mut() {
263            if root.has_common_prefix(path) {
264                // If we modified `root` due to this insertion, require `roots` to be re-sorted
265                return insert_into_prefix(root, path, ty, data).map(|depth| Some(depth == 0));
266            }
267        }
268
269        Ok(None)
270    }
271
272    fn insert_new(&mut self, mut path: PathBuf, ty: PathType, data: DataKey) {
273        // Create new root node
274        let root = match ty {
275            PathType::Dir => Node::Branch {
276                component: path,
277                children: vec![],
278                data: Some(data),
279            },
280            ty @ (PathType::Unknown | PathType::File) => {
281                let mut components = path.components();
282                let file = PathBuf::from(components.next_back().unwrap().as_os_str());
283                let child = Node::Leaf {
284                    component: file,
285                    ty,
286                    data: Some(data),
287                };
288                path.pop();
289                Node::Branch {
290                    component: path,
291                    children: vec![child],
292                    data: None,
293                }
294            }
295        };
296
297        // Insert the new root node, and ensure the roots are ordered
298        self.roots.push(root);
299        self.roots.sort();
300    }
301
302    /// Reset this tree to its default, empty state.
303    pub fn clear(&mut self) {
304        self.roots.clear();
305    }
306
307    pub fn contains(&self, path: &Path) -> bool {
308        let (path, _) = canonicalize_and_type(path);
309        self.roots.iter().any(|root| root.contains(&path))
310    }
311
312    pub fn get(&self, path: &Path) -> Option<DataKey> {
313        let (path, _) = canonicalize_and_type(path);
314        self.roots.iter().find_map(|root| {
315            if root.has_common_prefix(&path) {
316                root.get(&path).and_then(|n| n.data())
317            } else {
318                None
319            }
320        })
321    }
322
323    pub fn nearest_ancestor(&self, path: &Path) -> Option<(PathBuf, DataKey)> {
324        let (path, _) = canonicalize_and_type(path);
325        self.roots
326            .iter()
327            .find_map(|root| root.nearest_ancestor(&path))
328    }
329}
330
331/// This is similar to `Path::canonicalize`, except it does a few things differently:
332///
333/// * If the given path cannot be canonicalized the "standard" way, we use a fallback
334///   method as described in the following points.
335/// * In the fallback canonicalization, the path type is always Unknown. If such a path
336///   is inserted in the tree, it may become Dir if descendant paths are inserted in the
337///   tree.
338/// * As a result of the above this does not try to determine if a file path is treated as a
339///   directory, e.g. `/foo/bar.txt/qux`. We assume that the user of the [PathPrefixTree] is
340///   either validating paths themselves, or is ok with invalid paths.
341/// * The fallback canonicalization only partially expands parent references (i.e. `..`). When
342///   these occur at the start of the path, they are preserved, not expanded. When they occur
343///   following some other type of path component, they cause that component to be dropped, and
344///   the `..` is considered resolved (and also dropped).
345fn canonicalize_and_type<'a>(path: &'a Path) -> (Cow<'a, Path>, PathType) {
346    use smallvec::SmallVec;
347    use std::path::Component;
348
349    const PATH_SEPARATOR_SIZE: usize = std::path::MAIN_SEPARATOR_STR.len();
350
351    if let Ok(path) = path.canonicalize() {
352        let path: Cow<'a, Path> = Cow::Owned(path);
353        let ty = match path.metadata() {
354            Err(_) => PathType::Unknown,
355            Ok(metadata) => {
356                if metadata.is_dir() {
357                    PathType::Dir
358                } else {
359                    PathType::File
360                }
361            }
362        };
363        return (path, ty);
364    }
365
366    let mut components = SmallVec::<[Component; 4]>::default();
367    let mut canonical = true;
368    let mut size_in_bytes = 0;
369    for component in path.components() {
370        match component {
371            component @ (Component::Normal(_) | Component::RootDir | Component::Prefix(_)) => {
372                size_in_bytes += component.as_os_str().len() + PATH_SEPARATOR_SIZE;
373                components.push(component);
374            }
375            // This only occurs when `.` is the first path component, so drop this
376            // component as we automatically assume relative paths are relative to
377            // the current working directory
378            Component::CurDir => {
379                canonical = false;
380            }
381            // This occurs when `..` is found anywhere in the path. We normalize
382            // these by dropping the preceding component when there is a preceding
383            // component, and when it is not `..`. If there are no preceding
384            // components, then `..` becomes the first component.
385            Component::ParentDir => match components.last() {
386                None | Some(Component::ParentDir) => {
387                    let component = Component::ParentDir;
388                    size_in_bytes += component.as_os_str().len() + PATH_SEPARATOR_SIZE;
389                    components.push(component);
390                }
391                Some(_) => {
392                    canonical = false;
393                    let component = unsafe { components.pop().unwrap_unchecked() };
394                    size_in_bytes -= component.as_os_str().len() + PATH_SEPARATOR_SIZE;
395                }
396            },
397        }
398    }
399
400    // If `path` is already canonical, return it unmodified
401    if canonical {
402        return (Cow::Borrowed(path), PathType::Unknown);
403    }
404
405    // Otherwise, we need to construct the canonical path
406    let mut path = PathBuf::with_capacity(size_in_bytes);
407    for component in components.into_iter() {
408        path.push(component);
409    }
410    (Cow::Owned(path), PathType::Unknown)
411}
412
413#[derive(Debug, Copy, Clone, PartialEq, Eq)]
414struct DataKey(usize);
415impl DataKey {
416    #[inline(always)]
417    const fn as_usize(self) -> usize {
418        self.0
419    }
420}
421
422#[derive(Debug, PartialEq, Eq)]
423pub enum Prefix {
424    /// The node path exactly equals the input path
425    Exact,
426    /// The input path is an ancestor of the node path.
427    Ancestor,
428    /// The input path is a child of the node path
429    Child,
430    /// The input path and node path diverge, and the
431    /// given path is the common prefix of the two.
432    Partial(PathBuf),
433}
434
435#[derive(Debug, Clone, PartialEq, Eq)]
436enum Node {
437    /// A leaf node is always a file
438    Leaf {
439        component: PathBuf,
440        ty: PathType,
441        data: Option<DataKey>,
442    },
443    /// A branch node is always a directory, and may have zero or more children
444    Branch {
445        component: PathBuf,
446        children: Vec<Node>,
447        data: Option<DataKey>,
448    },
449}
450impl Ord for Node {
451    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
452        self.component().cmp(other.component())
453    }
454}
455impl PartialOrd for Node {
456    #[inline]
457    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
458        Some(self.cmp(other))
459    }
460}
461impl Node {
462    /// Get the path component for this node
463    #[inline]
464    pub fn component(&self) -> &Path {
465        match self {
466            Self::Leaf { component, .. } | Self::Branch { component, .. } => component.as_path(),
467        }
468    }
469
470    /// Get the data key for this node, if applicable
471    #[inline]
472    pub fn data(&self) -> Option<DataKey> {
473        match self {
474            Self::Leaf { data, .. } | Self::Branch { data, .. } => *data,
475        }
476    }
477
478    /// Returns the common prefix with `path`.
479    ///
480    /// If `None`, there is no common prefix.
481    ///
482    /// If `Some`, see the docs for [PrefixMatch] for details on what the various
483    /// match types mean.
484    pub fn common_prefix(&self, path: &Path) -> Option<Prefix> {
485        assert_ne!(
486            path,
487            Path::new(""),
488            "invalid empty path given to common_prefix"
489        );
490
491        let mut ac = self.component().components();
492        let mut bc = path.components();
493        let mut common_prefix = PathBuf::new();
494        loop {
495            match (ac.next(), bc.next()) {
496                // We've reached the end of the components of `self`, so we've
497                // got at least some common prefix with `path`, go ahead and return
498                // it without further consideration.
499                (None, None) => break Some(Prefix::Exact),
500                (None, Some(_)) => break Some(Prefix::Child),
501                // The next component in both paths is common, so append it to
502                // our common prefix.
503                (Some(a), Some(b)) if a == b => {
504                    common_prefix.push(a);
505                }
506                // We diverge from `path` here.
507                (Some(_), Some(_)) => {
508                    if common_prefix == Path::new("") {
509                        break None;
510                    } else {
511                        break Some(Prefix::Partial(common_prefix));
512                    }
513                }
514                // We've reached the end of the components of `path` before `self`.
515                // This indicates that `path` is shallower than `self`, and thus `self`
516                // must be split to accomodate `path`.
517                (Some(_), None) => {
518                    break Some(Prefix::Ancestor);
519                }
520            }
521        }
522    }
523
524    /// Returns true if `self` has a common prefix with `path`.
525    pub fn has_common_prefix(&self, path: &Path) -> bool {
526        assert_ne!(
527            path,
528            Path::new(""),
529            "invalid empty path given to has_common_prefix"
530        );
531
532        let mut ac = self.component().components();
533        let mut bc = path.components();
534        let mut has_minimum_prefix = false;
535        loop {
536            match (ac.next(), bc.next()) {
537                (None, _) => break true,
538                (Some(a), Some(b)) if a == b => {
539                    has_minimum_prefix = true;
540                }
541                (Some(_), Some(_)) => break has_minimum_prefix,
542                (Some(_), None) => break true,
543            }
544        }
545    }
546
547    /// Returns true if `path` is explicitly represented under this node
548    ///
549    /// Explicit representation means there is a `Node` in the tree whose full path
550    /// is equal to `path`. Implicit representation is only applicable to directories,
551    /// and refers to when there is a `Node` in the tree for which `path` is a prefix,
552    /// but the full path for that node refers to an item deeper in the directory tree,
553    /// e.g. `/foo/bar` is implicitly represented by a node whose path is `/foo/bar/baz.txt`
554    pub fn contains(&self, mut path: &Path) -> bool {
555        let mut next = Some(self);
556        while let Some(node) = next.take() {
557            match node {
558                Self::Branch {
559                    component,
560                    children,
561                    data,
562                } => {
563                    if let Ok(rest) = path.strip_prefix(component) {
564                        if rest == Path::new("") {
565                            return data.is_some();
566                        }
567                        match children.iter().position(|c| c.has_common_prefix(rest)) {
568                            Some(index) => {
569                                path = rest;
570                                next = Some(&children[index]);
571                                continue;
572                            }
573                            None => {
574                                // the remainder of `path` is not covered by any children
575                                break;
576                            }
577                        }
578                    } else {
579                        // `component` may have a common prefix with `path`, but `path` is
580                        // not explicitly represented in the tree, so we must return false
581                        break;
582                    }
583                }
584                Self::Leaf {
585                    component, data, ..
586                } => {
587                    // `component` may have a common prefix with `path`, but unless `path`
588                    // is equal to `component`, it is not explicitly represented in the tree
589                    return path == component && data.is_some();
590                }
591            }
592        }
593
594        false
595    }
596
597    pub fn get<'a>(&'a self, mut path: &Path) -> Option<&'a Node> {
598        let mut next = Some(self);
599        while let Some(node) = next.take() {
600            match node {
601                Self::Branch {
602                    component,
603                    children,
604                    ..
605                } => {
606                    if let Ok(rest) = path.strip_prefix(component) {
607                        if rest == Path::new("") {
608                            return Some(node);
609                        }
610                        match children.iter().position(|c| c.has_common_prefix(rest)) {
611                            Some(index) => {
612                                path = rest;
613                                next = Some(&children[index]);
614                                continue;
615                            }
616                            None => {
617                                break;
618                            }
619                        }
620                    }
621                }
622                Self::Leaf { component, .. } => {
623                    if path == component {
624                        return Some(node);
625                    }
626                    break;
627                }
628            }
629        }
630
631        None
632    }
633
634    /// Return the path and data key associated with it, which is the nearest
635    /// ancestor of `path`, with data, represented under this node.
636    ///
637    /// For example, consider a tree in which `/foo/bar/baz/qux.txt` and
638    /// `/foo/bar/qux2.txt` are both inserted:
639    ///
640    /// * Given `/foo/bar/baz/other.txt` this function will select `/foo/bar/qux2.txt`, because
641    ///   `/foo/bar/baz/qux.txt` is a sibling, but not an ancestor.
642    /// * Given `/foo/bar/qux2.txt`, it will select None, because there is no ancestor explicitly
643    ///   represented in the tree for this path
644    /// * Given `/foo/bar/baz/qux.txt`, it will select `/foo/bar/qux2.txt`, because
645    ///   `/foo/bar/baz/qux.txt` is not an ancestor of itself, and its nearest ancestor is the
646    ///   selected path.
647    pub fn nearest_ancestor(&self, mut path: &Path) -> Option<(PathBuf, DataKey)> {
648        use smallvec::SmallVec;
649        if !self.has_common_prefix(path) {
650            return None;
651        }
652
653        let mut candidates = SmallVec::<[(PathBuf, DataKey); 2]>::default();
654        let mut ancestor = PathBuf::new();
655        let mut next = Some(self);
656        while let Some(node) = next.take() {
657            match node {
658                Self::Branch {
659                    component,
660                    children,
661                    data,
662                } => {
663                    if let Ok(rest) = path.strip_prefix(component) {
664                        // We found `path`, so take the most recent candidate,
665                        // if one is available. If not, then there are no nearest
666                        // ancestors for `path`
667                        if rest == Path::new("") {
668                            return candidates.pop();
669                        }
670
671                        // Otherwise, find the next child node to descend into.
672                        // If a suitable candidate is found, and the current node
673                        // has data associated with it, push the current path as
674                        // a candidate node, and check the child node to see if it
675                        // is closer.
676                        //
677                        // If no suitable candidate is found, but the current node
678                        // has data associated with it, then the current node is the
679                        // nearest ancestor, so we can just return it. If it has no
680                        // data, then the most recent candidate is returned, if available.
681                        ancestor.push(component);
682                        let child = children.iter().position(|c| c.has_common_prefix(rest));
683                        if let Some(index) = child {
684                            if let Some(data) = *data {
685                                candidates.push((ancestor.clone(), data));
686                            }
687
688                            path = rest;
689                            next = Some(&children[index]);
690                            continue;
691                        } else {
692                            return data
693                                .map(|data| (ancestor, data))
694                                .or_else(|| candidates.pop());
695                        }
696                    } else {
697                        // To insert `path` in the tree, we'd have to split `node`, which
698                        // means the most recent candidate is the nearest ancestor, if we
699                        // have one
700                        return candidates.pop();
701                    }
702                }
703                // Leaf nodes by definition cannot be the nearest ancestor, so use
704                // the most recent candidate, if we have one
705                Self::Leaf { .. } => return candidates.pop(),
706            }
707        }
708
709        None
710    }
711
712    fn take(&mut self, prefix: &Path) -> Self {
713        match self {
714            Node::Branch {
715                component,
716                children,
717                data,
718            } => {
719                let component = component.strip_prefix(prefix).unwrap().to_path_buf();
720                Node::Branch {
721                    component,
722                    children: core::mem::take(children),
723                    data: data.take(),
724                }
725            }
726            Node::Leaf {
727                component,
728                data,
729                ty,
730            } => {
731                let component = component.strip_prefix(prefix).unwrap().to_path_buf();
732                Node::Leaf {
733                    component,
734                    ty: *ty,
735                    data: data.take(),
736                }
737            }
738        }
739    }
740
741    fn split_at(&mut self, common_prefix: PathBuf) {
742        let split = self.take(&common_prefix);
743        *self = Node::Branch {
744            component: common_prefix,
745            children: vec![split],
746            data: None,
747        };
748    }
749
750    fn set_type(&mut self, ty: PathType) -> Result<(), TryInsertError> {
751        match self {
752            Self::Leaf {
753                component,
754                data,
755                ty: prev_ty,
756                ..
757            } => match ty {
758                PathType::Unknown | PathType::File => *prev_ty = ty,
759                PathType::Dir => {
760                    let component = core::mem::replace(component, PathBuf::new());
761                    let data = data.take();
762                    *self = Self::Branch {
763                        component,
764                        data,
765                        children: vec![],
766                    };
767                }
768            },
769            Self::Branch { .. } => match ty {
770                PathType::Dir => (),
771                PathType::Unknown | PathType::File => return Err(TryInsertError::PathTypeConflict),
772            },
773        }
774
775        Ok(())
776    }
777
778    fn set_data(&mut self, data: DataKey) {
779        match self {
780            Self::Branch {
781                data: prev_data, ..
782            }
783            | Self::Leaf {
784                data: prev_data, ..
785            } => {
786                *prev_data = Some(data);
787            }
788        }
789    }
790
791    fn push_child(
792        &mut self,
793        component: PathBuf,
794        ty: PathType,
795        data: Option<DataKey>,
796    ) -> Result<(), TryInsertError> {
797        let child = match ty {
798            PathType::File | PathType::Unknown => Self::Leaf {
799                component,
800                ty,
801                data,
802            },
803            PathType::Dir => Self::Branch {
804                component,
805                children: vec![],
806                data,
807            },
808        };
809        match self {
810            Self::Branch { children, .. } => {
811                children.push(child);
812                children.sort();
813            }
814            Self::Leaf {
815                component: parent_component,
816                data: parent_data,
817                ty: PathType::Unknown,
818            } => {
819                let children = vec![child];
820                let component = core::mem::replace(parent_component, PathBuf::new());
821                let data = parent_data.take();
822                *self = Self::Branch {
823                    component,
824                    children,
825                    data,
826                };
827            }
828            Self::Leaf { .. } => return Err(TryInsertError::PathTypeConflict),
829        }
830
831        Ok(())
832    }
833
834    fn try_insert_new<'a>(
835        &'a mut self,
836        path: &Path,
837        component: &Path,
838        ty: PathType,
839        data: Option<DataKey>,
840    ) -> Either<Result<(), TryInsertError>, &'a mut Node> {
841        match self {
842            Self::Branch { children, .. } => {
843                if let Some(index) = children.iter().position(|c| c.has_common_prefix(component)) {
844                    // We can't insert this as new, but the given index is a child we can try to insert into next
845                    return Right(&mut children[index]);
846                }
847                let child = match ty {
848                    PathType::File | PathType::Unknown => Self::Leaf {
849                        component: component.to_path_buf(),
850                        ty,
851                        data,
852                    },
853                    PathType::Dir => Self::Branch {
854                        component: component.to_path_buf(),
855                        children: vec![],
856                        data,
857                    },
858                };
859                children.push(child);
860                children.sort();
861
862                Left(Ok(()))
863            }
864            Self::Leaf {
865                ty: PathType::File, ..
866            } => Left(Err(TryInsertError::Unreachable(path.to_path_buf()))),
867            Self::Leaf {
868                component: parent_component,
869                data: parent_data,
870                ..
871            } => {
872                let child = match ty {
873                    PathType::File | PathType::Unknown => Self::Leaf {
874                        component: component.to_path_buf(),
875                        ty,
876                        data,
877                    },
878                    PathType::Dir => Self::Branch {
879                        component: component.to_path_buf(),
880                        children: vec![],
881                        data,
882                    },
883                };
884                let children = vec![child];
885                let component = core::mem::replace(parent_component, PathBuf::new());
886                let data = parent_data.take();
887                *self = Self::Branch {
888                    component,
889                    children,
890                    data,
891                };
892                Left(Ok(()))
893            }
894        }
895    }
896}
897
898/// Insert `path` into `node` which contains a prefix of `path`.
899///
900/// Returns the depth relative to `node` where `path` is inserted, a depth
901/// of zero indicates that `node` itself was modified.
902#[inline(never)]
903fn insert_into_prefix(
904    node: &mut Node,
905    mut path: &Path,
906    ty: PathType,
907    data: DataKey,
908) -> Result<usize, TryInsertError> {
909    let orig_path = path;
910    let mut next = Some((node, 0));
911
912    loop {
913        let (node, depth) = next.take().unwrap();
914
915        if let Some(prefix) = node.common_prefix(path) {
916            match prefix {
917                Prefix::Exact => {
918                    node.set_type(ty)?;
919                    node.set_data(data);
920                    break Ok(depth);
921                }
922                Prefix::Child => {
923                    path = path.strip_prefix(node.component()).unwrap();
924                    match node.try_insert_new(orig_path, path, ty, Some(data)) {
925                        Left(result) => {
926                            result?;
927                            break Ok(depth + 1);
928                        }
929                        Right(next_child) => {
930                            next = Some((next_child, depth + 1));
931                            continue;
932                        }
933                    }
934                }
935                Prefix::Ancestor => {
936                    node.split_at(path.to_path_buf());
937                    node.set_data(data);
938                    break Ok(depth);
939                }
940                Prefix::Partial(common_prefix) => {
941                    let component = path.strip_prefix(&common_prefix).unwrap().to_path_buf();
942                    node.split_at(common_prefix);
943                    node.push_child(component, ty, Some(data))?;
944                    break Ok(depth);
945                }
946            }
947        }
948    }
949}
950
951#[derive(Debug)]
952struct DfsVisitor<'a> {
953    worklist: &'a [Node],
954    prefix: PathBuf,
955    component_prefix: PathBuf,
956    queued: Option<(PathBuf, usize, &'a [Node])>,
957    depth: usize,
958    only_leaves: bool,
959}
960impl<'a> DfsVisitor<'a> {
961    fn new(prefix: PathBuf, worklist: &'a [Node], depth: usize, only_leaves: bool) -> Self {
962        Self {
963            worklist,
964            prefix,
965            component_prefix: PathBuf::new(),
966            queued: None,
967            depth,
968            only_leaves,
969        }
970    }
971
972    pub fn next(
973        &mut self,
974        max_depth: Option<usize>,
975    ) -> Option<Either<(PathBuf, DataKey), DfsVisitor<'a>>> {
976        if let Some((prefix, relative_depth, worklist)) = self.queued.take() {
977            return Some(Right(DfsVisitor::new(
978                prefix,
979                worklist,
980                relative_depth,
981                self.only_leaves,
982            )));
983        }
984
985        loop {
986            match self.worklist.split_first()? {
987                (Node::Leaf { data: None, .. }, worklist) => {
988                    self.worklist = worklist;
989                    continue;
990                }
991                (
992                    Node::Leaf {
993                        component,
994                        data: Some(data),
995                        ..
996                    },
997                    worklist,
998                ) => {
999                    self.worklist = worklist;
1000
1001                    if self.exceeds_max_depth(component, max_depth) {
1002                        continue;
1003                    }
1004                    let suffix = self.strip_prefix(component);
1005                    let prefix = self.prefix.join(suffix);
1006                    break Some(Left((prefix, *data)));
1007                }
1008                (
1009                    Node::Branch {
1010                        component,
1011                        children,
1012                        data,
1013                    },
1014                    worklist,
1015                ) => {
1016                    self.worklist = worklist;
1017
1018                    let relative_depth = self.relative_depth(component, max_depth);
1019                    if let Some(max_depth) = max_depth
1020                        && relative_depth > max_depth
1021                    {
1022                        continue;
1023                    }
1024                    let suffix = self.strip_prefix(component);
1025                    let prefix = self.prefix.join(suffix);
1026                    if !children.is_empty() {
1027                        match data {
1028                            // We have to emit the branch node first, so save the descent
1029                            // information for the next time `next` is called
1030                            Some(data) => {
1031                                self.queued =
1032                                    Some((prefix.clone(), relative_depth, children.as_slice()));
1033                                break Some(Left((prefix, *data)));
1034                            }
1035                            // We don't need to emit the branch node, so return a new visitor
1036                            // to start descending into the children
1037                            None => {
1038                                break Some(Right(DfsVisitor::new(
1039                                    prefix,
1040                                    children.as_slice(),
1041                                    relative_depth,
1042                                    self.only_leaves,
1043                                )));
1044                            }
1045                        }
1046                    }
1047                }
1048            }
1049        }
1050    }
1051
1052    fn exceeds_max_depth(&self, path: &Path, max_depth: Option<usize>) -> bool {
1053        match max_depth {
1054            None => false,
1055            Some(max_depth) => {
1056                let suffix = self.strip_prefix(path);
1057                let relative_depth = self.depth + suffix.components().count();
1058                relative_depth > max_depth
1059            }
1060        }
1061    }
1062
1063    fn relative_depth(&self, path: &Path, max_depth: Option<usize>) -> usize {
1064        match max_depth {
1065            // If we don't care about the depth, do nothing
1066            None => 0,
1067            Some(_) => {
1068                let suffix = self.strip_prefix(path);
1069                self.depth + suffix.components().count()
1070            }
1071        }
1072    }
1073
1074    #[inline]
1075    fn strip_prefix<'p>(&self, path: &'p Path) -> &'p Path {
1076        if self.component_prefix == Path::new("") {
1077            return path;
1078        }
1079        path.strip_prefix(&self.component_prefix).unwrap()
1080    }
1081}
1082
1083struct Dfs<'a, V> {
1084    data: &'a [V],
1085    stack: Vec<DfsVisitor<'a>>,
1086    current: DfsVisitor<'a>,
1087    max_depth: Option<usize>,
1088}
1089impl<V> core::fmt::Debug for Dfs<'_, V> {
1090    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
1091        f.debug_struct("Dfs")
1092            .field("stack", &self.stack)
1093            .field("current", &self.current)
1094            .field("max_depth", &self.max_depth)
1095            .finish()
1096    }
1097}
1098impl<'a, V> Dfs<'a, V> {
1099    fn new(tree: &'a PathPrefixTree<V>, only_leaves: bool, max_depth: Option<usize>) -> Self {
1100        Self {
1101            data: tree.data.as_slice(),
1102            stack: vec![],
1103            current: DfsVisitor::new(PathBuf::new(), tree.tree.roots.as_slice(), 0, only_leaves),
1104            max_depth,
1105        }
1106    }
1107
1108    fn at(
1109        tree: &'a PathPrefixTree<V>,
1110        path: &Path,
1111        only_leaves: bool,
1112        max_depth: Option<usize>,
1113    ) -> Self {
1114        // Traverse tree until we find our initial working set
1115        match tree
1116            .tree
1117            .roots
1118            .iter()
1119            .find(|root| root.has_common_prefix(path))
1120        {
1121            None => Self::empty(tree, only_leaves, max_depth),
1122            Some(root) => {
1123                let mut next = Some(root);
1124                let mut input_path = path;
1125                let mut prefix = PathBuf::new();
1126                loop {
1127                    let node = next.take().unwrap();
1128                    match node.common_prefix(input_path) {
1129                        // No children for `path` in this tree
1130                        None => break Self::empty(tree, only_leaves, max_depth),
1131                        Some(Prefix::Exact) => {
1132                            break Self {
1133                                data: tree.data.as_slice(),
1134                                stack: vec![],
1135                                current: DfsVisitor::new(
1136                                    prefix,
1137                                    core::slice::from_ref(node),
1138                                    0,
1139                                    only_leaves,
1140                                ),
1141                                max_depth,
1142                            };
1143                        }
1144                        // This node is the first child, as long as it is not deeper than `max_depth`
1145                        Some(Prefix::Ancestor) => {
1146                            prefix.push(input_path);
1147                            break Self {
1148                                data: tree.data.as_slice(),
1149                                stack: vec![],
1150                                current: DfsVisitor {
1151                                    component_prefix: PathBuf::from(input_path),
1152                                    ..DfsVisitor::new(
1153                                        prefix,
1154                                        core::slice::from_ref(node),
1155                                        0,
1156                                        only_leaves,
1157                                    )
1158                                },
1159                                max_depth,
1160                            };
1161                        }
1162                        // We need to recurse deeper into the tree to find potential children
1163                        Some(Prefix::Child) => {
1164                            match node {
1165                                Node::Branch {
1166                                    component,
1167                                    children,
1168                                    ..
1169                                } => {
1170                                    input_path = input_path.strip_prefix(component).unwrap();
1171                                    next =
1172                                        children.iter().find(|c| c.has_common_prefix(input_path));
1173                                    if next.is_none() {
1174                                        break Self::empty(tree, only_leaves, max_depth);
1175                                    }
1176                                    prefix.push(component);
1177                                }
1178                                // There are no children for `path`, so return an empty iterator
1179                                Node::Leaf { .. } => {
1180                                    break Self::empty(tree, only_leaves, max_depth);
1181                                }
1182                            }
1183                        }
1184                        // There can be no children of `path` under `node`
1185                        Some(Prefix::Partial(_)) => {
1186                            break Self::empty(tree, only_leaves, max_depth);
1187                        }
1188                    }
1189                }
1190            }
1191        }
1192    }
1193
1194    #[inline]
1195    fn empty(tree: &'a PathPrefixTree<V>, only_leaves: bool, max_depth: Option<usize>) -> Self {
1196        Self {
1197            data: tree.data.as_slice(),
1198            stack: vec![],
1199            current: DfsVisitor::new(PathBuf::new(), &[], 0, only_leaves),
1200            max_depth,
1201        }
1202    }
1203}
1204impl<'a, V> Iterator for Dfs<'a, V> {
1205    type Item = Entry<'a, V>;
1206
1207    fn next(&mut self) -> Option<Self::Item> {
1208        loop {
1209            match self.current.next(self.max_depth) {
1210                // No more nodes at this depth
1211                None => {
1212                    // Try to resume iteration at the next level up, else we're done
1213                    self.current = self.stack.pop()?;
1214                }
1215                // Visitor produced the next element at this depth
1216                Some(Left((path, key))) => {
1217                    return Some(Entry {
1218                        path,
1219                        data: &self.data[key.as_usize()],
1220                    });
1221                }
1222                // Visitor has indicated we should suspend iteration at this
1223                // depth and descend into a child node first
1224                Some(Right(visitor)) => {
1225                    let suspended = core::mem::replace(&mut self.current, visitor);
1226                    self.stack.push(suspended);
1227                }
1228            }
1229        }
1230    }
1231}
1232
1233#[cfg(test)]
1234mod tests {
1235    use pretty_assertions::assert_eq;
1236
1237    use super::*;
1238
1239    #[test]
1240    fn path_tree_insert() {
1241        // Tests:
1242        // * insertion in a clean tree
1243        // * insertion under an existing node
1244        // * insertion in a dirty tree
1245        let tree = PathPrefixTree::<()>::from_iter(["/foo/bar", "/foo/bar/baz", "/qux"]);
1246
1247        let child = Node::Leaf {
1248            component: PathBuf::from("baz"),
1249            ty: PathType::Unknown,
1250            data: Some(DataKey(1)),
1251        };
1252        let a = Node::Branch {
1253            component: PathBuf::from("foo"),
1254            children: vec![Node::Branch {
1255                component: PathBuf::from("bar"),
1256                children: vec![child],
1257                data: Some(DataKey(0)),
1258            }],
1259            data: None,
1260        };
1261        let b = Node::Leaf {
1262            component: PathBuf::from("qux"),
1263            ty: PathType::Unknown,
1264            data: Some(DataKey(2)),
1265        };
1266        let root = Node::Branch {
1267            component: PathBuf::from("/"),
1268            children: vec![a, b],
1269            data: None,
1270        };
1271
1272        assert_eq!(tree.tree.roots.as_slice(), &[root]);
1273    }
1274
1275    #[test]
1276    fn path_tree_nearest_ancestor() {
1277        let tree = PathPrefixTree::<()>::from_iter(["/foo/bar", "/foo/bar/baz", "/qux"]);
1278
1279        assert_eq!(
1280            tree.nearest_ancestor("/foo/bar/baz").map(Entry::into_path),
1281            Some(PathBuf::from("/foo/bar"))
1282        );
1283        assert_eq!(
1284            tree.nearest_ancestor("/foo/bar").map(Entry::into_path),
1285            None
1286        );
1287        assert_eq!(tree.nearest_ancestor("/qux").map(Entry::into_path), None);
1288    }
1289
1290    #[test]
1291    fn path_tree_contains() {
1292        let tree = PathPrefixTree::<()>::from_iter(["/foo/bar", "/foo/bar/baz", "/qux"]);
1293
1294        assert!(tree.contains("/foo/bar/baz"));
1295        assert!(tree.contains("/foo/bar"));
1296        assert!(!tree.contains("/foo"));
1297        assert!(!tree.contains("/foo/bar/baz/thing.txt"));
1298    }
1299
1300    #[test]
1301    fn path_tree_get() {
1302        let mut tree = PathPrefixTree::default();
1303        tree.insert("/foo/bar/baz", 1usize);
1304        tree.insert("/foo/bar/baz/qux", 2usize);
1305
1306        assert_eq!(tree.get("/foo/bar/baz/qux").copied(), Some(2));
1307        assert_eq!(tree.get("/foo/bar/baz").copied(), Some(1));
1308    }
1309
1310    #[test]
1311    fn path_tree_iter() {
1312        let tree = PathPrefixTree::<()>::from_iter(["/qux", "/foo/bar/baz", "/foo/bar"]);
1313
1314        let paths = tree.iter().map(|e| e.into_path()).collect::<Vec<_>>();
1315
1316        let expected = vec![
1317            PathBuf::from("/foo/bar"),
1318            PathBuf::from("/foo/bar/baz"),
1319            PathBuf::from("/qux"),
1320        ];
1321
1322        assert_eq!(paths, expected);
1323    }
1324
1325    #[test]
1326    fn path_tree_children() {
1327        let tree = PathPrefixTree::<()>::from_iter(["/qux", "/foo/bar/baz", "/foo/bar"]);
1328
1329        let paths = tree
1330            .children("/foo/bar", None)
1331            .map(|e| e.into_path())
1332            .collect::<Vec<_>>();
1333
1334        let expected = vec![PathBuf::from("/foo/bar"), PathBuf::from("/foo/bar/baz")];
1335
1336        assert_eq!(paths, expected);
1337
1338        let paths = tree
1339            .children("/foo", None)
1340            .map(|e| e.into_path())
1341            .collect::<Vec<_>>();
1342
1343        assert_eq!(paths, expected);
1344    }
1345}