litcheck_core/fs/
path_tree.rs

1use std::{
2    borrow::Cow,
3    path::{Path, PathBuf},
4};
5
6use either::Either::{self, Left, Right};
7
8/// Represents an entry in a [PathPrefixTree]
9pub struct Entry<'a, V> {
10    pub path: PathBuf,
11    pub data: &'a V,
12}
13impl<'a, V> Entry<'a, V> {
14    #[inline(always)]
15    pub fn into_path(self) -> PathBuf {
16        self.path
17    }
18
19    #[inline(always)]
20    pub fn into_value(self) -> &'a V {
21        self.data
22    }
23}
24impl<'a, V> AsRef<Path> for Entry<'a, V> {
25    #[inline(always)]
26    fn as_ref(&self) -> &Path {
27        self.path.as_path()
28    }
29}
30impl<'a, V> AsRef<V> for Entry<'a, V> {
31    #[inline(always)]
32    fn as_ref(&self) -> &V {
33        self.data
34    }
35}
36
37#[derive(Debug, thiserror::Error)]
38pub enum TryInsertError {
39    #[error("unable to insert path due to existing entry with conflicting file type")]
40    PathTypeConflict,
41    #[error("the path '{}' is unreachable: an ancestor of this path is a file", .0.display())]
42    Unreachable(PathBuf),
43}
44
45/// A [PathPrefixTree] is a tree data structure optimized for indexing/searching
46/// a directory hierarchy in various ways. In particular, it is efficient to ask:
47///
48/// * If a path is contained in the tree
49/// * The nearest ancestor contained in the tree for a given path
50///
51/// Furthermore, the structure of the tree is very compact, only requiring branching
52/// where multiple paths with the same prefix diverge. As a result, the depth of the
53/// tree is typically much less than the depth of the corresponding directory hierarchy,
54/// and is always bounded by the depth of the directory structure itself.
55///
56/// This data structure is lexicographically ordered, and can be reified as a flat set
57/// of paths, or traversed like a tree.
58pub struct PathPrefixTree<V> {
59    tree: PrefixTree,
60    /// Storage for the data associated with nodes in the tree
61    data: Vec<V>,
62}
63impl<V> Default for PathPrefixTree<V> {
64    fn default() -> Self {
65        Self {
66            tree: PrefixTree::default(),
67            data: vec![],
68        }
69    }
70}
71impl<V: Clone> Clone for PathPrefixTree<V> {
72    fn clone(&self) -> Self {
73        let tree = self.tree.clone();
74        let data = self.data.clone();
75        Self { tree, data }
76    }
77}
78impl<'a, V> FromIterator<&'a str> for PathPrefixTree<V>
79where
80    V: Default,
81{
82    fn from_iter<T>(iter: T) -> Self
83    where
84        T: IntoIterator<Item = &'a str>,
85    {
86        let mut tree = Self::default();
87        for path in iter {
88            tree.insert(path, V::default());
89        }
90        tree
91    }
92}
93impl<'a, V> FromIterator<&'a Path> for PathPrefixTree<V>
94where
95    V: Default,
96{
97    fn from_iter<T>(iter: T) -> Self
98    where
99        T: IntoIterator<Item = &'a Path>,
100    {
101        let mut tree = Self::default();
102        for path in iter {
103            tree.insert(path, V::default());
104        }
105        tree
106    }
107}
108impl<V, P> FromIterator<(P, V)> for PathPrefixTree<V>
109where
110    P: AsRef<Path>,
111{
112    fn from_iter<T>(iter: T) -> Self
113    where
114        T: IntoIterator<Item = (P, V)>,
115    {
116        let mut tree = Self::default();
117        for (path, value) in iter {
118            tree.insert(path.as_ref(), value);
119        }
120        tree
121    }
122}
123impl<V> PathPrefixTree<V> {
124    /// Get a new, empty [PathPrefixTree]
125    pub fn new() -> Self {
126        Self::default()
127    }
128
129    /// Insert `path` in this tree with the given value associated with the resulting node.
130    ///
131    /// If `path` is already contained in this tree (using the definition of containment
132    /// as described in `contains`), then this operation will replace the value for that node.
133    pub fn insert<P: AsRef<Path>>(&mut self, path: P, value: V) {
134        self.try_insert(path, value).expect("insert failed")
135    }
136
137    /// Try to insert `path` in this tree with the given value associated with the resulting node.
138    ///
139    /// If `path` is already contained in this tree (using the definition of containment
140    /// as described in `contains`), then this operation will replace the value for that node,
141    /// unless the file type for that node is in conflict, in which case an error will be returned.
142    pub fn try_insert<P: AsRef<Path>>(&mut self, path: P, value: V) -> Result<(), TryInsertError> {
143        let data = DataKey(self.data.len());
144        self.data.push(value);
145        self.tree.insert(path.as_ref(), data)
146    }
147
148    /// Reset this tree to its default, empty state.
149    pub fn clear(&mut self) {
150        self.tree.clear();
151        self.data.clear();
152    }
153
154    /// Returns true if `path` has been inserted in this tree.
155    pub fn contains<P: AsRef<Path>>(&self, path: P) -> bool {
156        self.tree.contains(path.as_ref())
157    }
158
159    /// Get a reference to the data associated with `path`, if present in the tree
160    pub fn get<P: AsRef<Path>>(&self, path: P) -> Option<&V> {
161        let key = self.tree.get(path.as_ref())?;
162        Some(&self.data[key.as_usize()])
163    }
164
165    /// Get a mutable reference to the data associated with `path`, if present in the tree
166    pub fn get_mut<P: AsRef<Path>>(&mut self, path: P) -> Option<&mut V> {
167        let key = self.tree.get(path.as_ref())?;
168        Some(&mut self.data[key.as_usize()])
169    }
170
171    /// Get the nearest entry in the tree which is an ancestor of `path`
172    pub fn nearest_ancestor<P: AsRef<Path>>(&self, path: P) -> Option<Entry<'_, V>> {
173        self.tree
174            .nearest_ancestor(path.as_ref())
175            .map(|(path, key)| Entry {
176                path,
177                data: &self.data[key.as_usize()],
178            })
179    }
180
181    /// Iterate over all of the entries inserted in this tree.
182    pub fn iter(&self) -> impl Iterator<Item = Entry<'_, V>> + '_ {
183        Dfs::new(self, /*only_leaves=*/ false, None)
184    }
185
186    /// Iterate over all of the entries inserted in this tree,
187    /// with a path that refers to a file.
188    ///
189    /// This is like `iter`, but does not emit entries for directories.
190    pub fn files(&self) -> impl Iterator<Item = Entry<'_, V>> + '_ {
191        Dfs::new(self, /*only_leaves=*/ true, None)
192    }
193
194    /// Iterate over all entries in the tree which are descendants of `path`.
195    ///
196    /// If `max_depth` is set, the search is restricted to entries with a path
197    /// containing no more than `max_depth` additional path components than
198    /// `path` itself.
199    ///
200    /// For example, with a tree containing `/foo/bar`, `/foo/bar/baz` and
201    /// `/foo/bar/baz/qux`, calling this function with `/foo/bar` and a max depth
202    /// of 2, will only return `/foo/bar/baz`, not `foo/bar/baz/qux`, since
203    /// the latter is 2 levels deeper in the hierarchy.
204    pub fn children<P: AsRef<Path>>(
205        &self,
206        path: P,
207        max_depth: Option<usize>,
208    ) -> impl Iterator<Item = Entry<'_, V>> + '_ {
209        Dfs::at(self, path.as_ref(), /*only_leaves=*/ false, max_depth)
210    }
211}
212
213#[derive(Debug, Copy, Clone, PartialEq, Eq)]
214enum PathType {
215    /// The path does not exist, and we've observed no descendant paths yet
216    Unknown,
217    /// The path is known to exist, and is not a directory
218    File,
219    /// The path is either known to exist and is a directory, or was previously
220    /// Unknown, but one or more descendant paths have been observed, which
221    /// dictates that this path must be a directory.
222    Dir,
223}
224
225/// This is the type-erased implementation backing [PathPrefixTree]
226#[derive(Default, Clone)]
227struct PrefixTree {
228    /// We permit multiple top-level root nodes in this tree,
229    /// making it actually a forest, but you could also think
230    /// of this field as representing children of a theoretical
231    /// "top" node that is the parent of all possible paths.
232    ///
233    /// On a Unix system, that would implicitly be `/`, but on
234    /// Windows there can be multiple prefixes, e.g. `C:\`, etc.
235    roots: Vec<Node>,
236}
237impl PrefixTree {
238    /// Insert `path` into this tree with `data`
239    pub fn insert(&mut self, path: &Path, data: DataKey) -> Result<(), TryInsertError> {
240        let (path, ty) = canonicalize_and_type(path);
241
242        // Try to find existing root to insert into
243        if let Some(sort) = self.try_insert_existing(&path, ty, data)? {
244            // Found root with common prefix
245            if sort {
246                self.roots.sort();
247            }
248            return Ok(());
249        }
250
251        self.insert_new(path.into_owned(), ty, data);
252
253        Ok(())
254    }
255
256    fn try_insert_existing(
257        &mut self,
258        path: &Path,
259        ty: PathType,
260        data: DataKey,
261    ) -> Result<Option<bool>, TryInsertError> {
262        for root in self.roots.iter_mut() {
263            if root.has_common_prefix(path) {
264                // If we modified `root` due to this insertion, require `roots` to be re-sorted
265                return insert_into_prefix(root, path, ty, data).map(|depth| Some(depth == 0));
266            }
267        }
268
269        Ok(None)
270    }
271
272    fn insert_new(&mut self, mut path: PathBuf, ty: PathType, data: DataKey) {
273        // Create new root node
274        let root = match ty {
275            PathType::Dir => Node::Branch {
276                component: path,
277                children: vec![],
278                data: Some(data),
279            },
280            ty @ (PathType::Unknown | PathType::File) => {
281                let mut components = path.components();
282                let file = PathBuf::from(components.next_back().unwrap().as_os_str());
283                let child = Node::Leaf {
284                    component: file,
285                    ty,
286                    data: Some(data),
287                };
288                path.pop();
289                Node::Branch {
290                    component: path,
291                    children: vec![child],
292                    data: None,
293                }
294            }
295        };
296
297        // Insert the new root node, and ensure the roots are ordered
298        self.roots.push(root);
299        self.roots.sort();
300    }
301
302    /// Reset this tree to its default, empty state.
303    pub fn clear(&mut self) {
304        self.roots.clear();
305    }
306
307    pub fn contains(&self, path: &Path) -> bool {
308        let (path, _) = canonicalize_and_type(path);
309        self.roots.iter().any(|root| root.contains(&path))
310    }
311
312    pub fn get(&self, path: &Path) -> Option<DataKey> {
313        let (path, _) = canonicalize_and_type(path);
314        self.roots.iter().find_map(|root| {
315            if root.has_common_prefix(&path) {
316                root.get(&path).and_then(|n| n.data())
317            } else {
318                None
319            }
320        })
321    }
322
323    pub fn nearest_ancestor(&self, path: &Path) -> Option<(PathBuf, DataKey)> {
324        let (path, _) = canonicalize_and_type(path);
325        self.roots
326            .iter()
327            .find_map(|root| root.nearest_ancestor(&path))
328    }
329}
330
331/// This is similar to `Path::canonicalize`, except it does a few things differently:
332///
333/// * If the given path cannot be canonicalized the "standard" way, we use a fallback
334///   method as described in the following points.
335/// * In the fallback canonicalization, the path type is always Unknown. If such a path
336///   is inserted in the tree, it may become Dir if descendant paths are inserted in the
337///   tree.
338/// * As a result of the above this does not try to determine if a file path is treated as a
339///   directory, e.g. `/foo/bar.txt/qux`. We assume that the user of the [PathPrefixTree] is
340///   either validating paths themselves, or is ok with invalid paths.
341/// * The fallback canonicalization only partially expands parent references (i.e. `..`). When
342///   these occur at the start of the path, they are preserved, not expanded. When they occur
343///   following some other type of path component, they cause that component to be dropped, and
344///   the `..` is considered resolved (and also dropped).
345fn canonicalize_and_type<'a>(path: &'a Path) -> (Cow<'a, Path>, PathType) {
346    use smallvec::SmallVec;
347    use std::path::Component;
348
349    const PATH_SEPARATOR_SIZE: usize = std::path::MAIN_SEPARATOR_STR.as_bytes().len();
350
351    if let Ok(path) = path.canonicalize() {
352        let path: Cow<'a, Path> = Cow::Owned(path);
353        let ty = match path.metadata() {
354            Err(_) => PathType::Unknown,
355            Ok(metadata) => {
356                if metadata.is_dir() {
357                    PathType::Dir
358                } else {
359                    PathType::File
360                }
361            }
362        };
363        return (path, ty);
364    }
365
366    let mut components = SmallVec::<[Component; 4]>::default();
367    let mut canonical = true;
368    let mut size_in_bytes = 0;
369    for component in path.components() {
370        match component {
371            component @ (Component::Normal(_) | Component::RootDir | Component::Prefix(_)) => {
372                size_in_bytes += component.as_os_str().len() + PATH_SEPARATOR_SIZE;
373                components.push(component);
374            }
375            // This only occurs when `.` is the first path component, so drop this
376            // component as we automatically assume relative paths are relative to
377            // the current working directory
378            Component::CurDir => {
379                canonical = false;
380            }
381            // This occurs when `..` is found anywhere in the path. We normalize
382            // these by dropping the preceding component when there is a preceding
383            // component, and when it is not `..`. If there are no preceding
384            // components, then `..` becomes the first component.
385            Component::ParentDir => match components.last() {
386                None | Some(Component::ParentDir) => {
387                    let component = Component::ParentDir;
388                    size_in_bytes += component.as_os_str().len() + PATH_SEPARATOR_SIZE;
389                    components.push(component);
390                }
391                Some(_) => {
392                    canonical = false;
393                    let component = unsafe { components.pop().unwrap_unchecked() };
394                    size_in_bytes -= component.as_os_str().len() + PATH_SEPARATOR_SIZE;
395                }
396            },
397        }
398    }
399
400    // If `path` is already canonical, return it unmodified
401    if canonical {
402        return (Cow::Borrowed(path), PathType::Unknown);
403    }
404
405    // Otherwise, we need to construct the canonical path
406    let mut path = PathBuf::with_capacity(size_in_bytes);
407    for component in components.into_iter() {
408        path.push(component);
409    }
410    (Cow::Owned(path), PathType::Unknown)
411}
412
413#[derive(Debug, Copy, Clone, PartialEq, Eq)]
414struct DataKey(usize);
415impl DataKey {
416    #[inline(always)]
417    const fn as_usize(self) -> usize {
418        self.0
419    }
420}
421
422#[derive(Debug, PartialEq, Eq)]
423pub enum Prefix {
424    /// The node path exactly equals the input path
425    Exact,
426    /// The input path is an ancestor of the node path.
427    Ancestor,
428    /// The input path is a child of the node path
429    Child,
430    /// The input path and node path diverge, and the
431    /// given path is the common prefix of the two.
432    Partial(PathBuf),
433}
434
435#[derive(Debug, Clone, PartialEq, Eq)]
436enum Node {
437    /// A leaf node is always a file
438    Leaf {
439        component: PathBuf,
440        ty: PathType,
441        data: Option<DataKey>,
442    },
443    /// A branch node is always a directory, and may have zero or more children
444    Branch {
445        component: PathBuf,
446        children: Vec<Node>,
447        data: Option<DataKey>,
448    },
449}
450impl Ord for Node {
451    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
452        self.component().cmp(other.component())
453    }
454}
455impl PartialOrd for Node {
456    #[inline]
457    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
458        Some(self.cmp(other))
459    }
460}
461impl Node {
462    /// Get the path component for this node
463    #[inline]
464    pub fn component(&self) -> &Path {
465        match self {
466            Self::Leaf { ref component, .. } | Self::Branch { ref component, .. } => {
467                component.as_path()
468            }
469        }
470    }
471
472    /// Get the data key for this node, if applicable
473    #[inline]
474    pub fn data(&self) -> Option<DataKey> {
475        match self {
476            Self::Leaf { ref data, .. } | Self::Branch { ref data, .. } => *data,
477        }
478    }
479
480    /// Returns the common prefix with `path`.
481    ///
482    /// If `None`, there is no common prefix.
483    ///
484    /// If `Some`, see the docs for [PrefixMatch] for details on what the various
485    /// match types mean.
486    pub fn common_prefix(&self, path: &Path) -> Option<Prefix> {
487        assert_ne!(
488            path,
489            Path::new(""),
490            "invalid empty path given to common_prefix"
491        );
492
493        let mut ac = self.component().components();
494        let mut bc = path.components();
495        let mut common_prefix = PathBuf::new();
496        loop {
497            match (ac.next(), bc.next()) {
498                // We've reached the end of the components of `self`, so we've
499                // got at least some common prefix with `path`, go ahead and return
500                // it without further consideration.
501                (None, None) => break Some(Prefix::Exact),
502                (None, Some(_)) => break Some(Prefix::Child),
503                // The next component in both paths is common, so append it to
504                // our common prefix.
505                (Some(a), Some(b)) if a == b => {
506                    common_prefix.push(a);
507                }
508                // We diverge from `path` here.
509                (Some(_), Some(_)) => {
510                    if common_prefix == Path::new("") {
511                        break None;
512                    } else {
513                        break Some(Prefix::Partial(common_prefix));
514                    }
515                }
516                // We've reached the end of the components of `path` before `self`.
517                // This indicates that `path` is shallower than `self`, and thus `self`
518                // must be split to accomodate `path`.
519                (Some(_), None) => {
520                    break Some(Prefix::Ancestor);
521                }
522            }
523        }
524    }
525
526    /// Returns true if `self` has a common prefix with `path`.
527    pub fn has_common_prefix(&self, path: &Path) -> bool {
528        assert_ne!(
529            path,
530            Path::new(""),
531            "invalid empty path given to has_common_prefix"
532        );
533
534        let mut ac = self.component().components();
535        let mut bc = path.components();
536        let mut has_minimum_prefix = false;
537        loop {
538            match (ac.next(), bc.next()) {
539                (None, _) => break true,
540                (Some(a), Some(b)) if a == b => {
541                    has_minimum_prefix = true;
542                }
543                (Some(_), Some(_)) => break has_minimum_prefix,
544                (Some(_), None) => break true,
545            }
546        }
547    }
548
549    /// Returns true if `path` is explicitly represented under this node
550    ///
551    /// Explicit representation means there is a `Node` in the tree whose full path
552    /// is equal to `path`. Implicit representation is only applicable to directories,
553    /// and refers to when there is a `Node` in the tree for which `path` is a prefix,
554    /// but the full path for that node refers to an item deeper in the directory tree,
555    /// e.g. `/foo/bar` is implicitly represented by a node whose path is `/foo/bar/baz.txt`
556    pub fn contains(&self, mut path: &Path) -> bool {
557        let mut next = Some(self);
558        while let Some(node) = next.take() {
559            match node {
560                Self::Branch {
561                    ref component,
562                    ref children,
563                    ref data,
564                } => {
565                    if let Ok(rest) = path.strip_prefix(component) {
566                        if rest == Path::new("") {
567                            return data.is_some();
568                        }
569                        match children.iter().position(|c| c.has_common_prefix(rest)) {
570                            Some(index) => {
571                                path = rest;
572                                next = Some(&children[index]);
573                                continue;
574                            }
575                            None => {
576                                // the remainder of `path` is not covered by any children
577                                break;
578                            }
579                        }
580                    } else {
581                        // `component` may have a common prefix with `path`, but `path` is
582                        // not explicitly represented in the tree, so we must return false
583                        break;
584                    }
585                }
586                Self::Leaf {
587                    ref component,
588                    ref data,
589                    ..
590                } => {
591                    // `component` may have a common prefix with `path`, but unless `path`
592                    // is equal to `component`, it is not explicitly represented in the tree
593                    return path == component && data.is_some();
594                }
595            }
596        }
597
598        false
599    }
600
601    pub fn get<'a>(&'a self, mut path: &Path) -> Option<&'a Node> {
602        let mut next = Some(self);
603        while let Some(node) = next.take() {
604            match node {
605                Self::Branch {
606                    ref component,
607                    ref children,
608                    ..
609                } => {
610                    if let Ok(rest) = path.strip_prefix(component) {
611                        if rest == Path::new("") {
612                            return Some(node);
613                        }
614                        match children.iter().position(|c| c.has_common_prefix(rest)) {
615                            Some(index) => {
616                                path = rest;
617                                next = Some(&children[index]);
618                                continue;
619                            }
620                            None => {
621                                break;
622                            }
623                        }
624                    }
625                }
626                Self::Leaf { ref component, .. } => {
627                    if path == component {
628                        return Some(node);
629                    }
630                    break;
631                }
632            }
633        }
634
635        None
636    }
637
638    /// Return the path and data key associated with it, which is the nearest
639    /// ancestor of `path`, with data, represented under this node.
640    ///
641    /// For example, consider a tree in which `/foo/bar/baz/qux.txt` and
642    /// `/foo/bar/qux2.txt` are both inserted:
643    ///
644    /// * Given `/foo/bar/baz/other.txt` this function will select `/foo/bar/qux2.txt`,
645    /// because `/foo/bar/baz/qux.txt` is a sibling, but not an ancestor.
646    /// * Given `/foo/bar/qux2.txt`, it will select None, because there is no ancestor
647    /// explicitly represented in the tree for this path
648    /// * Given `/foo/bar/baz/qux.txt`, it will select `/foo/bar/qux2.txt`, because
649    /// `/foo/bar/baz/qux.txt` is not an ancestor of itself, and its nearest ancestor
650    /// is the selected path.
651    pub fn nearest_ancestor(&self, mut path: &Path) -> Option<(PathBuf, DataKey)> {
652        use smallvec::SmallVec;
653        if !self.has_common_prefix(path) {
654            return None;
655        }
656
657        let mut candidates = SmallVec::<[(PathBuf, DataKey); 2]>::default();
658        let mut ancestor = PathBuf::new();
659        let mut next = Some(self);
660        while let Some(node) = next.take() {
661            match node {
662                Self::Branch {
663                    ref component,
664                    ref children,
665                    data,
666                } => {
667                    if let Ok(rest) = path.strip_prefix(component) {
668                        // We found `path`, so take the most recent candidate,
669                        // if one is available. If not, then there are no nearest
670                        // ancestors for `path`
671                        if rest == Path::new("") {
672                            return candidates.pop();
673                        }
674
675                        // Otherwise, find the next child node to descend into.
676                        // If a suitable candidate is found, and the current node
677                        // has data associated with it, push the current path as
678                        // a candidate node, and check the child node to see if it
679                        // is closer.
680                        //
681                        // If no suitable candidate is found, but the current node
682                        // has data associated with it, then the current node is the
683                        // nearest ancestor, so we can just return it. If it has no
684                        // data, then the most recent candidate is returned, if available.
685                        ancestor.push(component);
686                        let child = children.iter().position(|c| c.has_common_prefix(rest));
687                        if let Some(index) = child {
688                            if let Some(data) = *data {
689                                candidates.push((ancestor.clone(), data));
690                            }
691
692                            path = rest;
693                            next = Some(&children[index]);
694                            continue;
695                        } else {
696                            return data
697                                .map(|data| (ancestor, data))
698                                .or_else(|| candidates.pop());
699                        }
700                    } else {
701                        // To insert `path` in the tree, we'd have to split `node`, which
702                        // means the most recent candidate is the nearest ancestor, if we
703                        // have one
704                        return candidates.pop();
705                    }
706                }
707                // Leaf nodes by definition cannot be the nearest ancestor, so use
708                // the most recent candidate, if we have one
709                Self::Leaf { .. } => return candidates.pop(),
710            }
711        }
712
713        None
714    }
715
716    fn take(&mut self, prefix: &Path) -> Self {
717        match self {
718            Node::Branch {
719                ref component,
720                ref mut children,
721                ref mut data,
722            } => {
723                let component = component.strip_prefix(prefix).unwrap().to_path_buf();
724                Node::Branch {
725                    component,
726                    children: core::mem::take(children),
727                    data: data.take(),
728                }
729            }
730            Node::Leaf {
731                ref component,
732                ref mut data,
733                ty,
734            } => {
735                let component = component.strip_prefix(prefix).unwrap().to_path_buf();
736                Node::Leaf {
737                    component,
738                    ty: *ty,
739                    data: data.take(),
740                }
741            }
742        }
743    }
744
745    fn split_at(&mut self, common_prefix: PathBuf) {
746        let split = self.take(&common_prefix);
747        *self = Node::Branch {
748            component: common_prefix,
749            children: vec![split],
750            data: None,
751        };
752    }
753
754    fn set_type(&mut self, ty: PathType) -> Result<(), TryInsertError> {
755        match self {
756            Self::Leaf {
757                ref mut component,
758                ref mut data,
759                ty: ref mut prev_ty,
760                ..
761            } => match ty {
762                PathType::Unknown | PathType::File => *prev_ty = ty,
763                PathType::Dir => {
764                    let component = core::mem::replace(component, PathBuf::new());
765                    let data = data.take();
766                    *self = Self::Branch {
767                        component,
768                        data,
769                        children: vec![],
770                    };
771                }
772            },
773            Self::Branch { .. } => match ty {
774                PathType::Dir => (),
775                PathType::Unknown | PathType::File => return Err(TryInsertError::PathTypeConflict),
776            },
777        }
778
779        Ok(())
780    }
781
782    fn set_data(&mut self, data: DataKey) {
783        match self {
784            Self::Branch {
785                data: ref mut prev_data,
786                ..
787            }
788            | Self::Leaf {
789                data: ref mut prev_data,
790                ..
791            } => {
792                *prev_data = Some(data);
793            }
794        }
795    }
796
797    fn push_child(
798        &mut self,
799        component: PathBuf,
800        ty: PathType,
801        data: Option<DataKey>,
802    ) -> Result<(), TryInsertError> {
803        let child = match ty {
804            PathType::File | PathType::Unknown => Self::Leaf {
805                component,
806                ty,
807                data,
808            },
809            PathType::Dir => Self::Branch {
810                component,
811                children: vec![],
812                data,
813            },
814        };
815        match self {
816            Self::Branch {
817                ref mut children, ..
818            } => {
819                children.push(child);
820                children.sort();
821            }
822            Self::Leaf {
823                component: ref mut parent_component,
824                data: ref mut parent_data,
825                ty: PathType::Unknown,
826            } => {
827                let children = vec![child];
828                let component = core::mem::replace(parent_component, PathBuf::new());
829                let data = parent_data.take();
830                *self = Self::Branch {
831                    component,
832                    children,
833                    data,
834                };
835            }
836            Self::Leaf { .. } => return Err(TryInsertError::PathTypeConflict),
837        }
838
839        Ok(())
840    }
841
842    fn try_insert_new<'a>(
843        &'a mut self,
844        path: &Path,
845        component: &Path,
846        ty: PathType,
847        data: Option<DataKey>,
848    ) -> Either<Result<(), TryInsertError>, &'a mut Node> {
849        match self {
850            Self::Branch {
851                ref mut children, ..
852            } => {
853                if let Some(index) = children.iter().position(|c| c.has_common_prefix(component)) {
854                    // We can't insert this as new, but the given index is a child we can try to insert into next
855                    return Right(&mut children[index]);
856                }
857                let child = match ty {
858                    PathType::File | PathType::Unknown => Self::Leaf {
859                        component: component.to_path_buf(),
860                        ty,
861                        data,
862                    },
863                    PathType::Dir => Self::Branch {
864                        component: component.to_path_buf(),
865                        children: vec![],
866                        data,
867                    },
868                };
869                children.push(child);
870                children.sort();
871
872                Left(Ok(()))
873            }
874            Self::Leaf {
875                ty: PathType::File, ..
876            } => Left(Err(TryInsertError::Unreachable(path.to_path_buf()))),
877            Self::Leaf {
878                component: ref mut parent_component,
879                data: ref mut parent_data,
880                ..
881            } => {
882                let child = match ty {
883                    PathType::File | PathType::Unknown => Self::Leaf {
884                        component: component.to_path_buf(),
885                        ty,
886                        data,
887                    },
888                    PathType::Dir => Self::Branch {
889                        component: component.to_path_buf(),
890                        children: vec![],
891                        data,
892                    },
893                };
894                let children = vec![child];
895                let component = core::mem::replace(parent_component, PathBuf::new());
896                let data = parent_data.take();
897                *self = Self::Branch {
898                    component,
899                    children,
900                    data,
901                };
902                Left(Ok(()))
903            }
904        }
905    }
906}
907
908/// Insert `path` into `node` which contains a prefix of `path`.
909///
910/// Returns the depth relative to `node` where `path` is inserted, a depth
911/// of zero indicates that `node` itself was modified.
912#[inline(never)]
913fn insert_into_prefix(
914    node: &mut Node,
915    mut path: &Path,
916    ty: PathType,
917    data: DataKey,
918) -> Result<usize, TryInsertError> {
919    let orig_path = path;
920    let mut next = Some((node, 0));
921
922    loop {
923        let (node, depth) = next.take().unwrap();
924
925        if let Some(prefix) = node.common_prefix(path) {
926            match prefix {
927                Prefix::Exact => {
928                    node.set_type(ty)?;
929                    node.set_data(data);
930                    break Ok(depth);
931                }
932                Prefix::Child => {
933                    path = path.strip_prefix(node.component()).unwrap();
934                    match node.try_insert_new(orig_path, path, ty, Some(data)) {
935                        Left(result) => {
936                            result?;
937                            break Ok(depth + 1);
938                        }
939                        Right(next_child) => {
940                            next = Some((next_child, depth + 1));
941                            continue;
942                        }
943                    }
944                }
945                Prefix::Ancestor => {
946                    node.split_at(path.to_path_buf());
947                    node.set_data(data);
948                    break Ok(depth);
949                }
950                Prefix::Partial(common_prefix) => {
951                    let component = path.strip_prefix(&common_prefix).unwrap().to_path_buf();
952                    node.split_at(common_prefix);
953                    node.push_child(component, ty, Some(data))?;
954                    break Ok(depth);
955                }
956            }
957        }
958    }
959}
960
961#[derive(Debug)]
962struct DfsVisitor<'a> {
963    worklist: &'a [Node],
964    prefix: PathBuf,
965    component_prefix: PathBuf,
966    queued: Option<(PathBuf, usize, &'a [Node])>,
967    depth: usize,
968    only_leaves: bool,
969}
970impl<'a> DfsVisitor<'a> {
971    fn new(prefix: PathBuf, worklist: &'a [Node], depth: usize, only_leaves: bool) -> Self {
972        Self {
973            worklist,
974            prefix,
975            component_prefix: PathBuf::new(),
976            queued: None,
977            depth,
978            only_leaves,
979        }
980    }
981
982    pub fn next(
983        &mut self,
984        max_depth: Option<usize>,
985    ) -> Option<Either<(PathBuf, DataKey), DfsVisitor<'a>>> {
986        if let Some((prefix, relative_depth, worklist)) = self.queued.take() {
987            return Some(Right(DfsVisitor::new(
988                prefix,
989                worklist,
990                relative_depth,
991                self.only_leaves,
992            )));
993        }
994
995        loop {
996            match self.worklist.split_first()? {
997                (Node::Leaf { data: None, .. }, worklist) => {
998                    self.worklist = worklist;
999                    continue;
1000                }
1001                (
1002                    Node::Leaf {
1003                        ref component,
1004                        data: Some(data),
1005                        ..
1006                    },
1007                    worklist,
1008                ) => {
1009                    self.worklist = worklist;
1010
1011                    if self.exceeds_max_depth(component, max_depth) {
1012                        continue;
1013                    }
1014                    let suffix = self.strip_prefix(component);
1015                    let prefix = self.prefix.join(suffix);
1016                    break Some(Left((prefix, *data)));
1017                }
1018                (
1019                    Node::Branch {
1020                        ref component,
1021                        ref children,
1022                        data,
1023                    },
1024                    worklist,
1025                ) => {
1026                    self.worklist = worklist;
1027
1028                    let relative_depth = self.relative_depth(component, max_depth);
1029                    if let Some(max_depth) = max_depth {
1030                        if relative_depth > max_depth {
1031                            continue;
1032                        }
1033                    }
1034                    let suffix = self.strip_prefix(component);
1035                    let prefix = self.prefix.join(suffix);
1036                    if !children.is_empty() {
1037                        match data {
1038                            // We have to emit the branch node first, so save the descent
1039                            // information for the next time `next` is called
1040                            Some(data) => {
1041                                self.queued =
1042                                    Some((prefix.clone(), relative_depth, children.as_slice()));
1043                                break Some(Left((prefix, *data)));
1044                            }
1045                            // We don't need to emit the branch node, so return a new visitor
1046                            // to start descending into the children
1047                            None => {
1048                                break Some(Right(DfsVisitor::new(
1049                                    prefix,
1050                                    children.as_slice(),
1051                                    relative_depth,
1052                                    self.only_leaves,
1053                                )));
1054                            }
1055                        }
1056                    }
1057                }
1058            }
1059        }
1060    }
1061
1062    fn exceeds_max_depth(&self, path: &Path, max_depth: Option<usize>) -> bool {
1063        match max_depth {
1064            None => false,
1065            Some(max_depth) => {
1066                let suffix = self.strip_prefix(path);
1067                let relative_depth = self.depth + suffix.components().count();
1068                relative_depth > max_depth
1069            }
1070        }
1071    }
1072
1073    fn relative_depth(&self, path: &Path, max_depth: Option<usize>) -> usize {
1074        match max_depth {
1075            // If we don't care about the depth, do nothing
1076            None => 0,
1077            Some(_) => {
1078                let suffix = self.strip_prefix(path);
1079                self.depth + suffix.components().count()
1080            }
1081        }
1082    }
1083
1084    #[inline]
1085    fn strip_prefix<'p>(&self, path: &'p Path) -> &'p Path {
1086        if self.component_prefix == Path::new("") {
1087            return path;
1088        }
1089        path.strip_prefix(&self.component_prefix).unwrap()
1090    }
1091}
1092
1093struct Dfs<'a, V> {
1094    data: &'a [V],
1095    stack: Vec<DfsVisitor<'a>>,
1096    current: DfsVisitor<'a>,
1097    max_depth: Option<usize>,
1098}
1099impl<V> core::fmt::Debug for Dfs<'_, V> {
1100    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
1101        f.debug_struct("Dfs")
1102            .field("stack", &self.stack)
1103            .field("current", &self.current)
1104            .field("max_depth", &self.max_depth)
1105            .finish()
1106    }
1107}
1108impl<'a, V> Dfs<'a, V> {
1109    fn new(tree: &'a PathPrefixTree<V>, only_leaves: bool, max_depth: Option<usize>) -> Self {
1110        Self {
1111            data: tree.data.as_slice(),
1112            stack: vec![],
1113            current: DfsVisitor::new(PathBuf::new(), tree.tree.roots.as_slice(), 0, only_leaves),
1114            max_depth,
1115        }
1116    }
1117
1118    fn at(
1119        tree: &'a PathPrefixTree<V>,
1120        path: &Path,
1121        only_leaves: bool,
1122        max_depth: Option<usize>,
1123    ) -> Self {
1124        // Traverse tree until we find our initial working set
1125        match tree
1126            .tree
1127            .roots
1128            .iter()
1129            .find(|root| root.has_common_prefix(path))
1130        {
1131            None => return Self::empty(tree, only_leaves, max_depth),
1132            Some(root) => {
1133                let mut next = Some(root);
1134                let mut input_path = path;
1135                let mut prefix = PathBuf::new();
1136                loop {
1137                    let node = next.take().unwrap();
1138                    match node.common_prefix(input_path) {
1139                        // No children for `path` in this tree
1140                        None => break Self::empty(tree, only_leaves, max_depth),
1141                        Some(Prefix::Exact) => {
1142                            break Self {
1143                                data: tree.data.as_slice(),
1144                                stack: vec![],
1145                                current: DfsVisitor::new(
1146                                    prefix,
1147                                    core::slice::from_ref(node),
1148                                    0,
1149                                    only_leaves,
1150                                ),
1151                                max_depth,
1152                            };
1153                        }
1154                        // This node is the first child, as long as it is not deeper than `max_depth`
1155                        Some(Prefix::Ancestor) => {
1156                            prefix.push(input_path);
1157                            break Self {
1158                                data: tree.data.as_slice(),
1159                                stack: vec![],
1160                                current: DfsVisitor {
1161                                    component_prefix: PathBuf::from(input_path),
1162                                    ..DfsVisitor::new(
1163                                        prefix,
1164                                        core::slice::from_ref(node),
1165                                        0,
1166                                        only_leaves,
1167                                    )
1168                                },
1169                                max_depth,
1170                            };
1171                        }
1172                        // We need to recurse deeper into the tree to find potential children
1173                        Some(Prefix::Child) => {
1174                            match node {
1175                                Node::Branch {
1176                                    ref component,
1177                                    ref children,
1178                                    ..
1179                                } => {
1180                                    input_path = input_path.strip_prefix(component).unwrap();
1181                                    next =
1182                                        children.iter().find(|c| c.has_common_prefix(input_path));
1183                                    if next.is_none() {
1184                                        break Self::empty(tree, only_leaves, max_depth);
1185                                    }
1186                                    prefix.push(component);
1187                                }
1188                                // There are no children for `path`, so return an empty iterator
1189                                Node::Leaf { .. } => {
1190                                    break Self::empty(tree, only_leaves, max_depth)
1191                                }
1192                            }
1193                        }
1194                        // There can be no children of `path` under `node`
1195                        Some(Prefix::Partial(_)) => {
1196                            break Self::empty(tree, only_leaves, max_depth)
1197                        }
1198                    }
1199                }
1200            }
1201        }
1202    }
1203
1204    #[inline]
1205    fn empty(tree: &'a PathPrefixTree<V>, only_leaves: bool, max_depth: Option<usize>) -> Self {
1206        Self {
1207            data: tree.data.as_slice(),
1208            stack: vec![],
1209            current: DfsVisitor::new(PathBuf::new(), &[], 0, only_leaves),
1210            max_depth,
1211        }
1212    }
1213}
1214impl<'a, V> Iterator for Dfs<'a, V> {
1215    type Item = Entry<'a, V>;
1216
1217    fn next(&mut self) -> Option<Self::Item> {
1218        loop {
1219            match self.current.next(self.max_depth) {
1220                // No more nodes at this depth
1221                None => {
1222                    // Try to resume iteration at the next level up, else we're done
1223                    self.current = self.stack.pop()?;
1224                }
1225                // Visitor produced the next element at this depth
1226                Some(Left((path, key))) => {
1227                    return Some(Entry {
1228                        path,
1229                        data: &self.data[key.as_usize()],
1230                    });
1231                }
1232                // Visitor has indicated we should suspend iteration at this
1233                // depth and descend into a child node first
1234                Some(Right(visitor)) => {
1235                    let suspended = core::mem::replace(&mut self.current, visitor);
1236                    self.stack.push(suspended);
1237                }
1238            }
1239        }
1240    }
1241}
1242
1243#[cfg(test)]
1244mod tests {
1245    use pretty_assertions::assert_eq;
1246
1247    use super::*;
1248
1249    #[test]
1250    fn path_tree_insert() {
1251        // Tests:
1252        // * insertion in a clean tree
1253        // * insertion under an existing node
1254        // * insertion in a dirty tree
1255        let tree = PathPrefixTree::<()>::from_iter(["/foo/bar", "/foo/bar/baz", "/qux"]);
1256
1257        let child = Node::Leaf {
1258            component: PathBuf::from("baz"),
1259            ty: PathType::Unknown,
1260            data: Some(DataKey(1)),
1261        };
1262        let a = Node::Branch {
1263            component: PathBuf::from("foo"),
1264            children: vec![Node::Branch {
1265                component: PathBuf::from("bar"),
1266                children: vec![child],
1267                data: Some(DataKey(0)),
1268            }],
1269            data: None,
1270        };
1271        let b = Node::Leaf {
1272            component: PathBuf::from("qux"),
1273            ty: PathType::Unknown,
1274            data: Some(DataKey(2)),
1275        };
1276        let root = Node::Branch {
1277            component: PathBuf::from("/"),
1278            children: vec![a, b],
1279            data: None,
1280        };
1281
1282        assert_eq!(tree.tree.roots.as_slice(), &[root]);
1283    }
1284
1285    #[test]
1286    fn path_tree_nearest_ancestor() {
1287        let tree = PathPrefixTree::<()>::from_iter(["/foo/bar", "/foo/bar/baz", "/qux"]);
1288
1289        assert_eq!(
1290            tree.nearest_ancestor("/foo/bar/baz").map(Entry::into_path),
1291            Some(PathBuf::from("/foo/bar"))
1292        );
1293        assert_eq!(
1294            tree.nearest_ancestor("/foo/bar").map(Entry::into_path),
1295            None
1296        );
1297        assert_eq!(tree.nearest_ancestor("/qux").map(Entry::into_path), None);
1298    }
1299
1300    #[test]
1301    fn path_tree_contains() {
1302        let tree = PathPrefixTree::<()>::from_iter(["/foo/bar", "/foo/bar/baz", "/qux"]);
1303
1304        assert!(tree.contains("/foo/bar/baz"));
1305        assert!(tree.contains("/foo/bar"));
1306        assert!(!tree.contains("/foo"));
1307        assert!(!tree.contains("/foo/bar/baz/thing.txt"));
1308    }
1309
1310    #[test]
1311    fn path_tree_get() {
1312        let mut tree = PathPrefixTree::default();
1313        tree.insert("/foo/bar/baz", 1usize);
1314        tree.insert("/foo/bar/baz/qux", 2usize);
1315
1316        assert_eq!(tree.get("/foo/bar/baz/qux").copied(), Some(2));
1317        assert_eq!(tree.get("/foo/bar/baz").copied(), Some(1));
1318    }
1319
1320    #[test]
1321    fn path_tree_iter() {
1322        let tree = PathPrefixTree::<()>::from_iter(["/qux", "/foo/bar/baz", "/foo/bar"]);
1323
1324        let paths = tree.iter().map(|e| e.into_path()).collect::<Vec<_>>();
1325
1326        let expected = vec![
1327            PathBuf::from("/foo/bar"),
1328            PathBuf::from("/foo/bar/baz"),
1329            PathBuf::from("/qux"),
1330        ];
1331
1332        assert_eq!(paths, expected);
1333    }
1334
1335    #[test]
1336    fn path_tree_children() {
1337        let tree = PathPrefixTree::<()>::from_iter(["/qux", "/foo/bar/baz", "/foo/bar"]);
1338
1339        let paths = tree
1340            .children("/foo/bar", None)
1341            .map(|e| e.into_path())
1342            .collect::<Vec<_>>();
1343
1344        let expected = vec![PathBuf::from("/foo/bar"), PathBuf::from("/foo/bar/baz")];
1345
1346        assert_eq!(paths, expected);
1347
1348        let paths = tree
1349            .children("/foo", None)
1350            .map(|e| e.into_path())
1351            .collect::<Vec<_>>();
1352
1353        assert_eq!(paths, expected);
1354    }
1355}