fs_more/directory/scan/
iter.rs

1use std::{
2    collections::VecDeque,
3    fs::Metadata,
4    path::{Path, PathBuf},
5};
6
7use_enabled_fs_module!();
8
9use super::{DirectoryScanDepthLimit, DirectoryScanOptions, ScanEntry};
10use crate::{directory::ScanEntryDepth, error::DirectoryScanError};
11
12
13/// A currently open directory that is being iterated over (scanned).
14struct OpenDirectory {
15    /// Path of the directory being scanned.
16    directory_path: PathBuf,
17
18    /// Depth of the directory, relative to the root of the scan tree.
19    directory_depth: ScanEntryDepth,
20
21    /// The actual directory iterator from the standard library.
22    iterator: fs::ReadDir,
23}
24
25
26/// A directory pending for a scan.
27struct PendingDirectory {
28    /// Path of the directory to be scanned.
29    directory_path: PathBuf,
30
31    /// Depth of the directory, relative to the root of the scan tree.
32    directory_depth: ScanEntryDepth,
33}
34
35
36/// Represents a directory tree ancestor.
37///
38/// Used for symlink cycle detection when
39/// [`DirectoryScanOptions::should_track_ancestors`] returns `true`.
40struct Ancestor {
41    /// Path of the ancestor directory.
42    path: PathBuf,
43
44    /// Depth of the ancestor directory.
45    depth: ScanEntryDepth,
46}
47
48
49/// Internal information about the next yielded directory entry.
50struct NextEntryInfo {
51    /// Path of the next entry.
52    ///
53    /// If [`DirectoryScanOptions::follow_symbolic_links`] is `true`,
54    /// this path will never lead to a symlink.
55    path: PathBuf,
56
57    /// Metadata regarding the entry.
58    metadata: Metadata,
59
60    /// Depth of the entry, relative to the root of the scan tree.
61    depth: ScanEntryDepth,
62}
63
64
65
66/// A recursive breadth-first directory iterator.
67///
68/// Obtained from calling `into_iter` after initializing the directory scanner;
69/// see [`DirectoryScanner::new`].
70///
71///
72/// [`DirectoryScanner::new`]: super::DirectoryScanner::new
73pub struct BreadthFirstDirectoryIter {
74    /// Path of the directory the scan started at.
75    base_directory: PathBuf,
76
77    /// Directory scanning options.
78    options: DirectoryScanOptions,
79
80    /// Whether the `base_directory` has been added to the pending scan queue yet.
81    /// If the base directory is a symbolic link to a directory and
82    /// [`follow_base_directory_symbolic_link`] is `true`,
83    /// the symlink is also resolved in this step, and the `base_directory` field value
84    /// is updated to reflect the symlink destination.
85    ///
86    /// This is generally done on the first call to [`Self::next`].
87    ///
88    /// [`follow_base_directory_symbolic_link`]: DirectoryScanOptions::follow_base_directory_symbolic_link
89    has_processed_base_directory: bool,
90
91    /// Whether the `base_directory` has been taken from the pending scan queue and opened for reading yet.
92    ///
93    /// This is generally done on the first call to [`Self::next`].
94    has_scanned_base_directory: bool,
95
96    /// A stack of pending directory paths that have yet to be scanned.
97    ///
98    /// Fresh entries are added to the back, and the next entry is taken from the front (FIFO order).
99    pending_directory_stack: VecDeque<PendingDirectory>,
100
101    /// If `Some`, this field contains the currently active directory "reader" (iterator),
102    /// along with information about the directory path we're scanning, and its depth in the scan tree.
103    currently_open_directory: Option<OpenDirectory>,
104
105    /// Contains a list of directory paths used for preventing symbolic link cycles.
106    /// The values are ancestors of the `currently_open_directory`, plus the current directory itself
107    /// (which is the last element). Items are therefore ordered from shallowest to deepest
108    /// (i.e. first element is a handle to the base directory).
109    ///
110    /// This will always be empty if [`follow_symbolic_links`] is `false`
111    /// (i.e. when [`DirectoryScanOptions::should_track_ancestors`] returns `false`).
112    ///
113    ///
114    /// [`follow_symbolic_links`]: DirectoryScanOptions::follow_symbolic_links
115    current_directory_ancestors: Vec<Ancestor>,
116}
117
118
119impl BreadthFirstDirectoryIter {
120    pub(super) fn new<P>(base_directory: P, options: DirectoryScanOptions) -> Self
121    where
122        P: Into<PathBuf>,
123    {
124        let base_directory: PathBuf = base_directory.into();
125
126        Self {
127            base_directory,
128            has_processed_base_directory: false,
129            has_scanned_base_directory: false,
130            options,
131            currently_open_directory: None,
132            pending_directory_stack: VecDeque::new(),
133            current_directory_ancestors: vec![],
134        }
135    }
136
137
138    /// Returns a reference to the currently active (open) directory iterator, if any,
139    /// `None` otherwise.
140    fn current_directory_handle(&self) -> Option<&OpenDirectory> {
141        if self.currently_open_directory.is_some() {
142            let handle = self
143                .currently_open_directory
144                .as_ref()
145                .expect("currently_open_directory should be Some");
146
147            return Some(handle);
148        }
149
150        None
151    }
152
153    /// Returns the directory path of the currently active (open) directory iterator.
154    ///
155    /// # Panics
156    /// If there is no currently open directory iterator, this method will panic.
157    /// It is up to the caller to ensure a directory iterator iss be currently active.
158    fn current_directory_handle_path_unchecked(&self) -> &Path {
159        &self
160            .current_directory_handle()
161            .expect("expected a directory handle to be open")
162            .directory_path
163    }
164
165    /// Returns the entry depth of the currently active (open) directory iterator.
166    ///
167    /// # Panics
168    /// If there is no currently open directory iterator, this method will panic.
169    /// It is up to the caller to ensure a directory iterator iss be currently active.
170    fn current_directory_handle_depth_unchecked(&self) -> &ScanEntryDepth {
171        &self
172            .current_directory_handle()
173            .expect("expected a directory handle to be open")
174            .directory_depth
175    }
176
177    /// Returns a mutable reference to a directory iterator, either the current one,
178    /// or if none is active at the moment, opening the next directory iterator on the stack.
179    ///
180    /// Returns `None` if the pending directory stack is empty, i.e. when the iterator has
181    /// no more elements to scan and yield.
182    fn current_or_next_directory_handle_mut(
183        &mut self,
184    ) -> Result<Option<&mut OpenDirectory>, DirectoryScanError> {
185        if self.currently_open_directory.is_some() {
186            let handle = self
187                .currently_open_directory
188                .as_mut()
189                .expect("currently_open_directory should be Some");
190
191            return Ok(Some(handle));
192        }
193
194        self.open_next_directory_handle()
195    }
196
197    /// Returns a [`SymlinkCycleEncountered`] error if the provided `directory_path`
198    /// would lead to a scan cycle, e.g. when symlinks are cyclic.
199    ///
200    /// This method relies on the [`Self::current_directory_ancestors`] field
201    /// to be properly maintained as directories are entered or exited.
202    ///
203    ///
204    /// # Invariants
205    /// - `directory_path` must not be a symlink to a directory (you must resolve the link yourself
206    ///   before calling the function).
207    ///
208    ///
209    /// [`SymlinkCycleEncountered`]: DirectoryScanError::SymlinkCycleEncountered
210    fn ensure_directory_path_does_not_lead_to_a_tree_cycle(
211        &self,
212        directory_path: &Path,
213    ) -> Result<(), DirectoryScanError> {
214        for ancestor_directory_handle in &self.current_directory_ancestors {
215            if directory_path.eq(&ancestor_directory_handle.path) {
216                return Err(DirectoryScanError::SymlinkCycleEncountered {
217                    directory_path: directory_path.to_path_buf(),
218                });
219            }
220        }
221
222        Ok(())
223    }
224
225    /// Attempts to open the next pending directory for iteration.
226    ///
227    /// On success, a `Ok(Some(&mut`[`OpenDirectory`]`))` is returned,
228    /// containing the newly-opened directory which is ready for iteration.
229    ///
230    /// **Note that if a directory is already open when calling this,
231    /// it will be discarded!** Prefer using [`Self::close_current_directory_handle`]
232    /// when a directory is exhausted instead of directly calling this method.
233    ///
234    /// # Edge cases
235    /// If there is no pending directory left, and the base directory has already been scanned,
236    /// `Ok(None)` is returned.
237    fn open_next_directory_handle(
238        &mut self,
239    ) -> Result<Option<&mut OpenDirectory>, DirectoryScanError> {
240        if !self.has_scanned_base_directory {
241            // We've just started, perhaps having just yielded the base directory.
242            // As such, we should open the base directory.
243
244            let base_dir_iterator = fs::read_dir(&self.base_directory).map_err(|io_error| {
245                DirectoryScanError::UnableToReadDirectory {
246                    directory_path: self.base_directory.clone(),
247                    error: io_error,
248                }
249            })?;
250
251
252            let active_reader_entry = OpenDirectory {
253                directory_path: self.base_directory.clone(),
254                directory_depth: ScanEntryDepth::BaseDirectory,
255                iterator: base_dir_iterator,
256            };
257
258
259            assert!(self.currently_open_directory.is_none());
260
261            self.currently_open_directory = Some(active_reader_entry);
262            self.has_scanned_base_directory = true;
263
264
265            if self.options.should_track_ancestors() {
266                #[cfg(debug_assertions)]
267                {
268                    if !self.current_directory_ancestors.is_empty() {
269                        panic!(
270                            "expected current_directory_ancestors to be empty \
271                            before opening and scanning the base directory"
272                        );
273                    }
274                }
275
276                self.current_directory_ancestors.push(Ancestor {
277                    path: self.base_directory.clone(),
278                    depth: ScanEntryDepth::BaseDirectory,
279                });
280            }
281
282
283            let handle_mut = self.currently_open_directory
284                .as_mut()
285                // PANIC SAFETY: We just `push`-ed onto the vector, meaning this will never panic.
286                .expect("currently_open_directory should be Some");
287
288            return Ok(Some(handle_mut));
289        }
290
291
292        // The base directory has already been opened or read;
293        // open one pending directory from the pending directory stack instead,
294        // or return `None` if no pending directories are left.
295        let Some(next_pending_directory) = self.pending_directory_stack.pop_front() else {
296            return Ok(None);
297        };
298
299
300        let directory_iterator =
301            fs::read_dir(&next_pending_directory.directory_path).map_err(|io_error| {
302                DirectoryScanError::UnableToReadDirectory {
303                    directory_path: next_pending_directory.directory_path.clone(),
304                    error: io_error,
305                }
306            })?;
307
308        let active_reader_entry = OpenDirectory {
309            directory_path: next_pending_directory.directory_path.clone(),
310            directory_depth: next_pending_directory.directory_depth,
311            iterator: directory_iterator,
312        };
313
314
315
316        // This will also drop the previously open directory.
317        // It is up to the caller of this function to ensure this is wanted behaviour.
318        self.currently_open_directory = Some(active_reader_entry);
319
320
321        // We always need to pop at most one ancestor, and only when
322        // moving to scan an adjacent directory with the same depth.
323        // We never need to pop more, because we're doing a breadth-first scan.
324        // As such, the depth is guaranteed to be monotonically increasing.
325        //
326        // For example:
327        // ```
328        // a
329        // |- b
330        // |  |-> c.bin
331        // |  |-> d.bin
332        // |- e
333        //    |- f
334        //    |  |-> g.bin
335        //    |- h
336        // ```
337        //
338        // As far as cycle detection is concerned in such a tree,
339        // we'll need to track the ancestors in the following way:
340        // - before entering (scanning) `a`, append `a` to the ancestor list,
341        // - before entering `b`, append `b` to the ancestor list,
342        // - before entering `e`, pop `b` and append `e` to the ancestor list,
343        // - before entering `f`, append `f` to the ancestor list,
344        // - before entering `h`, pop `f` and append `h` to the ancestor list.
345        //
346        // As you can see, we only need to pop a single ancestor when moving through
347        // adjacent directories of the same depth.
348        if self.options.should_track_ancestors() {
349            if let Some(last_ancestor) = self.current_directory_ancestors.last() {
350                if last_ancestor
351                    .depth
352                    .eq(&next_pending_directory.directory_depth)
353                {
354                    self.current_directory_ancestors.pop();
355                }
356            }
357
358            self.current_directory_ancestors.push(Ancestor {
359                path: next_pending_directory.directory_path,
360                depth: next_pending_directory.directory_depth,
361            });
362        }
363
364
365
366        let handle_mut = self.currently_open_directory
367            .as_mut()
368            // PANIC SAFETY: We just `push`-ed onto the vector.
369            .expect("currently_open_directory should be Some");
370
371        Ok(Some(handle_mut))
372    }
373
374    /// This method will close the current directory handle,
375    /// or return an `Err(())` if there is no open handle.
376    fn close_current_directory_handle(&mut self) -> Result<(), ()> {
377        let Some(_) = self.currently_open_directory.take() else {
378            return Err(());
379        };
380
381        Ok(())
382    }
383
384    /// Pushes a directory onto the pending directory scan queue.
385    /// As this is a breadth-first iterator, the new directory will be placed last.
386    fn queue_directory_for_scanning(&mut self, directory_path: PathBuf, depth: ScanEntryDepth) {
387        let pending_dir_entry = PendingDirectory {
388            directory_path,
389            directory_depth: depth,
390        };
391
392        self.pending_directory_stack.push_back(pending_dir_entry);
393    }
394
395    /// Returns the next directory scan entry. This will automatically manage
396    /// the currently open directory, as well as close and open new pending directories, etc.
397    ///
398    /// If the scan has been exhausted, `Ok(None)` is returned, signalling the end of the iterator.
399    ///
400    /// If following symlinks is enabled, the returned entries will have their symlink paths followed.
401    fn next_entry(&mut self) -> Result<Option<NextEntryInfo>, DirectoryScanError> {
402        loop {
403            let follow_symbolic_links = self.options.follow_symbolic_links;
404
405            let Some(current_directory_iterator) = self.current_or_next_directory_handle_mut()?
406            else {
407                return Ok(None);
408            };
409
410
411            let Some(raw_entry_result) = current_directory_iterator.iterator.next() else {
412                self.close_current_directory_handle()
413                        // PANIC SAFETY: We just held a reference to an open directory,
414                        // which means `close_current_directory_handle` will be able to remove it, 
415                        // as it does exist.
416                        .expect("at least one directory should be currently opened");
417
418                // The loop will restart and a new directory will be opened.
419                // If there are no further directories to scan, `None` will be returned from the iterator.
420
421                continue;
422            };
423
424
425
426            let raw_entry = raw_entry_result.map_err(|io_error| {
427                DirectoryScanError::UnableToReadDirectoryEntry {
428                    directory_path: self
429                                .current_directory_handle()
430                                // PANIC SAFETY: between the call to `current_or_next_directory_handle_mut` and this point,
431                                // we never call `close_current_directory_handle`.
432                                .expect("expected a directory handle to be open")
433                                .directory_path
434                                .clone(),
435                    error: io_error,
436                }
437            })?;
438
439            let raw_entry_metadata = raw_entry.metadata().map_err(|io_error| {
440                DirectoryScanError::UnableToReadDirectoryEntry {
441                    // PANIC SAFETY: between the call to `current_or_next_directory_handle_mut` and this point,
442                    // we never call `close_current_directory_handle`.
443                    directory_path: self.current_directory_handle_path_unchecked().to_path_buf(),
444                    error: io_error,
445                }
446            })?;
447
448
449            let (raw_entry_path, raw_entry_metadata) =
450                if follow_symbolic_links && raw_entry_metadata.is_symlink() {
451                    let resolved_raw_entry_path =
452                        fs::read_link(raw_entry.path()).map_err(|io_error| {
453                            DirectoryScanError::UnableToReadDirectoryEntry {
454                                // PANIC SAFETY: between the call to `current_or_next_directory_handle_mut` and this point,
455                                // we never call `close_current_directory_handle`.
456                                directory_path: self
457                                    .current_directory_handle_path_unchecked()
458                                    .to_path_buf(),
459                                error: io_error,
460                            }
461                        })?;
462
463                    let raw_entry_metadata_followed =
464                        fs::symlink_metadata(&resolved_raw_entry_path).map_err(|io_error| {
465                            DirectoryScanError::UnableToReadDirectoryEntry {
466                                // PANIC SAFETY: between the call to `current_or_next_directory_handle_mut` and this point,
467                                // we never call `close_current_directory_handle`.
468                                directory_path: self
469                                    .current_directory_handle_path_unchecked()
470                                    .to_path_buf(),
471                                error: io_error,
472                            }
473                        })?;
474
475
476                    if raw_entry_metadata_followed.is_dir() {
477                        // Function invariant upheld: `resolved_raw_entry_path` is the symlink destination.
478                        self.ensure_directory_path_does_not_lead_to_a_tree_cycle(
479                            &resolved_raw_entry_path,
480                        )?;
481                    }
482
483                    (resolved_raw_entry_path, raw_entry_metadata_followed)
484                } else {
485                    (raw_entry.path(), raw_entry_metadata)
486                };
487
488
489            return Ok(Some(NextEntryInfo {
490                path: raw_entry_path,
491                metadata: raw_entry_metadata,
492                // PANIC SAFETY: between the call to `current_or_next_directory_handle_mut` and this point,
493                // we never call `close_current_directory_handle`.
494                depth: self
495                    .current_directory_handle_depth_unchecked()
496                    .plus_one_level(),
497            }));
498        }
499    }
500}
501
502
503
504impl Iterator for BreadthFirstDirectoryIter {
505    type Item = Result<ScanEntry, DirectoryScanError>;
506
507    fn next(&mut self) -> Option<Self::Item> {
508        if !self.has_processed_base_directory {
509            self.has_processed_base_directory = true;
510
511            // Follow symlink if configured to do so.
512            let base_directory_metadata =
513                try_some!(fs::symlink_metadata(&self.base_directory), |io_error| {
514                    DirectoryScanError::UnableToReadDirectory {
515                        directory_path: self.base_directory.clone(),
516                        error: io_error,
517                    }
518                });
519
520
521            if !base_directory_metadata.is_symlink() && !base_directory_metadata.is_dir() {
522                return Some(Err(DirectoryScanError::NotADirectory {
523                    path: self.base_directory.clone(),
524                }));
525            }
526
527
528            if base_directory_metadata.is_symlink() {
529                if !self.options.yield_base_directory {
530                    // Nothing no follow, nothing to yield - the iterator will have no elements.
531                    return None;
532                }
533
534                if self.options.follow_base_directory_symbolic_link {
535                    let symlink_destination =
536                        try_some!(fs::read_link(&self.base_directory), |io_error| {
537                            DirectoryScanError::UnableToReadDirectory {
538                                directory_path: self.base_directory.clone(),
539                                error: io_error,
540                            }
541                        });
542
543                    let symlink_destination_metadata =
544                        try_some!(fs::symlink_metadata(&symlink_destination), |io_error| {
545                            DirectoryScanError::UnableToReadDirectory {
546                                directory_path: self.base_directory.clone(),
547                                error: io_error,
548                            }
549                        });
550
551                    if !symlink_destination_metadata.is_dir() {
552                        return Some(Err(DirectoryScanError::NotADirectory {
553                            path: self.base_directory.clone(),
554                        }));
555                    }
556
557
558                    // We followed the symlink, and we should now update our iterator's base directory path.
559                    self.base_directory = symlink_destination;
560                } else {
561                    // This flag will prevent the iterator from going further;
562                    // the base directory (which is a symlink) will be yielded,
563                    // but no further elements will be returned.
564                    self.has_scanned_base_directory = true;
565                }
566            }
567
568
569            if self.options.yield_base_directory {
570                return Some(Ok(ScanEntry::new(
571                    self.base_directory.clone(),
572                    base_directory_metadata,
573                    ScanEntryDepth::BaseDirectory,
574                )));
575            }
576        }
577
578
579        let next_entry = {
580            let Some(next_entry_info) = try_some!(self.next_entry()) else {
581                // No further entries, the iterator has concluded. Once this is reached,
582                // all subsequent calls to `next` will also hit this branch, returning `None`.
583                return None;
584            };
585
586
587            if next_entry_info.metadata.is_dir() {
588                let ScanEntryDepth::AtDepth {
589                    depth: current_dir_depth,
590                } = next_entry_info.depth
591                else {
592                    // PANIC SAFETY: Only the base directory can be emitted with `ScanEntryDepth::BaseDirectory`,
593                    // and the code flow ensures it's not in this branch.
594                    panic!("expected the next entry's depth to be 0+, not base directory");
595                };
596
597                match self.options.maximum_scan_depth {
598                    DirectoryScanDepthLimit::Unlimited => {
599                        self.queue_directory_for_scanning(
600                            next_entry_info.path.clone(),
601                            next_entry_info.depth,
602                        );
603                    }
604                    DirectoryScanDepthLimit::Limited { maximum_depth } => {
605                        if current_dir_depth < maximum_depth {
606                            self.queue_directory_for_scanning(
607                                next_entry_info.path.clone(),
608                                next_entry_info.depth,
609                            );
610                        }
611                    }
612                }
613            }
614
615
616            ScanEntry::new(next_entry_info.path, next_entry_info.metadata, next_entry_info.depth)
617        };
618
619
620        Some(Ok(next_entry))
621    }
622}