fs_more/directory/scan/iter.rs
1use std::{
2 collections::VecDeque,
3 fs::Metadata,
4 path::{Path, PathBuf},
5};
6
7use_enabled_fs_module!();
8
9use super::{DirectoryScanDepthLimit, DirectoryScanOptions, ScanEntry};
10use crate::{directory::ScanEntryDepth, error::DirectoryScanError};
11
12
13/// A currently open directory that is being iterated over (scanned).
14struct OpenDirectory {
15 /// Path of the directory being scanned.
16 directory_path: PathBuf,
17
18 /// Depth of the directory, relative to the root of the scan tree.
19 directory_depth: ScanEntryDepth,
20
21 /// The actual directory iterator from the standard library.
22 iterator: fs::ReadDir,
23}
24
25
26/// A directory pending for a scan.
27struct PendingDirectory {
28 /// Path of the directory to be scanned.
29 directory_path: PathBuf,
30
31 /// Depth of the directory, relative to the root of the scan tree.
32 directory_depth: ScanEntryDepth,
33}
34
35
36/// Represents a directory tree ancestor.
37///
38/// Used for symlink cycle detection when
39/// [`DirectoryScanOptions::should_track_ancestors`] returns `true`.
40struct Ancestor {
41 /// Path of the ancestor directory.
42 path: PathBuf,
43
44 /// Depth of the ancestor directory.
45 depth: ScanEntryDepth,
46}
47
48
49/// Internal information about the next yielded directory entry.
50struct NextEntryInfo {
51 /// Path of the next entry.
52 ///
53 /// If [`DirectoryScanOptions::follow_symbolic_links`] is `true`,
54 /// this path will never lead to a symlink.
55 path: PathBuf,
56
57 /// Metadata regarding the entry.
58 metadata: Metadata,
59
60 /// Depth of the entry, relative to the root of the scan tree.
61 depth: ScanEntryDepth,
62}
63
64
65
66/// A recursive breadth-first directory iterator.
67///
68/// Obtained from calling `into_iter` after initializing the directory scanner;
69/// see [`DirectoryScanner::new`].
70///
71///
72/// [`DirectoryScanner::new`]: super::DirectoryScanner::new
73pub struct BreadthFirstDirectoryIter {
74 /// Path of the directory the scan started at.
75 base_directory: PathBuf,
76
77 /// Directory scanning options.
78 options: DirectoryScanOptions,
79
80 /// Whether the `base_directory` has been added to the pending scan queue yet.
81 /// If the base directory is a symbolic link to a directory and
82 /// [`follow_base_directory_symbolic_link`] is `true`,
83 /// the symlink is also resolved in this step, and the `base_directory` field value
84 /// is updated to reflect the symlink destination.
85 ///
86 /// This is generally done on the first call to [`Self::next`].
87 ///
88 /// [`follow_base_directory_symbolic_link`]: DirectoryScanOptions::follow_base_directory_symbolic_link
89 has_processed_base_directory: bool,
90
91 /// Whether the `base_directory` has been taken from the pending scan queue and opened for reading yet.
92 ///
93 /// This is generally done on the first call to [`Self::next`].
94 has_scanned_base_directory: bool,
95
96 /// A stack of pending directory paths that have yet to be scanned.
97 ///
98 /// Fresh entries are added to the back, and the next entry is taken from the front (FIFO order).
99 pending_directory_stack: VecDeque<PendingDirectory>,
100
101 /// If `Some`, this field contains the currently active directory "reader" (iterator),
102 /// along with information about the directory path we're scanning, and its depth in the scan tree.
103 currently_open_directory: Option<OpenDirectory>,
104
105 /// Contains a list of directory paths used for preventing symbolic link cycles.
106 /// The values are ancestors of the `currently_open_directory`, plus the current directory itself
107 /// (which is the last element). Items are therefore ordered from shallowest to deepest
108 /// (i.e. first element is a handle to the base directory).
109 ///
110 /// This will always be empty if [`follow_symbolic_links`] is `false`
111 /// (i.e. when [`DirectoryScanOptions::should_track_ancestors`] returns `false`).
112 ///
113 ///
114 /// [`follow_symbolic_links`]: DirectoryScanOptions::follow_symbolic_links
115 current_directory_ancestors: Vec<Ancestor>,
116}
117
118
119impl BreadthFirstDirectoryIter {
120 pub(super) fn new<P>(base_directory: P, options: DirectoryScanOptions) -> Self
121 where
122 P: Into<PathBuf>,
123 {
124 let base_directory: PathBuf = base_directory.into();
125
126 Self {
127 base_directory,
128 has_processed_base_directory: false,
129 has_scanned_base_directory: false,
130 options,
131 currently_open_directory: None,
132 pending_directory_stack: VecDeque::new(),
133 current_directory_ancestors: vec![],
134 }
135 }
136
137
138 /// Returns a reference to the currently active (open) directory iterator, if any,
139 /// `None` otherwise.
140 fn current_directory_handle(&self) -> Option<&OpenDirectory> {
141 if self.currently_open_directory.is_some() {
142 let handle = self
143 .currently_open_directory
144 .as_ref()
145 .expect("currently_open_directory should be Some");
146
147 return Some(handle);
148 }
149
150 None
151 }
152
153 /// Returns the directory path of the currently active (open) directory iterator.
154 ///
155 /// # Panics
156 /// If there is no currently open directory iterator, this method will panic.
157 /// It is up to the caller to ensure a directory iterator iss be currently active.
158 fn current_directory_handle_path_unchecked(&self) -> &Path {
159 &self
160 .current_directory_handle()
161 .expect("expected a directory handle to be open")
162 .directory_path
163 }
164
165 /// Returns the entry depth of the currently active (open) directory iterator.
166 ///
167 /// # Panics
168 /// If there is no currently open directory iterator, this method will panic.
169 /// It is up to the caller to ensure a directory iterator iss be currently active.
170 fn current_directory_handle_depth_unchecked(&self) -> &ScanEntryDepth {
171 &self
172 .current_directory_handle()
173 .expect("expected a directory handle to be open")
174 .directory_depth
175 }
176
177 /// Returns a mutable reference to a directory iterator, either the current one,
178 /// or if none is active at the moment, opening the next directory iterator on the stack.
179 ///
180 /// Returns `None` if the pending directory stack is empty, i.e. when the iterator has
181 /// no more elements to scan and yield.
182 fn current_or_next_directory_handle_mut(
183 &mut self,
184 ) -> Result<Option<&mut OpenDirectory>, DirectoryScanError> {
185 if self.currently_open_directory.is_some() {
186 let handle = self
187 .currently_open_directory
188 .as_mut()
189 .expect("currently_open_directory should be Some");
190
191 return Ok(Some(handle));
192 }
193
194 self.open_next_directory_handle()
195 }
196
197 /// Returns a [`SymlinkCycleEncountered`] error if the provided `directory_path`
198 /// would lead to a scan cycle, e.g. when symlinks are cyclic.
199 ///
200 /// This method relies on the [`Self::current_directory_ancestors`] field
201 /// to be properly maintained as directories are entered or exited.
202 ///
203 ///
204 /// # Invariants
205 /// - `directory_path` must not be a symlink to a directory (you must resolve the link yourself
206 /// before calling the function).
207 ///
208 ///
209 /// [`SymlinkCycleEncountered`]: DirectoryScanError::SymlinkCycleEncountered
210 fn ensure_directory_path_does_not_lead_to_a_tree_cycle(
211 &self,
212 directory_path: &Path,
213 ) -> Result<(), DirectoryScanError> {
214 for ancestor_directory_handle in &self.current_directory_ancestors {
215 if directory_path.eq(&ancestor_directory_handle.path) {
216 return Err(DirectoryScanError::SymlinkCycleEncountered {
217 directory_path: directory_path.to_path_buf(),
218 });
219 }
220 }
221
222 Ok(())
223 }
224
225 /// Attempts to open the next pending directory for iteration.
226 ///
227 /// On success, a `Ok(Some(&mut`[`OpenDirectory`]`))` is returned,
228 /// containing the newly-opened directory which is ready for iteration.
229 ///
230 /// **Note that if a directory is already open when calling this,
231 /// it will be discarded!** Prefer using [`Self::close_current_directory_handle`]
232 /// when a directory is exhausted instead of directly calling this method.
233 ///
234 /// # Edge cases
235 /// If there is no pending directory left, and the base directory has already been scanned,
236 /// `Ok(None)` is returned.
237 fn open_next_directory_handle(
238 &mut self,
239 ) -> Result<Option<&mut OpenDirectory>, DirectoryScanError> {
240 if !self.has_scanned_base_directory {
241 // We've just started, perhaps having just yielded the base directory.
242 // As such, we should open the base directory.
243
244 let base_dir_iterator = fs::read_dir(&self.base_directory).map_err(|io_error| {
245 DirectoryScanError::UnableToReadDirectory {
246 directory_path: self.base_directory.clone(),
247 error: io_error,
248 }
249 })?;
250
251
252 let active_reader_entry = OpenDirectory {
253 directory_path: self.base_directory.clone(),
254 directory_depth: ScanEntryDepth::BaseDirectory,
255 iterator: base_dir_iterator,
256 };
257
258
259 assert!(self.currently_open_directory.is_none());
260
261 self.currently_open_directory = Some(active_reader_entry);
262 self.has_scanned_base_directory = true;
263
264
265 if self.options.should_track_ancestors() {
266 #[cfg(debug_assertions)]
267 {
268 if !self.current_directory_ancestors.is_empty() {
269 panic!(
270 "expected current_directory_ancestors to be empty \
271 before opening and scanning the base directory"
272 );
273 }
274 }
275
276 self.current_directory_ancestors.push(Ancestor {
277 path: self.base_directory.clone(),
278 depth: ScanEntryDepth::BaseDirectory,
279 });
280 }
281
282
283 let handle_mut = self.currently_open_directory
284 .as_mut()
285 // PANIC SAFETY: We just `push`-ed onto the vector, meaning this will never panic.
286 .expect("currently_open_directory should be Some");
287
288 return Ok(Some(handle_mut));
289 }
290
291
292 // The base directory has already been opened or read;
293 // open one pending directory from the pending directory stack instead,
294 // or return `None` if no pending directories are left.
295 let Some(next_pending_directory) = self.pending_directory_stack.pop_front() else {
296 return Ok(None);
297 };
298
299
300 let directory_iterator =
301 fs::read_dir(&next_pending_directory.directory_path).map_err(|io_error| {
302 DirectoryScanError::UnableToReadDirectory {
303 directory_path: next_pending_directory.directory_path.clone(),
304 error: io_error,
305 }
306 })?;
307
308 let active_reader_entry = OpenDirectory {
309 directory_path: next_pending_directory.directory_path.clone(),
310 directory_depth: next_pending_directory.directory_depth,
311 iterator: directory_iterator,
312 };
313
314
315
316 // This will also drop the previously open directory.
317 // It is up to the caller of this function to ensure this is wanted behaviour.
318 self.currently_open_directory = Some(active_reader_entry);
319
320
321 // We always need to pop at most one ancestor, and only when
322 // moving to scan an adjacent directory with the same depth.
323 // We never need to pop more, because we're doing a breadth-first scan.
324 // As such, the depth is guaranteed to be monotonically increasing.
325 //
326 // For example:
327 // ```
328 // a
329 // |- b
330 // | |-> c.bin
331 // | |-> d.bin
332 // |- e
333 // |- f
334 // | |-> g.bin
335 // |- h
336 // ```
337 //
338 // As far as cycle detection is concerned in such a tree,
339 // we'll need to track the ancestors in the following way:
340 // - before entering (scanning) `a`, append `a` to the ancestor list,
341 // - before entering `b`, append `b` to the ancestor list,
342 // - before entering `e`, pop `b` and append `e` to the ancestor list,
343 // - before entering `f`, append `f` to the ancestor list,
344 // - before entering `h`, pop `f` and append `h` to the ancestor list.
345 //
346 // As you can see, we only need to pop a single ancestor when moving through
347 // adjacent directories of the same depth.
348 if self.options.should_track_ancestors() {
349 if let Some(last_ancestor) = self.current_directory_ancestors.last() {
350 if last_ancestor
351 .depth
352 .eq(&next_pending_directory.directory_depth)
353 {
354 self.current_directory_ancestors.pop();
355 }
356 }
357
358 self.current_directory_ancestors.push(Ancestor {
359 path: next_pending_directory.directory_path,
360 depth: next_pending_directory.directory_depth,
361 });
362 }
363
364
365
366 let handle_mut = self.currently_open_directory
367 .as_mut()
368 // PANIC SAFETY: We just `push`-ed onto the vector.
369 .expect("currently_open_directory should be Some");
370
371 Ok(Some(handle_mut))
372 }
373
374 /// This method will close the current directory handle,
375 /// or return an `Err(())` if there is no open handle.
376 fn close_current_directory_handle(&mut self) -> Result<(), ()> {
377 let Some(_) = self.currently_open_directory.take() else {
378 return Err(());
379 };
380
381 Ok(())
382 }
383
384 /// Pushes a directory onto the pending directory scan queue.
385 /// As this is a breadth-first iterator, the new directory will be placed last.
386 fn queue_directory_for_scanning(&mut self, directory_path: PathBuf, depth: ScanEntryDepth) {
387 let pending_dir_entry = PendingDirectory {
388 directory_path,
389 directory_depth: depth,
390 };
391
392 self.pending_directory_stack.push_back(pending_dir_entry);
393 }
394
395 /// Returns the next directory scan entry. This will automatically manage
396 /// the currently open directory, as well as close and open new pending directories, etc.
397 ///
398 /// If the scan has been exhausted, `Ok(None)` is returned, signalling the end of the iterator.
399 ///
400 /// If following symlinks is enabled, the returned entries will have their symlink paths followed.
401 fn next_entry(&mut self) -> Result<Option<NextEntryInfo>, DirectoryScanError> {
402 loop {
403 let follow_symbolic_links = self.options.follow_symbolic_links;
404
405 let Some(current_directory_iterator) = self.current_or_next_directory_handle_mut()?
406 else {
407 return Ok(None);
408 };
409
410
411 let Some(raw_entry_result) = current_directory_iterator.iterator.next() else {
412 self.close_current_directory_handle()
413 // PANIC SAFETY: We just held a reference to an open directory,
414 // which means `close_current_directory_handle` will be able to remove it,
415 // as it does exist.
416 .expect("at least one directory should be currently opened");
417
418 // The loop will restart and a new directory will be opened.
419 // If there are no further directories to scan, `None` will be returned from the iterator.
420
421 continue;
422 };
423
424
425
426 let raw_entry = raw_entry_result.map_err(|io_error| {
427 DirectoryScanError::UnableToReadDirectoryEntry {
428 directory_path: self
429 .current_directory_handle()
430 // PANIC SAFETY: between the call to `current_or_next_directory_handle_mut` and this point,
431 // we never call `close_current_directory_handle`.
432 .expect("expected a directory handle to be open")
433 .directory_path
434 .clone(),
435 error: io_error,
436 }
437 })?;
438
439 let raw_entry_metadata = raw_entry.metadata().map_err(|io_error| {
440 DirectoryScanError::UnableToReadDirectoryEntry {
441 // PANIC SAFETY: between the call to `current_or_next_directory_handle_mut` and this point,
442 // we never call `close_current_directory_handle`.
443 directory_path: self.current_directory_handle_path_unchecked().to_path_buf(),
444 error: io_error,
445 }
446 })?;
447
448
449 let (raw_entry_path, raw_entry_metadata) =
450 if follow_symbolic_links && raw_entry_metadata.is_symlink() {
451 let resolved_raw_entry_path =
452 fs::read_link(raw_entry.path()).map_err(|io_error| {
453 DirectoryScanError::UnableToReadDirectoryEntry {
454 // PANIC SAFETY: between the call to `current_or_next_directory_handle_mut` and this point,
455 // we never call `close_current_directory_handle`.
456 directory_path: self
457 .current_directory_handle_path_unchecked()
458 .to_path_buf(),
459 error: io_error,
460 }
461 })?;
462
463 let raw_entry_metadata_followed =
464 fs::symlink_metadata(&resolved_raw_entry_path).map_err(|io_error| {
465 DirectoryScanError::UnableToReadDirectoryEntry {
466 // PANIC SAFETY: between the call to `current_or_next_directory_handle_mut` and this point,
467 // we never call `close_current_directory_handle`.
468 directory_path: self
469 .current_directory_handle_path_unchecked()
470 .to_path_buf(),
471 error: io_error,
472 }
473 })?;
474
475
476 if raw_entry_metadata_followed.is_dir() {
477 // Function invariant upheld: `resolved_raw_entry_path` is the symlink destination.
478 self.ensure_directory_path_does_not_lead_to_a_tree_cycle(
479 &resolved_raw_entry_path,
480 )?;
481 }
482
483 (resolved_raw_entry_path, raw_entry_metadata_followed)
484 } else {
485 (raw_entry.path(), raw_entry_metadata)
486 };
487
488
489 return Ok(Some(NextEntryInfo {
490 path: raw_entry_path,
491 metadata: raw_entry_metadata,
492 // PANIC SAFETY: between the call to `current_or_next_directory_handle_mut` and this point,
493 // we never call `close_current_directory_handle`.
494 depth: self
495 .current_directory_handle_depth_unchecked()
496 .plus_one_level(),
497 }));
498 }
499 }
500}
501
502
503
504impl Iterator for BreadthFirstDirectoryIter {
505 type Item = Result<ScanEntry, DirectoryScanError>;
506
507 fn next(&mut self) -> Option<Self::Item> {
508 if !self.has_processed_base_directory {
509 self.has_processed_base_directory = true;
510
511 // Follow symlink if configured to do so.
512 let base_directory_metadata =
513 try_some!(fs::symlink_metadata(&self.base_directory), |io_error| {
514 DirectoryScanError::UnableToReadDirectory {
515 directory_path: self.base_directory.clone(),
516 error: io_error,
517 }
518 });
519
520
521 if !base_directory_metadata.is_symlink() && !base_directory_metadata.is_dir() {
522 return Some(Err(DirectoryScanError::NotADirectory {
523 path: self.base_directory.clone(),
524 }));
525 }
526
527
528 if base_directory_metadata.is_symlink() {
529 if !self.options.yield_base_directory {
530 // Nothing no follow, nothing to yield - the iterator will have no elements.
531 return None;
532 }
533
534 if self.options.follow_base_directory_symbolic_link {
535 let symlink_destination =
536 try_some!(fs::read_link(&self.base_directory), |io_error| {
537 DirectoryScanError::UnableToReadDirectory {
538 directory_path: self.base_directory.clone(),
539 error: io_error,
540 }
541 });
542
543 let symlink_destination_metadata =
544 try_some!(fs::symlink_metadata(&symlink_destination), |io_error| {
545 DirectoryScanError::UnableToReadDirectory {
546 directory_path: self.base_directory.clone(),
547 error: io_error,
548 }
549 });
550
551 if !symlink_destination_metadata.is_dir() {
552 return Some(Err(DirectoryScanError::NotADirectory {
553 path: self.base_directory.clone(),
554 }));
555 }
556
557
558 // We followed the symlink, and we should now update our iterator's base directory path.
559 self.base_directory = symlink_destination;
560 } else {
561 // This flag will prevent the iterator from going further;
562 // the base directory (which is a symlink) will be yielded,
563 // but no further elements will be returned.
564 self.has_scanned_base_directory = true;
565 }
566 }
567
568
569 if self.options.yield_base_directory {
570 return Some(Ok(ScanEntry::new(
571 self.base_directory.clone(),
572 base_directory_metadata,
573 ScanEntryDepth::BaseDirectory,
574 )));
575 }
576 }
577
578
579 let next_entry = {
580 let Some(next_entry_info) = try_some!(self.next_entry()) else {
581 // No further entries, the iterator has concluded. Once this is reached,
582 // all subsequent calls to `next` will also hit this branch, returning `None`.
583 return None;
584 };
585
586
587 if next_entry_info.metadata.is_dir() {
588 let ScanEntryDepth::AtDepth {
589 depth: current_dir_depth,
590 } = next_entry_info.depth
591 else {
592 // PANIC SAFETY: Only the base directory can be emitted with `ScanEntryDepth::BaseDirectory`,
593 // and the code flow ensures it's not in this branch.
594 panic!("expected the next entry's depth to be 0+, not base directory");
595 };
596
597 match self.options.maximum_scan_depth {
598 DirectoryScanDepthLimit::Unlimited => {
599 self.queue_directory_for_scanning(
600 next_entry_info.path.clone(),
601 next_entry_info.depth,
602 );
603 }
604 DirectoryScanDepthLimit::Limited { maximum_depth } => {
605 if current_dir_depth < maximum_depth {
606 self.queue_directory_for_scanning(
607 next_entry_info.path.clone(),
608 next_entry_info.depth,
609 );
610 }
611 }
612 }
613 }
614
615
616 ScanEntry::new(next_entry_info.path, next_entry_info.metadata, next_entry_info.depth)
617 };
618
619
620 Some(Ok(next_entry))
621 }
622}