1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
use std::{
collections::VecDeque,
fs::Metadata,
path::{Path, PathBuf},
};
use_enabled_fs_module!();
use super::{DirectoryScanDepthLimit, DirectoryScanOptions, ScanEntry};
use crate::{directory::ScanEntryDepth, error::DirectoryScanError};
/// A currently open directory that is being iterated over (scanned).
struct OpenDirectory {
/// Path of the directory being scanned.
directory_path: PathBuf,
/// Depth of the directory, relative to the root of the scan tree.
directory_depth: ScanEntryDepth,
/// The actual directory iterator from the standard library.
iterator: fs::ReadDir,
}
/// A directory pending for a scan.
struct PendingDirectory {
/// Path of the directory to be scanned.
directory_path: PathBuf,
/// Depth of the directory, relative to the root of the scan tree.
directory_depth: ScanEntryDepth,
}
/// Represents a directory tree ancestor.
///
/// Used for symlink cycle detection when
/// [`DirectoryScanOptions::should_track_ancestors`] returns `true`.
struct Ancestor {
/// Path of the ancestor directory.
path: PathBuf,
/// Depth of the ancestor directory.
depth: ScanEntryDepth,
}
/// Internal information about the next yielded directory entry.
struct NextEntryInfo {
/// Path of the next entry.
///
/// If [`DirectoryScanOptions::follow_symbolic_links`] is `true`,
/// this path will never lead to a symlink.
path: PathBuf,
/// Metadata regarding the entry.
metadata: Metadata,
/// Depth of the entry, relative to the root of the scan tree.
depth: ScanEntryDepth,
}
/// A recursive breadth-first directory iterator.
///
/// Obtained from calling `into_iter` after initializing the directory scanner;
/// see [`DirectoryScanner::new`].
///
///
/// [`DirectoryScanner::new`]: super::DirectoryScanner::new
pub struct BreadthFirstDirectoryIter {
/// Path of the directory the scan started at.
base_directory: PathBuf,
/// Directory scanning options.
options: DirectoryScanOptions,
/// Whether the `base_directory` has been added to the pending scan queue yet.
/// If the base directory is a symbolic link to a directory and
/// [`follow_base_directory_symbolic_link`] is `true`,
/// the symlink is also resolved in this step, and the `base_directory` field value
/// is updated to reflect the symlink destination.
///
/// This is generally done on the first call to [`Self::next`].
///
/// [`follow_base_directory_symbolic_link`]: DirectoryScanOptions::follow_base_directory_symbolic_link
has_processed_base_directory: bool,
/// Whether the `base_directory` has been taken from the pending scan queue and opened for reading yet.
///
/// This is generally done on the first call to [`Self::next`].
has_scanned_base_directory: bool,
/// A stack of pending directory paths that have yet to be scanned.
///
/// Fresh entries are added to the back, and the next entry is taken from the front (FIFO order).
pending_directory_stack: VecDeque<PendingDirectory>,
/// If `Some`, this field contains the currently active directory "reader" (iterator),
/// along with information about the directory path we're scanning, and its depth in the scan tree.
currently_open_directory: Option<OpenDirectory>,
/// Contains a list of directory paths used for preventing symbolic link cycles.
/// The values are ancestors of the `currently_open_directory`, plus the current directory itself
/// (which is the last element). Items are therefore ordered from shallowest to deepest
/// (i.e. first element is a handle to the base directory).
///
/// This will always be empty if [`follow_symbolic_links`] is `false`
/// (i.e. when [`DirectoryScanOptions::should_track_ancestors`] returns `false`).
///
///
/// [`follow_symbolic_links`]: DirectoryScanOptions::follow_symbolic_links
current_directory_ancestors: Vec<Ancestor>,
}
impl BreadthFirstDirectoryIter {
pub(super) fn new<P>(base_directory: P, options: DirectoryScanOptions) -> Self
where
P: Into<PathBuf>,
{
let base_directory: PathBuf = base_directory.into();
Self {
base_directory,
has_processed_base_directory: false,
has_scanned_base_directory: false,
options,
currently_open_directory: None,
pending_directory_stack: VecDeque::new(),
current_directory_ancestors: vec![],
}
}
/// Returns a reference to the currently active (open) directory iterator, if any,
/// `None` otherwise.
fn current_directory_handle(&self) -> Option<&OpenDirectory> {
if self.currently_open_directory.is_some() {
let handle = self
.currently_open_directory
.as_ref()
.expect("currently_open_directory should be Some");
return Some(handle);
}
None
}
/// Returns the directory path of the currently active (open) directory iterator.
///
/// # Panics
/// If there is no currently open directory iterator, this method will panic.
/// It is up to the caller to ensure a directory iterator iss be currently active.
fn current_directory_handle_path_unchecked(&self) -> &Path {
&self
.current_directory_handle()
.expect("expected a directory handle to be open")
.directory_path
}
/// Returns the entry depth of the currently active (open) directory iterator.
///
/// # Panics
/// If there is no currently open directory iterator, this method will panic.
/// It is up to the caller to ensure a directory iterator iss be currently active.
fn current_directory_handle_depth_unchecked(&self) -> &ScanEntryDepth {
&self
.current_directory_handle()
.expect("expected a directory handle to be open")
.directory_depth
}
/// Returns a mutable reference to a directory iterator, either the current one,
/// or if none is active at the moment, opening the next directory iterator on the stack.
///
/// Returns `None` if the pending directory stack is empty, i.e. when the iterator has
/// no more elements to scan and yield.
fn current_or_next_directory_handle_mut(
&mut self,
) -> Result<Option<&mut OpenDirectory>, DirectoryScanError> {
if self.currently_open_directory.is_some() {
let handle = self
.currently_open_directory
.as_mut()
.expect("currently_open_directory should be Some");
return Ok(Some(handle));
}
self.open_next_directory_handle()
}
/// Returns a [`SymlinkCycleEncountered`] error if the provided `directory_path`
/// would lead to a scan cycle, e.g. when symlinks are cyclic.
///
/// This method relies on the [`Self::current_directory_ancestors`] field
/// to be properly maintained as directories are entered or exited.
///
///
/// # Invariants
/// - `directory_path` must not be a symlink to a directory (you must resolve the link yourself
/// before calling the function).
///
///
/// [`SymlinkCycleEncountered`]: DirectoryScanError::SymlinkCycleEncountered
fn ensure_directory_path_does_not_lead_to_a_tree_cycle(
&self,
directory_path: &Path,
) -> Result<(), DirectoryScanError> {
for ancestor_directory_handle in &self.current_directory_ancestors {
if directory_path.eq(&ancestor_directory_handle.path) {
return Err(DirectoryScanError::SymlinkCycleEncountered {
directory_path: directory_path.to_path_buf(),
});
}
}
Ok(())
}
/// Attempts to open the next pending directory for iteration.
///
/// On success, a `Ok(Some(&mut`[`OpenDirectory`]`))` is returned,
/// containing the newly-opened directory which is ready for iteration.
///
/// **Note that if a directory is already open when calling this,
/// it will be discarded!** Prefer using [`Self::close_current_directory_handle`]
/// when a directory is exhausted instead of directly calling this method.
///
/// # Edge cases
/// If there is no pending directory left, and the base directory has already been scanned,
/// `Ok(None)` is returned.
fn open_next_directory_handle(
&mut self,
) -> Result<Option<&mut OpenDirectory>, DirectoryScanError> {
if !self.has_scanned_base_directory {
// We've just started, perhaps having just yielded the base directory.
// As such, we should open the base directory.
let base_dir_iterator = fs::read_dir(&self.base_directory).map_err(|io_error| {
DirectoryScanError::UnableToReadDirectory {
directory_path: self.base_directory.clone(),
error: io_error,
}
})?;
let active_reader_entry = OpenDirectory {
directory_path: self.base_directory.clone(),
directory_depth: ScanEntryDepth::BaseDirectory,
iterator: base_dir_iterator,
};
assert!(self.currently_open_directory.is_none());
self.currently_open_directory = Some(active_reader_entry);
self.has_scanned_base_directory = true;
if self.options.should_track_ancestors() {
#[cfg(debug_assertions)]
{
if !self.current_directory_ancestors.is_empty() {
panic!(
"expected current_directory_ancestors to be empty \
before opening and scanning the base directory"
);
}
}
self.current_directory_ancestors.push(Ancestor {
path: self.base_directory.clone(),
depth: ScanEntryDepth::BaseDirectory,
});
}
let handle_mut = self.currently_open_directory
.as_mut()
// PANIC SAFETY: We just `push`-ed onto the vector, meaning this will never panic.
.expect("currently_open_directory should be Some");
return Ok(Some(handle_mut));
}
// The base directory has already been opened or read;
// open one pending directory from the pending directory stack instead,
// or return `None` if no pending directories are left.
let Some(next_pending_directory) = self.pending_directory_stack.pop_front() else {
return Ok(None);
};
let directory_iterator =
fs::read_dir(&next_pending_directory.directory_path).map_err(|io_error| {
DirectoryScanError::UnableToReadDirectory {
directory_path: next_pending_directory.directory_path.clone(),
error: io_error,
}
})?;
let active_reader_entry = OpenDirectory {
directory_path: next_pending_directory.directory_path.clone(),
directory_depth: next_pending_directory.directory_depth,
iterator: directory_iterator,
};
// This will also drop the previously open directory.
// It is up to the caller of this function to ensure this is wanted behaviour.
self.currently_open_directory = Some(active_reader_entry);
// We always need to pop at most one ancestor, and only when
// moving to scan an adjacent directory with the same depth.
// We never need to pop more, because we're doing a breadth-first scan.
// As such, the depth is guaranteed to be monotonically increasing.
//
// For example:
// ```
// a
// |- b
// | |-> c.bin
// | |-> d.bin
// |- e
// |- f
// | |-> g.bin
// |- h
// ```
//
// As far as cycle detection is concerned in such a tree,
// we'll need to track the ancestors in the following way:
// - before entering (scanning) `a`, append `a` to the ancestor list,
// - before entering `b`, append `b` to the ancestor list,
// - before entering `e`, pop `b` and append `e` to the ancestor list,
// - before entering `f`, append `f` to the ancestor list,
// - before entering `h`, pop `f` and append `h` to the ancestor list.
//
// As you can see, we only need to pop a single ancestor when moving through
// adjacent directories of the same depth.
if self.options.should_track_ancestors() {
if let Some(last_ancestor) = self.current_directory_ancestors.last() {
if last_ancestor
.depth
.eq(&next_pending_directory.directory_depth)
{
self.current_directory_ancestors.pop();
}
}
self.current_directory_ancestors.push(Ancestor {
path: next_pending_directory.directory_path,
depth: next_pending_directory.directory_depth,
});
}
let handle_mut = self.currently_open_directory
.as_mut()
// PANIC SAFETY: We just `push`-ed onto the vector.
.expect("currently_open_directory should be Some");
Ok(Some(handle_mut))
}
/// This method will close the current directory handle,
/// or return an `Err(())` if there is no open handle.
fn close_current_directory_handle(&mut self) -> Result<(), ()> {
let Some(_) = self.currently_open_directory.take() else {
return Err(());
};
Ok(())
}
/// Pushes a directory onto the pending directory scan queue.
/// As this is a breadth-first iterator, the new directory will be placed last.
fn queue_directory_for_scanning(&mut self, directory_path: PathBuf, depth: ScanEntryDepth) {
let pending_dir_entry = PendingDirectory {
directory_path,
directory_depth: depth,
};
self.pending_directory_stack.push_back(pending_dir_entry);
}
/// Returns the next directory scan entry. This will automatically manage
/// the currently open directory, as well as close and open new pending directories, etc.
///
/// If the scan has been exhausted, `Ok(None)` is returned, signalling the end of the iterator.
///
/// If following symlinks is enabled, the returned entries will have their symlink paths followed.
fn next_entry(&mut self) -> Result<Option<NextEntryInfo>, DirectoryScanError> {
loop {
let follow_symbolic_links = self.options.follow_symbolic_links;
let Some(current_directory_iterator) = self.current_or_next_directory_handle_mut()?
else {
return Ok(None);
};
let Some(raw_entry_result) = current_directory_iterator.iterator.next() else {
self.close_current_directory_handle()
// PANIC SAFETY: We just held a reference to an open directory,
// which means `close_current_directory_handle` will be able to remove it,
// as it does exist.
.expect("at least one directory should be currently opened");
// The loop will restart and a new directory will be opened.
// If there are no further directories to scan, `None` will be returned from the iterator.
continue;
};
let raw_entry = raw_entry_result.map_err(|io_error| {
DirectoryScanError::UnableToReadDirectoryEntry {
directory_path: self
.current_directory_handle()
// PANIC SAFETY: between the call to `current_or_next_directory_handle_mut` and this point,
// we never call `close_current_directory_handle`.
.expect("expected a directory handle to be open")
.directory_path
.clone(),
error: io_error,
}
})?;
let raw_entry_metadata = raw_entry.metadata().map_err(|io_error| {
DirectoryScanError::UnableToReadDirectoryEntry {
// PANIC SAFETY: between the call to `current_or_next_directory_handle_mut` and this point,
// we never call `close_current_directory_handle`.
directory_path: self.current_directory_handle_path_unchecked().to_path_buf(),
error: io_error,
}
})?;
let (raw_entry_path, raw_entry_metadata) =
if follow_symbolic_links && raw_entry_metadata.is_symlink() {
let resolved_raw_entry_path =
fs::read_link(raw_entry.path()).map_err(|io_error| {
DirectoryScanError::UnableToReadDirectoryEntry {
// PANIC SAFETY: between the call to `current_or_next_directory_handle_mut` and this point,
// we never call `close_current_directory_handle`.
directory_path: self
.current_directory_handle_path_unchecked()
.to_path_buf(),
error: io_error,
}
})?;
let raw_entry_metadata_followed =
fs::symlink_metadata(&resolved_raw_entry_path).map_err(|io_error| {
DirectoryScanError::UnableToReadDirectoryEntry {
// PANIC SAFETY: between the call to `current_or_next_directory_handle_mut` and this point,
// we never call `close_current_directory_handle`.
directory_path: self
.current_directory_handle_path_unchecked()
.to_path_buf(),
error: io_error,
}
})?;
if raw_entry_metadata_followed.is_dir() {
// Function invariant upheld: `resolved_raw_entry_path` is the symlink destination.
self.ensure_directory_path_does_not_lead_to_a_tree_cycle(
&resolved_raw_entry_path,
)?;
}
(resolved_raw_entry_path, raw_entry_metadata_followed)
} else {
(raw_entry.path(), raw_entry_metadata)
};
return Ok(Some(NextEntryInfo {
path: raw_entry_path,
metadata: raw_entry_metadata,
// PANIC SAFETY: between the call to `current_or_next_directory_handle_mut` and this point,
// we never call `close_current_directory_handle`.
depth: self
.current_directory_handle_depth_unchecked()
.plus_one_level(),
}));
}
}
}
impl Iterator for BreadthFirstDirectoryIter {
type Item = Result<ScanEntry, DirectoryScanError>;
fn next(&mut self) -> Option<Self::Item> {
if !self.has_processed_base_directory {
self.has_processed_base_directory = true;
// Follow symlink if configured to do so.
let base_directory_metadata =
try_some!(fs::symlink_metadata(&self.base_directory), |io_error| {
DirectoryScanError::UnableToReadDirectory {
directory_path: self.base_directory.clone(),
error: io_error,
}
});
if !base_directory_metadata.is_symlink() && !base_directory_metadata.is_dir() {
return Some(Err(DirectoryScanError::NotADirectory {
path: self.base_directory.clone(),
}));
}
if base_directory_metadata.is_symlink() {
if !self.options.yield_base_directory {
// Nothing no follow, nothing to yield - the iterator will have no elements.
return None;
}
if self.options.follow_base_directory_symbolic_link {
let symlink_destination =
try_some!(fs::read_link(&self.base_directory), |io_error| {
DirectoryScanError::UnableToReadDirectory {
directory_path: self.base_directory.clone(),
error: io_error,
}
});
let symlink_destination_metadata =
try_some!(fs::symlink_metadata(&symlink_destination), |io_error| {
DirectoryScanError::UnableToReadDirectory {
directory_path: self.base_directory.clone(),
error: io_error,
}
});
if !symlink_destination_metadata.is_dir() {
return Some(Err(DirectoryScanError::NotADirectory {
path: self.base_directory.clone(),
}));
}
// We followed the symlink, and we should now update our iterator's base directory path.
self.base_directory = symlink_destination;
} else {
// This flag will prevent the iterator from going further;
// the base directory (which is a symlink) will be yielded,
// but no further elements will be returned.
self.has_scanned_base_directory = true;
}
}
if self.options.yield_base_directory {
return Some(Ok(ScanEntry::new(
self.base_directory.clone(),
base_directory_metadata,
ScanEntryDepth::BaseDirectory,
)));
}
}
let next_entry = {
let Some(next_entry_info) = try_some!(self.next_entry()) else {
// No further entries, the iterator has concluded. Once this is reached,
// all subsequent calls to `next` will also hit this branch, returning `None`.
return None;
};
if next_entry_info.metadata.is_dir() {
let ScanEntryDepth::AtDepth {
depth: current_dir_depth,
} = next_entry_info.depth
else {
// PANIC SAFETY: Only the base directory can be emitted with `ScanEntryDepth::BaseDirectory`,
// and the code flow ensures it's not in this branch.
panic!("expected the next entry's depth to be 0+, not base directory");
};
match self.options.maximum_scan_depth {
DirectoryScanDepthLimit::Unlimited => {
self.queue_directory_for_scanning(
next_entry_info.path.clone(),
next_entry_info.depth,
);
}
DirectoryScanDepthLimit::Limited { maximum_depth } => {
if current_dir_depth < maximum_depth {
self.queue_directory_for_scanning(
next_entry_info.path.clone(),
next_entry_info.depth,
);
}
}
}
}
ScanEntry::new(next_entry_info.path, next_entry_info.metadata, next_entry_info.depth)
};
Some(Ok(next_entry))
}
}