1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
use std::path::{Path, PathBuf};

use_enabled_fs_module!();

use crate::{
    error::{DirectoryScanError, DirectorySizeScanError, FileSizeError, IsDirectoryEmptyError},
    file::file_size_in_bytes,
};



/// A list of file and directory paths.
///
/// You can obtain this from [`DirectoryScan::into_scanned_files_and_directories`],
/// which moves the scan's file and directory lists into this struct.
///
/// The struct is marked `#[non_exhaustive]`, meaning code outside of this crate
/// can read its fields, but can't construct it with a struct literal
/// or destructure it without a trailing `..`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct ScannedFilesAndDirectories {
    /// Scanned files (their paths).
    pub files: Vec<PathBuf>,

    /// Scanned directories (their paths).
    pub directories: Vec<PathBuf>,
}



/// The maximum directory scan depth option.
///
/// Used primarily in [`DirectoryScan::scan_with_options`]
/// (through [`DirectoryScanOptions::maximum_scan_depth`]).
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum DirectoryScanDepthLimit {
    /// No scan depth limit: every subdirectory that is encountered is scanned as well.
    Unlimited,

    /// Scan depth is limited to `maximum_depth`, where the value refers to
    /// the maximum depth of the subdirectory whose contents are still listed.
    ///
    /// Subdirectories found at the limit are still listed in the scan results themselves;
    /// it is only their *contents* that are not scanned.
    ///
    ///
    /// # Examples
    /// `maximum_depth = 0` indicates a scan that will cover only the files and directories
    /// directly in the source directory.
    ///
    /// ```md
    /// ~/scanned-directory
    ///  |- foo.csv
    ///  |- foo-2.csv
    ///  |- bar/
    ///     (no entries listed)
    /// ```
    ///
    /// Notice how *contents* of the `~/scanned-directory/bar/`
    /// directory are not returned in the scan when using depth `0`.
    ///
    ///
    /// <br>
    ///
    /// `maximum_depth = 1` will cover the files and directories directly in the source directory
    /// plus one level of files and subdirectories deeper.
    ///
    /// ```md
    /// ~/scanned-directory
    ///  |- foo.csv
    ///  |- foo-2.csv
    ///  |- bar/
    ///     |- hello-world.txt
    ///     |- bar2/
    ///        (no entries listed)
    /// ```
    ///
    /// Notice how contents of `~/scanned-directory/bar` are listed,
    /// but contents of `~/scanned-directory/bar/bar2` are not.
    Limited {
        /// Maximum scan depth (`0` means only the direct contents of the scanned directory).
        maximum_depth: usize,
    },
}

/// Options that influence [`DirectoryScan`].
///
/// The [`Default`] implementation of this struct selects an unlimited scan depth
/// and does not follow symbolic links.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct DirectoryScanOptions {
    /// The maximum directory scanning depth, see [`DirectoryScanDepthLimit`].
    pub maximum_scan_depth: DirectoryScanDepthLimit,

    /// Whether to follow symbolic links when scanning or not.
    ///
    /// ## If enabled
    /// We'll follow the symbolic links, even if they lead outside the base `directory_path`.
    /// Note that this means the files and directories included in the scan results
    /// **might not necessarily be sub-paths of the provided base `directory_path`**.
    ///
    /// If a symbolic link points to a directory, that directory is scanned as well,
    /// provided that the scan depth limit allows it.
    ///
    /// If a symbolic link turns out to be broken (its destination doesn't exist),
    /// it is simply ignored (not included in the scan results).
    ///
    ///
    /// ## If disabled
    /// When we encounter a symbolic link, the results will include the file path of
    /// the symbolic link itself, *not the link's destination path*.
    ///
    /// If an encountered symbolic link points to a directory, it will
    /// be included in the results in a similar manner, but with one significant difference:
    /// as we won't resolve symbolic links, the files and subdirectories of that symlinked directory
    /// will not be scanned, even if the scan depth limit would have allowed it.
    ///
    /// If a symbolic link turns out to be broken (its destination doesn't exist),
    /// it is simply ignored (not included in the scan results).
    pub follow_symbolic_links: bool,
}

impl Default for DirectoryScanOptions {
    /// Builds the default set of directory scanning options:
    /// - the scan depth is unlimited ([`DirectoryScanDepthLimit::Unlimited`]),
    /// - symbolic links are not followed.
    fn default() -> Self {
        let maximum_scan_depth = DirectoryScanDepthLimit::Unlimited;
        let follow_symbolic_links = false;

        Self {
            maximum_scan_depth,
            follow_symbolic_links,
        }
    }
}


/// A directory scanner with configurable scan depth and symlink behaviour.
///
/// This scanner is able to recursively iterate over the directory
/// as well as optionally follow symbolic links. If, however, you're
/// looking for something with a bit more features, such as lazy iteration
/// and sorting, consider the [`walkdir`](https://docs.rs/walkdir) crate.
///
/// A scan is performed eagerly by [`DirectoryScan::scan_with_options`];
/// the resulting struct only holds the paths that were found at that time.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DirectoryScan {
    /// Path of the directory that was scanned.
    root_directory_path: PathBuf,

    /// A `bool` indicating whether this scan covered the entire directory tree.
    ///
    /// For example, this can be `false` when the user limits the scan depth
    /// to e.g. [`DirectoryScanDepthLimit::Limited`]`{ maximum_depth: 1 }`,
    /// but the actual directory structure has e.g. three layers of subdirectories and files.
    ///
    /// If `maximum_scan_depth` is set to
    /// [`DirectoryScanDepthLimit::Unlimited`][]
    /// in the constructor for this scan, this field will always be `true`.
    covers_entire_subtree: bool,

    /// Files that were found in the scan.
    files: Vec<PathBuf>,

    /// Directories that were found in the scan.
    /// Doesn't include the root directory (`root_directory_path`).
    directories: Vec<PathBuf>,
}

impl DirectoryScan {
    /// Perform a directory scan.
    ///
    /// The scan is performed eagerly: when this function returns `Ok`, all files
    /// and directories that are within reach of the configured options have been collected.
    ///
    ///
    /// # Scan depth
    /// Maximum scanning depth can be configured by setting
    /// [`options.maximum_scan_depth`].
    ///
    ///
    /// # Symbolic links
    /// This scanner can follow symbolic links, see [`options.follow_symbolic_links`]
    /// for more information.
    ///
    /// Relative symbolic link targets are interpreted relative to the directory
    /// that contains the link (not relative to the current working directory).
    ///
    ///
    /// ## `directory_path` symlink behaviour
    /// Regardless of the symbolic link option described above:
    /// if `directory_path` itself is a symbolic link to a directory,
    /// the link destination will be resolved before beginning the scan.
    ///
    ///
    /// # Errors
    /// - [`DirectoryScanError::NotFound`] if `directory_path` does not exist.
    /// - [`DirectoryScanError::NotADirectory`] if `directory_path` exists, but is not a directory.
    /// - [`DirectoryScanError::UnableToReadDirectory`] if the existence of `directory_path`
    ///   can't be determined, or if a directory that is part of the scan can't be opened for reading.
    /// - [`DirectoryScanError::UnableToReadDirectoryItem`] if an entry of a scanned directory
    ///   (or its file type, symbolic link target or metadata) can't be read.
    ///
    ///
    /// [`options.follow_symbolic_links`]: DirectoryScanOptions::follow_symbolic_links
    /// [`options.maximum_scan_depth`]: DirectoryScanOptions::maximum_scan_depth
    pub fn scan_with_options<P>(
        directory_path: P,
        options: DirectoryScanOptions,
    ) -> Result<Self, DirectoryScanError>
    where
        P: Into<PathBuf>,
    {
        let directory_path: PathBuf = directory_path.into();


        // Ensure the directory exists. We use `try_exists`
        // instead of `exists` to catch permission and other IO errors
        // as distinct from the `DirectoryScanError::NotFound` error.

        match directory_path.try_exists() {
            Ok(true) => {}
            Ok(false) => {
                return Err(DirectoryScanError::NotFound {
                    path: directory_path,
                });
            }
            Err(error) => {
                return Err(DirectoryScanError::UnableToReadDirectory {
                    directory_path,
                    error,
                });
            }
        }

        if !directory_path.is_dir() {
            return Err(DirectoryScanError::NotADirectory {
                path: directory_path,
            });
        }


        let mut file_list = Vec::new();
        let mut directory_list = Vec::new();
        let mut actual_tree_is_deeper_than_scan = false;


        /// A directory that still needs to be scanned.
        struct PendingDirectoryScan {
            /// The directory to scan.
            path: PathBuf,

            /// How deep the directory is. The initial `directory_path` has a depth of `0`,
            /// a direct directory descendant in it has `1`, and so on.
            depth: usize,
        }

        impl PendingDirectoryScan {
            #[inline]
            pub fn new(path: PathBuf, depth: usize) -> Self {
                Self { path, depth }
            }
        }


        // A stack (LIFO) of directories that still need to be scanned:
        // the most recently discovered directory is scanned first.
        let mut directory_scan_stack = vec![PendingDirectoryScan::new(directory_path.clone(), 0)];


        while let Some(next_directory) = directory_scan_stack.pop() {
            // Whether subdirectories found inside `next_directory` may be scanned as well.
            // This depends only on the depth of `next_directory`, so it is computed once per directory.
            let may_descend = match options.maximum_scan_depth {
                DirectoryScanDepthLimit::Unlimited => true,
                DirectoryScanDepthLimit::Limited { maximum_depth } => {
                    next_directory.depth < maximum_depth
                }
            };

            let child_depth = next_directory.depth + 1;

            // All per-entry IO failures are reported against the directory being scanned.
            let directory_item_error = |error: std::io::Error| {
                DirectoryScanError::UnableToReadDirectoryItem {
                    directory_path: next_directory.path.clone(),
                    error,
                }
            };


            let directory_entry_iterator = fs::read_dir(&next_directory.path).map_err(|error| {
                DirectoryScanError::UnableToReadDirectory {
                    directory_path: next_directory.path.clone(),
                    error,
                }
            })?;


            for directory_entry in directory_entry_iterator {
                let directory_entry = directory_entry.map_err(directory_item_error)?;
                let item_file_type = directory_entry.file_type().map_err(directory_item_error)?;
                let entry_path = directory_entry.path();


                if item_file_type.is_file() {
                    // Files are simply added to the resulting scan and no further action is needed.

                    file_list.push(entry_path);
                } else if item_file_type.is_dir() {
                    // If the scan depth limit allows it, sub-directories will need to be scanned
                    // for additional content. We can do that by adding them to the `directory_scan_stack`.

                    if may_descend {
                        directory_scan_stack
                            .push(PendingDirectoryScan::new(entry_path.clone(), child_depth));
                    } else {
                        // This marks down that we weren't able to scan the
                        // full directory tree due to scan depth limits.
                        actual_tree_is_deeper_than_scan = true;
                    }

                    directory_list.push(entry_path);
                } else if item_file_type.is_symlink() {
                    // If `follow_symbolic_links` is set to `true`, we follow the link to its destination
                    // and append that *destination* path to the file or directory list,
                    // incrementing the depth as we would for normal directories.

                    // If it is set to `false`, we find whether it points to a file or a directory,
                    // then just include the original non-resolved path in the results.

                    let link_target = fs::read_link(&entry_path).map_err(directory_item_error)?;

                    // `read_link` returns the link contents verbatim. A relative target must be
                    // interpreted relative to the directory that contains the link; using it as-is
                    // would resolve it against the process' current working directory instead.
                    let resolved_symlink_path = if link_target.is_absolute() {
                        link_target
                    } else {
                        next_directory.path.join(link_target)
                    };

                    match resolved_symlink_path.try_exists() {
                        Ok(true) => {}
                        // Broken symbolic links are ignored.
                        Ok(false) => continue,
                        Err(error) => return Err(directory_item_error(error)),
                    }

                    let resolved_symlink_metadata =
                        fs::metadata(&resolved_symlink_path).map_err(directory_item_error)?;


                    if options.follow_symbolic_links {
                        if resolved_symlink_metadata.is_file() {
                            file_list.push(resolved_symlink_path);
                        } else if resolved_symlink_metadata.is_dir() {
                            // Depth settings are respected if the destination is a directory.
                            // NOTE(review): symbolic links that form a cycle are not detected here.
                            if may_descend {
                                directory_scan_stack.push(PendingDirectoryScan::new(
                                    resolved_symlink_path.clone(),
                                    child_depth,
                                ));
                            } else {
                                actual_tree_is_deeper_than_scan = true;
                            }

                            directory_list.push(resolved_symlink_path);
                        }
                    } else if resolved_symlink_metadata.is_file() {
                        file_list.push(entry_path);
                    } else if resolved_symlink_metadata.is_dir() {
                        directory_list.push(entry_path);
                    }
                }
            }
        }

        Ok(Self {
            root_directory_path: directory_path,
            covers_entire_subtree: !actual_tree_is_deeper_than_scan,
            files: file_list,
            directories: directory_list,
        })
    }


    /// Borrows the paths of all files found by this scan.
    ///
    /// The paths are returned exactly as the scan recorded them.
    /// Use [`Self::into_files`] if you need an owned [`Vec`] instead.
    pub fn files(&self) -> &[PathBuf] {
        self.files.as_slice()
    }

    /// Consumes `self` and returns a [`Vec`] containing all scanned files (paths are absolute).
    ///
    /// If you are also interested in directories, look at [`Self::files`] + [`Self::directories`]
    /// or [`Self::into_scanned_files_and_directories`] instead.
    pub fn into_files(self) -> Vec<PathBuf> {
        self.files
    }

    /// Borrows the paths of all directories found by this scan.
    ///
    /// The paths are returned exactly as the scan recorded them.
    /// Use [`Self::into_directories`] if you need an owned [`Vec`] instead.
    pub fn directories(&self) -> &[PathBuf] {
        self.directories.as_slice()
    }

    /// Consumes `self` and returns a [`Vec`] containing the paths of all scanned directories.
    ///
    /// The list of scanned files is dropped in the process.
    /// If you are also interested in files, look at [`Self::files`] + [`Self::directories`]
    /// or [`Self::into_scanned_files_and_directories`] instead.
    pub fn into_directories(self) -> Vec<PathBuf> {
        // This must hand out the *directory* list; `files` has the same type,
        // so returning the wrong field would compile without any warning.
        self.directories
    }

    /// Consumes `self` and returns a small struct containing two fields: `files` and `directories`.
    ///
    /// Use this method when you wish to consume the scanner and are interested in both scanned files and directories.
    /// Alternatives that don't consume the scanner are [`Self::files`] and [`Self::directories`].
    pub fn into_scanned_files_and_directories(self) -> ScannedFilesAndDirectories {
        ScannedFilesAndDirectories {
            files: self.files,
            directories: self.directories,
        }
    }

    /// Returns the total size in bytes of all scanned files and directories.
    ///
    ///
    /// ## Potential file system race conditions
    /// *Careful:* this method iterates over the scanned files and directories and queries their size at call time.
    /// This means the caller will get an up-to-date directory size if they happen to call the method multiple times,
    /// potentially after modifying the one of the scanned files.
    ///
    /// However, it also means that it this method *can* return, among other things,
    /// an `Err(`[`DirectorySizeScanError::ScanEntryNoLongerExists`]`)`
    /// if any file or directory that was scanned at initialization has been removed since.
    /// The same applies for files changing their read permissions, with that usually resulting in
    /// `Err(`[`DirectorySizeScanError::UnableToAccessFile`]`)`.
    ///
    /// This is very much the same thing as the relatively well-known file system race condition
    /// inherent in `if file_exists(): then open_file()`
    /// ([time-of-check, time-of-use](https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use)),
    /// just on a bigger scale.
    ///
    /// The impact of this is---in most cases---relatively low, but it is worth noting.
    ///
    ///
    /// ## Impacts of scan depth limits
    /// *Careful:* if you initialized [`DirectoryScan`] with a scan depth limit
    /// that is smaller than the actual depth of the directory tree you're scanning,
    /// the value returned by this function will be smaller than
    /// the "real" contents of that directory.
    ///
    /// It is up to the user to decide whether that is desired behavior or not.
    /// To find out whether the returned number of bytes will not reflect the full depth
    /// of the directory structure, see [`Self::covers_entire_directory_tree`].
    pub fn total_size_in_bytes(&self) -> Result<u64, DirectorySizeScanError> {
        let mut total_bytes = 0;

        for file_path in &self.files {
            let file_size_bytes = file_size_in_bytes(file_path).map_err(|error| match error {
                FileSizeError::NotFound { path } => {
                    DirectorySizeScanError::ScanEntryNoLongerExists { path }
                }
                FileSizeError::NotAFile { path } => {
                    DirectorySizeScanError::ScanEntryNoLongerExists { path }
                }
                FileSizeError::UnableToAccessFile { file_path, error } => {
                    DirectorySizeScanError::UnableToAccessFile { file_path, error }
                }
                FileSizeError::OtherIoError { error } => {
                    DirectorySizeScanError::OtherIoError { error }
                }
            })?;

            total_bytes += file_size_bytes;
        }

        for directory_path in &self.directories {
            let directory_size_bytes = fs::metadata(directory_path)
                .map_err(|_| DirectorySizeScanError::ScanEntryNoLongerExists {
                    path: directory_path.to_path_buf(),
                })?
                .len();

            total_bytes += directory_size_bytes;
        }

        Ok(total_bytes)
    }

    /// Returns a `bool` indicating whether this scan covered the entire directory tree.
    ///
    /// For example, this can be `false` when the user limits the scan depth
    /// to e.g. [`DirectoryScanDepthLimit::Limited`]`{ maximum_depth: 1 }`,
    /// but the actual directory structure has e.g. three layers of subdirectories and files.
    ///
    /// If `maximum_scan_depth` is set to
    /// [`DirectoryScanDepthLimit::Unlimited`][]
    /// in the constructor for this scan, this method will always return `true`.
    pub fn covers_entire_directory_tree(&self) -> bool {
        self.covers_entire_subtree
    }
}



/// Checks whether the given directory has no entries at all.
///
/// Returns `Ok(true)` if the directory is completely empty, `Ok(false)` if it is not,
/// and `Err(_)` if the directory can't be opened for reading.
///
/// No existence or file type checks are performed beforehand, which is why
/// the error type is just a plain (and not very informative) [`std::io::Error`].
pub(crate) fn is_directory_empty_unchecked(directory_path: &Path) -> std::io::Result<bool> {
    // Pulling a single item out of the iterator is enough to tell; the rest is never read.
    fs::read_dir(directory_path).map(|mut entries| entries.next().is_none())
}


/// Returns a `bool` indicating whether the given directory is completely empty:
/// `Ok(true)` if it contains no entries, `Ok(false)` if it contains at least one.
///
/// Permission and other errors will *not* be coerced into `false`, but will raise a distinct error,
/// see [`IsDirectoryEmptyError`].
///
///
/// # Errors
/// - [`IsDirectoryEmptyError::NotFound`] if the metadata of `directory_path` can't be read
///   (any metadata failure is reported this way).
/// - [`IsDirectoryEmptyError::NotADirectory`] if `directory_path` exists, but is not a directory.
/// - [`IsDirectoryEmptyError::UnableToReadDirectory`] if the directory can't be opened for reading.
pub fn is_directory_empty<P>(directory_path: P) -> Result<bool, IsDirectoryEmptyError>
where
    P: AsRef<Path>,
{
    let directory_path: &Path = directory_path.as_ref();

    let directory_metadata =
        fs::metadata(directory_path).map_err(|_| IsDirectoryEmptyError::NotFound {
            directory_path: directory_path.to_path_buf(),
        })?;

    if !directory_metadata.is_dir() {
        return Err(IsDirectoryEmptyError::NotADirectory {
            path: directory_path.to_path_buf(),
        });
    }


    let mut directory_read = fs::read_dir(directory_path).map_err(|error| {
        IsDirectoryEmptyError::UnableToReadDirectory {
            directory_path: directory_path.to_path_buf(),
            error,
        }
    })?;

    // The directory is empty precisely when the iterator yields no entry at all.
    Ok(directory_read.next().is_none())
}