sprint_dir/
walker.rs

1use crate::getdent::DirentBuf;
2
3use core::mem;
4use std::io;
5use std::ffi::{CStr, CString, OsStr, OsString};
6use std::path::{Component, Path, PathBuf};
7use std::sync::Arc;
8use std::os::unix::fs::FileTypeExt;
9use std::os::unix::ffi::OsStrExt;
10use once_cell::sync::OnceCell;
11
12use super::UnixFileType as FileTypeInner;
13use super::getdent::{DirentErr, Entry, More};
14
15/// Configure walking over all files in a directory tree.
16pub struct WalkDir {
17    /// The user supplied configuration.
18    config: Configuration,
19    path: PathBuf,
20}
21
22/// The main iterator.
23pub struct IntoIter {
24    /// The user supplied configuration.
25    config: Configuration,
26    /// The current 'finger' within the tree of directories.
27    stack: Vec<WorkItem>,
28    /// The number of file descriptors we are still allowed to open.
29    open_budget: usize,
30    /// Statistics about the system calls etc.
31    stats: Stats,
32}
33
34/// Describes a file that was found.
35///
36/// All parents of this entry have already been yielded before.
37#[derive(Debug, Clone)]
38pub struct DirEntry {
39    /// The file type reported by the call to `getdent`.
40    file_type: FileType,
41    /// The depth at which this entry was found.
42    depth: usize,
43    /// The file name of this entry.
44    file_name: EntryPath,
45    /// The normalized full path of the entry.
46    full_path: OnceCell<PathBuf>,
47}
48
49#[derive(Debug, Clone)]
50enum EntryPath {
51    /// We have already allocate the whole path in its own buffer.
52    Full(PathBuf),
53    /// The path is given as the filename alone.
54    Name {
55        name: OsString,
56        /// The parent directory of the entry.
57        parent: Arc<Node>,
58    },
59}
60
61#[derive(Debug)]
62pub struct Error {
63    _private: (),
64}
65
66/// The type of a file entry.
67///
68/// Accessing this will not cause any system calls and is very cheap. However, the type may not
69/// always be known. In these cases you need to manually query the file meta data.
70#[derive(Clone, Copy, Debug, PartialEq)]
71pub struct FileType {
72    inner: Option<FileTypeInner>,
73}
74
75#[derive(Copy, Clone)]
76struct Configuration {
77    min_depth: usize,
78    max_depth: usize,
79    max_open: usize,
80    follow_links: bool,
81    contents_first: bool,
82    same_file_system: bool,
83}
84
85#[derive(Debug, Default)]
86struct Stats {
87    nr_close: usize,
88    nr_getdent: usize,
89    nr_open: usize,
90    nr_openat: usize,
91    nr_stat: usize,
92}
93
94/// Completed directory nodes that are parents of still open nodes or active entries.
95#[derive(Debug)]
96struct Node {
97    /// The depth at which this node occurs.
98    depth: usize,
99    /// The path of this node.
100    path: EntryPath,
101}
102
103enum WorkItem {
104    /// A directory which is still open.
105    Open(Open),
106    /// A directory that was closed.
107    Closed(Closed),
108}
109
110/// Directories with a file descriptor.
111struct Open {
112    /// The open file descriptor.
113    fd: DirFd,
114    /// The buffer for reading entries of this directory.
115    buffer: DirentBuf,
116    /// The directory depth of this descriptor.
117    depth: usize,
118    /// The parent representation of this node.
119    /// Not to be confused with the potentially still open parent directory.
120    as_parent: Arc<Node>,
121}
122
123/// Describes a directory that had to be closed, and its entries read to memory.
124struct Closed {
125    /// The directory depth of the directory.
126    depth: usize,
127    /// The children.
128    children: Vec<Backlog>,
129    /// The parent representation of this node.
130    /// The parent directory is also surely closed but children might not be.
131    as_parent: Option<Arc<Node>>,
132}
133
134struct DirFd(libc::c_int);
135
136/// Describes an item of a closed directory.
137///
138/// The directories represented by this type are no-one's parent yet.
139///
140/// Note that by using `openat` we can avoid having to construct the complete path as a single
141/// `PathBuf` but this requires keeping the parent `fd` open.
142///
143/// TODO: what if we use a dequeue to actually allocate these consecutively in memory?
144struct Backlog {
145    /// The complete path up to here.
146    /// Since the file descriptor was closed we can't use `openat` but need to reconstruct the full
147    /// path. We might want to track statistics on this since it really is annoying.
148    file_path: PathBuf,
149    file_type: Option<FileTypeInner>,
150}
151
152// Public interfaces.
153
154impl WalkDir {
155    pub fn new(path: impl AsRef<Path>) -> Self {
156        WalkDir {
157            config: Configuration::default(),
158            path: path.as_ref().to_owned(),
159        }
160    }
161
162    pub fn min_depth(mut self, n: usize) -> Self {
163        self.config.min_depth = n;
164        self
165    }
166
167    pub fn max_depth(mut self, n: usize) -> Self {
168        self.config.max_depth = n;
169        self
170    }
171
172    pub fn max_open(mut self, n: usize) -> Self {
173        self.config.max_open = n;
174        self
175    }
176
177    pub fn follow_links(mut self, yes: bool) -> Self {
178        self.config.follow_links = yes;
179        self
180    }
181
182    pub fn sort_by<F>(self, cmp: F) -> Self where
183        F: FnMut(&DirEntry, &DirEntry) -> core::cmp::Ordering + Send + Sync + 'static,
184    {
185        todo!()
186    }
187
188    pub fn contents_first(mut self, yes: bool) -> Self {
189        self.config.contents_first = yes;
190        self
191    }
192
193    pub fn same_file_system(mut self, yes: bool) -> Self {
194        self.config.same_file_system = yes;
195        self
196    }
197
198    pub fn build(mut self) -> IntoIter {
199        self.config.assert_consistent();
200        let first_item = self.initial_closed();
201
202        IntoIter {
203            config: self.config,
204            stack: vec![WorkItem::Closed(first_item)],
205            open_budget: 128,
206            stats: Stats::default(),
207        }
208    }
209
210    fn initial_closed(&mut self) -> Closed {
211        let backlog = Backlog {
212            file_path: core::mem::take(&mut self.path),
213            // We do not _know_ this file type yet, recover and check on iteration.
214            file_type: None,
215        };
216
217        Closed {
218            depth: 0,
219            children: vec![backlog],
220            as_parent: None,
221        }
222    }
223}
224
225impl Configuration {
226    fn assert_consistent(&self) {
227        assert!(self.min_depth <= self.max_depth);
228        assert!(self.max_open > 0);
229        assert!(!self.follow_links, "Unsupported");
230        assert!(!self.same_file_system , "Unsupported");
231    }
232}
233
234impl Default for Configuration {
235    fn default() -> Self {
236        Configuration {
237            min_depth: 0,
238            max_depth: usize::MAX,
239            max_open: 10,
240            follow_links: false,
241            contents_first: false,
242            same_file_system: false,
243        }
244    }
245}
246
247impl IntoIter {
248    pub fn skip_current_dir(&mut self) {
249        todo!()
250    }
251
252    pub fn filter_entry<P>(self, predicate: P) -> FilterEntry<Self, P> where
253        P: FnMut(&DirEntry) -> bool,
254    {
255        todo!()
256    }
257
258    pub fn stats(&self) -> &dyn core::fmt::Debug {
259        &self.stats
260    }
261}
262
263pub struct FilterEntry<I, P> {
264    unused: core::marker::PhantomData<(I, P)>,
265}
266
267impl FileType {
268    pub fn is_dir(&self) -> bool {
269        self.inner == Some(FileTypeInner::Directory)
270    }
271
272    pub fn is_file(&self) -> bool {
273        self.inner == Some(FileTypeInner::File)
274    }
275
276    pub fn is_symlink(&self) -> bool {
277        self.inner == Some(FileTypeInner::SymbolicLink)
278    }
279
280    fn set(&mut self, inner: FileTypeInner) {
281        self.inner = Some(inner);
282    }
283}
284
285impl DirEntry {
286    // TODO: enable `openat`?
287
288    /// Inspect the path of this entry.
289    pub fn path(&self) -> &Path {
290        self.full_path.get_or_init(|| {
291            self.file_name.make_path()
292        })
293    }
294
295    pub fn path_is_symlink(&self) -> bool {
296        self.file_type.is_symlink()
297    }
298
299    /// Read the full meta data.
300    pub fn metadata(&self) -> io::Result<std::fs::Metadata> {
301        std::fs::metadata(self.path())
302    }
303
304    /// Convert the entry into a path
305    ///
306    /// Potentially more efficient than `as_path().to_owned()`.
307    pub fn into_path(self) -> PathBuf {
308        let file_name = self.file_name;
309        self.full_path.into_inner().unwrap_or_else(|| {
310            file_name.make_path()
311        })
312    }
313
314    pub fn file_type(&self) -> FileType {
315        self.file_type
316    }
317
318    /// Return the filename of this entry.
319    pub fn file_name(&self) -> &OsStr {
320        match &self.file_name {
321            EntryPath::Full(buf) => buf.file_name().unwrap(),
322            EntryPath::Name { name, .. } => name,
323        }
324    }
325
326    /// The depth at which this entry is in the directory tree.
327    ///
328    /// When iterating items in depth-first order and following symbolic links then this is not
329    /// necessarily the smallest depth at which it might appear.
330    pub fn depth(&self) -> usize {
331        self.depth
332    }
333}
334
335impl Open {
336    fn openat_os(&self, path: &OsStr, stats: &mut Stats) -> io::Result<Self> {
337        let bytes = path.as_bytes().to_owned();
338        let cstr = CString::new(bytes).unwrap();
339        self.openat(&cstr, stats)
340    }
341
342    fn openat(&self, path: &CStr, stats: &mut Stats) -> io::Result<Self> {
343        stats.nr_openat += 1;
344        let fd = self.fd.openat(path)?;
345        let filename = OsStr::from_bytes(path.to_bytes()).to_owned();
346
347        Ok(Open {
348            fd,
349            buffer: DirentBuf::with_size(1 << 14),
350            depth: self.depth + 1,
351            as_parent: Arc::new(Node {
352                path: EntryPath::Name {
353                    name: filename,
354                    parent: self.as_parent.clone(),
355                },
356                depth: self.depth + 1,
357            }),
358        })
359    }
360
361    /// Get the next item from this directory.
362    fn pop(&mut self) -> Option<Entry<'_>> {
363        self.buffer.drain().next().map(Self::okay)
364    }
365
366    fn ready_entry(&mut self) -> Option<DirEntry> {
367        let depth = self.depth;
368        let parent = self.as_parent.clone();
369        let entry = self.pop()?;
370
371        let entry = match Self::sub_entry(entry) {
372            None => return self.ready_entry(),
373            Some(entry) => entry,
374        };
375
376        Some(DirEntry {
377            file_name: EntryPath::Name {
378                name: entry.file_name().to_owned(),
379                parent,
380            },
381            depth,
382            file_type: FileType {
383                inner: entry.file_type(),
384            },
385            full_path: OnceCell::new(),
386        })
387    }
388
389    fn fill_buffer(&mut self, stats: &mut Stats) -> io::Result<More> {
390        stats.nr_getdent += 1;
391        self.buffer.fill_buf(self.fd.0)
392    }
393
394    /// Forcibly close this directory entry.
395    /// Returns None if its already finished and Some with the remaining backlog items otherwise.
396    fn close(mut self, stats: &mut Stats) -> io::Result<Option<Closed>> {
397        let mut backlog = vec![];
398        let base = self.as_parent.make_path();
399
400        loop {
401            let entries = self.buffer
402                .drain()
403                .map(Self::okay)
404                .filter_map(Self::sub_entry)
405                .map(|entry| Self::backlog(&base, entry));
406            backlog.extend(entries);
407            stats.nr_getdent += 1;
408            match self.buffer.fill_buf(self.fd.0)? {
409                More::Blocked => unreachable!("Just drained buffer is blocked"),
410                More::More => {},
411                More::Done => break,
412            }
413        }
414
415        if backlog.is_empty() {
416            stats.nr_close += 1;
417            self.fd.close()?;
418            Ok(None)
419        } else {
420            let closed = Closed::from_backlog(&self, backlog);
421            stats.nr_close += 1;
422            self.fd.close()?;
423            Ok(Some(closed))
424        }
425    }
426
427    /// Filter an entry that we got from the internal buffer.
428    /// Handles kernel errors and setup faults which mustn't occur in regular operation.
429    fn okay(entry: Result<Entry<'_>, DirentErr>) -> Entry<'_> {
430        match entry {
431            Ok(entry) => entry,
432            Err(DirentErr::TooShort) => unreachable!("Inconsistent buffer state"),
433            Err(DirentErr::InvalidLength) => unreachable!("You must have hit a kernel bug!"),
434        }
435    }
436
437    fn sub_entry(entry: Entry<'_>) -> Option<Entry<'_>> {
438        // Never recurse into current or parent directory.
439        match Path::new(entry.file_name()).components().next() {
440            Some(Component::CurDir) | Some(Component::ParentDir) => None,
441            _ => Some(entry),
442        }
443
444    }
445
446    fn backlog(base: &Path, entry: Entry<'_>) -> Backlog {
447        Backlog {
448            file_path: base.join(entry.file_name()),
449            file_type: entry.file_type(),
450        }
451    }
452}
453
454impl DirFd {
455    fn open(path: &Path) -> io::Result<Self> {
456        let raw_name = path.as_os_str().as_bytes().to_owned();
457        let unix_name = CString::new(raw_name).expect("No interior NULL byte in Path");
458
459        let result = unsafe {
460            libc::open(unix_name.as_c_str().as_ptr(), libc::O_RDONLY | libc::O_DIRECTORY)
461        };
462
463        if result == -1 {
464            return Err(io::Error::last_os_error());
465        }
466
467        Ok(DirFd(result))
468    }
469
470    fn openat(&self, path: &CStr) -> io::Result<Self> {
471        let result = unsafe {
472            libc::openat(self.0, path.as_ptr(), libc::O_RDONLY | libc::O_DIRECTORY)
473        };
474
475        if result == -1 {
476            return Err(io::Error::last_os_error());
477        }
478
479        Ok(DirFd(result))
480    }
481
482    fn close(self) -> io::Result<()> {
483        match unsafe { libc::close(self.0) } {
484            0 => Ok(()),
485            _ => Err(io::Error::last_os_error()),
486        }
487    }
488}
489
490impl Closed {
491    fn from_backlog(open: &Open, children: Vec<Backlog>) -> Self {
492        Closed {
493            depth: open.depth + 1,
494            children,
495            as_parent: None,
496        }
497    }
498
499    fn open(&self, backlog: &DirEntry, stats: &mut Stats) -> io::Result<Open> {
500        let path = backlog.file_name.make_path();
501        stats.nr_open += 1;
502        let fd = DirFd::open(&path)?;
503
504        Ok(Open {
505            fd,
506            buffer: DirentBuf::with_size(1 << 14),
507            depth: self.depth + 1,
508            as_parent: Arc::new(Node {
509                depth: self.depth + 1,
510                path: EntryPath::Full(path),
511            })
512        })
513    }
514
515    fn ready_entry(&mut self) -> Option<DirEntry> {
516        let backlog = self.children.pop()?;
517        Some(DirEntry {
518            file_name: EntryPath::Full(backlog.file_path),
519            file_type: FileType {
520                inner: backlog.file_type
521            },
522            depth: self.depth,
523            full_path: OnceCell::new(),
524        })
525    }
526}
527
528impl EntryPath {
529    fn make_path(&self) -> PathBuf {
530        match self {
531            EntryPath::Full(buf) => buf.clone(),
532            EntryPath::Name { name, parent } => {
533                let mut buf = parent.make_path();
534                buf.push(&name);
535                buf
536            }
537        }
538    }
539}
540
541impl Node {
542    /// Allocate a path buffer for the path described.
543    fn make_path(&self) -> PathBuf {
544        self.path.make_path()
545    }
546}
547
548impl IntoIter {
549    /// See if we should descend to the newly found entry.
550    fn iter_entry(&mut self, entry: &mut DirEntry) -> Result<(), Error> {
551        let is_dir = match entry.file_type.inner {
552            Some(FileTypeInner::Directory) => true,
553            Some(_) => false,
554            None => {
555                //can we make fstatat work?
556                self.stats.nr_stat += 1;
557                let meta = std::fs::metadata(entry.file_name.make_path())
558                    .map_err(Error::from_io)?
559                    .file_type();
560                if meta.is_dir() {
561                    entry.file_type.set(FileTypeInner::Directory);
562                    true
563                } else if meta.is_file() {
564                    entry.file_type.set(FileTypeInner::File);
565                    false
566                } else if meta.is_symlink() {
567                    entry.file_type.set(FileTypeInner::SymbolicLink);
568                    false
569                } else if meta.is_block_device() {
570                    entry.file_type.set(FileTypeInner::BlockDevice);
571                    false
572                } else if meta.is_char_device() {
573                    entry.file_type.set(FileTypeInner::CharDevice);
574                    false
575                } else if meta.is_fifo() {
576                    entry.file_type.set(FileTypeInner::File);
577                    false
578                } else if meta.is_socket() {
579                    entry.file_type.set(FileTypeInner::UnixSocket);
580                    false
581                } else {
582                    false
583                }
584            }
585        };
586
587        if is_dir {
588            // TODO: filter? min_depth? max_depth?
589
590            let can_open = self.open_budget > 0;
591            let mut next: WorkItem = match self.stack.last().unwrap() {
592                WorkItem::Open(open) if can_open => {
593                    open.openat_os(entry.file_name(), &mut self.stats)
594                        .map_err(Error::from_io)
595                        .map(WorkItem::Open)?
596                }
597                WorkItem::Open(open) => {
598                    if self.config.contents_first {
599                        // TODO: close and open the actual next.
600                    } else {
601                        // TODO: add the sub directory as a closed one.
602                    }
603
604                    todo!()
605                }
606                WorkItem::Closed(closed) => {
607                    assert!(can_open, "No more budget but only closed work items");
608                    closed.open(entry, &mut self.stats)
609                        .map_err(Error::from_io)
610                        .map(WorkItem::Open)?
611                }
612            };
613
614            if !self.config.contents_first {
615                mem::swap(&mut next, self.stack.last_mut().unwrap());
616            }
617
618            self.stack.push(next);
619        }
620
621        Ok({})
622    }
623}
624
625impl IntoIterator for WalkDir {
626    type IntoIter = IntoIter;
627    type Item = Result<DirEntry, Error>;
628    fn into_iter(self) -> IntoIter {
629        WalkDir::build(self)
630    }
631}
632
633impl Iterator for IntoIter {
634    type Item = Result<DirEntry, Error>;
635    fn next(&mut self) -> Option<Self::Item> {
636        let mut current = self.stack.last_mut()?;
637
638        // First try to get an item that is ripe for reaping.
639        let mut found = match &mut current {
640            WorkItem::Open(open) => match open.ready_entry() {
641                Some(entry) => entry,
642                // No more items, try refilling.
643                None => {
644                    match open.fill_buffer(&mut self.stats) {
645                        Err(err) => todo!(),
646                        Ok(More::More) => return self.next(),
647                        Ok(More::Blocked) => unreachable!("Empty buffer blocked"),
648                        Ok(More::Done) => {
649                            let _ = self.stack.pop();
650                            return self.next();
651                        }
652                    }
653                },
654            }
655            WorkItem::Closed(closed) => match closed.ready_entry() {
656                Some(entry) => entry,
657                None => {
658                    // Nothing to do, try the next entry.
659                    let _ = self.stack.pop();
660                    return self.next();
661                }
662            }
663        };
664
665        Some(self.iter_entry(&mut found).map(|_| found))
666    }
667}
668
669// Private implementation items.
670
671impl Open {
672}
673
674impl Error {
675    fn new() -> Self {
676        Error { _private: () }
677    }
678
679    pub fn path(&self) -> Option<&Path> {
680        todo!()
681    }
682
683    pub fn loop_ancestor(&self) -> Option<&Path> {
684        todo!()
685    }
686
687    pub fn depth(&self) -> usize {
688        todo!()
689    }
690
691    pub fn io_error(&self) -> Option<&std::io::Error> {
692        todo!()
693    }
694
695    pub fn into_io_error(&self) -> Option<std::io::Error> {
696        todo!()
697    }
698
699    fn from_io(_: io::Error) -> Self {
700        Error::new()
701    }
702}
703
704impl<P> Iterator for FilterEntry<IntoIter, P> {
705    type Item = Result<DirEntry, Error>;
706    fn next(&mut self) -> Option<Self::Item> {
707        unimplemented!()
708    }
709}