git_features/
fs.rs

1//! Filesystem utilities
2//!
//! These will be parallel if the `parallel` feature is enabled, at the expense of compiling additional dependencies
4//! along with runtime costs for maintaining a global [`rayon`](https://docs.rs/rayon) thread pool.
5//!
6//! For information on how to use the [`WalkDir`] type, have a look at
//! * [`jwalk::WalkDir`](https://docs.rs/jwalk/0.5.1/jwalk/type.WalkDir.html) if the `fs-walkdir-parallel` feature is enabled
//! * [`walkdir::WalkDir`](https://docs.rs/walkdir/2.3.1/walkdir/struct.WalkDir.html) otherwise
9
10#[cfg(any(feature = "walkdir", feature = "fs-walkdir-parallel"))]
11mod shared {
12    /// The desired level of parallelism.
13    pub enum Parallelism {
14        /// Do not parallelize at all by making a serial traversal on the current thread.
15        Serial,
16        /// Create a new thread pool for each traversal with up to 16 threads or the amount of logical cores of the machine.
17        ThreadPoolPerTraversal {
18            /// The base name of the threads we create as part of the thread-pool.
19            thread_name: &'static str,
20        },
21    }
22}
23
/// Directory traversal backed by `jwalk`, which can optionally use multiple threads.
25#[cfg(feature = "fs-walkdir-parallel")]
26pub mod walkdir {
27    use std::path::Path;
28
29    pub use jwalk::{DirEntry as DirEntryGeneric, DirEntryIter as DirEntryIterGeneric, Error, WalkDir};
30
31    pub use super::shared::Parallelism;
32
33    /// An alias for an uncustomized directory entry to match the one of the non-parallel version offered by `walkdir`.
34    pub type DirEntry = DirEntryGeneric<((), ())>;
35
36    impl From<Parallelism> for jwalk::Parallelism {
37        fn from(v: Parallelism) -> Self {
38            match v {
39                Parallelism::Serial => jwalk::Parallelism::Serial,
40                Parallelism::ThreadPoolPerTraversal { thread_name } => {
41                    let pool = jwalk::rayon::ThreadPoolBuilder::new()
42                        .num_threads(num_cpus::get().min(16))
43                        .stack_size(128 * 1024)
44                        .thread_name(move |idx| format!("{thread_name} {idx}"))
45                        .build()
46                        .expect("we only set options that can't cause a build failure");
47                    jwalk::Parallelism::RayonExistingPool {
48                        pool: pool.into(),
49                        busy_timeout: None,
50                    }
51                }
52            }
53        }
54    }
55
56    /// Instantiate a new directory iterator which will not skip hidden files, with the given level of `parallelism`.
57    pub fn walkdir_new(root: impl AsRef<Path>, parallelism: Parallelism) -> WalkDir {
58        WalkDir::new(root).skip_hidden(false).parallelism(parallelism.into())
59    }
60
61    /// Instantiate a new directory iterator which will not skip hidden files and is sorted
62    pub fn walkdir_sorted_new(root: impl AsRef<Path>, parallelism: Parallelism) -> WalkDir {
63        WalkDir::new(root)
64            .skip_hidden(false)
65            .sort(true)
66            .parallelism(parallelism.into())
67    }
68
69    /// The Iterator yielding directory items
70    pub type DirEntryIter = DirEntryIterGeneric<((), ())>;
71}
72
73#[cfg(all(feature = "walkdir", not(feature = "fs-walkdir-parallel")))]
/// Directory traversal backed by `walkdir`, which always runs on the current thread.
75pub mod walkdir {
76    use std::path::Path;
77
78    pub use walkdir::{DirEntry, Error, WalkDir};
79
80    pub use super::shared::Parallelism;
81
82    /// Instantiate a new directory iterator which will not skip hidden files, with the given level of `parallelism`.
83    pub fn walkdir_new(root: impl AsRef<Path>, _: Parallelism) -> WalkDir {
84        WalkDir::new(root)
85    }
86
87    /// Instantiate a new directory iterator which will not skip hidden files and is sorted, with the given level of `parallelism`.
88    pub fn walkdir_sorted_new(root: impl AsRef<Path>, _: Parallelism) -> WalkDir {
89        WalkDir::new(root).sort_by_file_name()
90    }
91
92    /// The Iterator yielding directory items
93    pub type DirEntryIter = walkdir::IntoIter;
94}
95
96#[cfg(any(feature = "walkdir", feature = "fs-walkdir-parallel"))]
97pub use self::walkdir::{walkdir_new, walkdir_sorted_new, WalkDir};
98
99/// Prepare open options which won't follow symlinks when the file is opened.
100///
101/// Note: only effective on unix currently.
102pub fn open_options_no_follow() -> std::fs::OpenOptions {
103    #[cfg_attr(not(unix), allow(unused_mut))]
104    let mut options = std::fs::OpenOptions::new();
105    #[cfg(unix)]
106    {
107        /// Make sure that it's impossible to follow through to the target of symlinks.
108        /// Note that this will still follow symlinks in the path, which is what we assume
109        /// has been checked separately.
110        use std::os::unix::fs::OpenOptionsExt;
111        options.custom_flags(libc::O_NOFOLLOW);
112    }
113    options
114}
115
116mod snapshot {
117    use std::ops::Deref;
118
119    use crate::threading::{get_mut, get_ref, MutableOnDemand, OwnShared};
120
121    /// A structure holding enough information to reload a value if its on-disk representation changes as determined by its modified time.
122    #[derive(Debug)]
123    pub struct Snapshot<T: std::fmt::Debug> {
124        value: T,
125        modified: std::time::SystemTime,
126    }
127
128    impl<T: Clone + std::fmt::Debug> Clone for Snapshot<T> {
129        fn clone(&self) -> Self {
130            Self {
131                value: self.value.clone(),
132                modified: self.modified,
133            }
134        }
135    }
136
137    /// A snapshot of a resource which is up-to-date in the moment it is retrieved.
138    pub type SharedSnapshot<T> = OwnShared<Snapshot<T>>;
139
140    /// Use this type for fields in structs that are to store the [`Snapshot`], typically behind an [`OwnShared`].
141    ///
142    /// Note that the resource itself is behind another [`OwnShared`] to allow it to be used without holding any kind of lock, hence
143    /// without blocking updates while it is used.
144    #[derive(Debug, Default)]
145    pub struct MutableSnapshot<T: std::fmt::Debug>(pub MutableOnDemand<Option<SharedSnapshot<T>>>);
146
147    impl<T: std::fmt::Debug> Deref for Snapshot<T> {
148        type Target = T;
149
150        fn deref(&self) -> &Self::Target {
151            &self.value
152        }
153    }
154
155    impl<T: std::fmt::Debug> Deref for MutableSnapshot<T> {
156        type Target = MutableOnDemand<Option<SharedSnapshot<T>>>;
157
158        fn deref(&self) -> &Self::Target {
159            &self.0
160        }
161    }
162
163    impl<T: std::fmt::Debug> MutableSnapshot<T> {
164        /// Create a new instance of this type.
165        ///
166        /// Useful in case `Default::default()` isn't working for some reason.
167        pub fn new() -> Self {
168            MutableSnapshot(MutableOnDemand::new(None))
169        }
170
171        /// Refresh `state` forcefully by re-`open`ing the resource. Note that `open()` returns `None` if the resource isn't
172        /// present on disk, and that it's critical that the modified time is obtained _before_ opening the resource.
173        pub fn force_refresh<E>(
174            &self,
175            open: impl FnOnce() -> Result<Option<(std::time::SystemTime, T)>, E>,
176        ) -> Result<(), E> {
177            let mut state = get_mut(&self.0);
178            *state = open()?.map(|(modified, value)| OwnShared::new(Snapshot { value, modified }));
179            Ok(())
180        }
181
        /// Assure that the stored snapshot is up-to-date by comparing the `current_modification_time` with the one recorded in it,
        /// and by acting accordingly.
        /// Returns the potentially updated/reloaded resource if it is still present on disk, which then represents a snapshot that is up-to-date
        /// in that very moment, or `None` if the underlying file doesn't exist.
        ///
        /// * `current_modification_time` yields the modification time of the resource on disk, or `None` if it doesn't exist.
        ///   It is called once up front, and a second time after mutable access was obtained if a (re)load seems necessary.
        /// * `open` loads the resource, returning `None` if it isn't present on disk. It is called at most once, and only if
        ///   there is no snapshot yet or if the stored one is older than the modification time on disk.
        ///
        /// Note that even though this is racy, each time a request is made there is a chance to see the actual state.
        ///
        /// # Errors
        ///
        /// The error of `open()` is passed on, in which case the stored snapshot remains unchanged.
        pub fn recent_snapshot<E>(
            &self,
            mut current_modification_time: impl FnMut() -> Option<std::time::SystemTime>,
            open: impl FnOnce() -> Result<Option<T>, E>,
        ) -> Result<Option<SharedSnapshot<T>>, E> {
            // Start with read access only, which suffices if nothing has to change.
            let state = get_ref(self);
            let recent_modification = current_modification_time();
            let buffer = match (&*state, recent_modification) {
                // Nothing stored and nothing on disk: there is nothing to do.
                (None, None) => (*state).clone(),
                // The resource vanished from disk: forget the stored snapshot.
                (Some(_), None) => {
                    // Release read access before asking for mutable access.
                    drop(state);
                    let mut state = get_mut(self);
                    *state = None;
                    (*state).clone()
                }
                (Some(snapshot), Some(modified_time)) => {
                    if snapshot.modified < modified_time {
                        // The stored snapshot is older than what's on disk: reload it.
                        drop(state);
                        let mut state = get_mut(self);

                        // Check again now that we have mutable access, and record the modification time obtained _before_ opening.
                        // If there is no snapshot or no resource on disk anymore, the state is returned as it is.
                        if let (Some(_snapshot), Some(modified_time)) = (&*state, current_modification_time()) {
                            *state = open()?.map(|value| {
                                OwnShared::new(Snapshot {
                                    value,
                                    modified: modified_time,
                                })
                            });
                        }

                        (*state).clone()
                    } else {
                        // Note that this relies on sub-second precision or else is a race when the packed file was just changed.
                        // It's nothing we can know though, so… up to the caller unfortunately.
                        Some(snapshot.clone())
                    }
                }
                // Nothing stored yet, but the resource exists on disk: load it for the first time.
                (None, Some(_modified_time)) => {
                    drop(state);
                    let mut state = get_mut(self);
                    // Still in the same situation? If so, load the buffer. This compensates for the thundering herd
                    // during lazy-loading at the expense of another mtime check.
                    if let (None, Some(modified_time)) = (&*state, current_modification_time()) {
                        *state = open()?.map(|value| {
                            OwnShared::new(Snapshot {
                                value,
                                modified: modified_time,
                            })
                        });
                    }
                    (*state).clone()
                }
            };
            Ok(buffer)
        }
242    }
243}
244pub use snapshot::{MutableSnapshot, SharedSnapshot, Snapshot};