git_features/fs.rs
//! Filesystem utilities
//!
//! These will be parallel if the `parallel` feature is enabled, at the expense of compiling additional dependencies
//! along with runtime costs for maintaining a global [`rayon`](https://docs.rs/rayon) thread pool.
//!
//! For information on how to use the [`WalkDir`] type, have a look at
//! * [`jwalk::WalkDir`](https://docs.rs/jwalk/0.5.1/jwalk/type.WalkDir.html) if the `fs-walkdir-parallel` feature is enabled
//! * [`walkdir::WalkDir`](https://docs.rs/walkdir/2.3.1/walkdir/struct.WalkDir.html) otherwise
9
#[cfg(any(feature = "walkdir", feature = "fs-walkdir-parallel"))]
mod shared {
    /// The desired level of parallelism for a directory traversal.
    ///
    /// The value is only acted upon by the `fs-walkdir-parallel` flavor of the `walkdir` module;
    /// the serial flavor accepts it for signature compatibility and ignores it.
    ///
    /// The type is a plain, cheap-to-copy value, hence it can be printed, copied and compared.
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub enum Parallelism {
        /// Do not parallelize at all by making a serial traversal on the current thread.
        Serial,
        /// Create a new thread pool for each traversal, using as many threads as the machine has logical cores
        /// but never more than 16.
        ThreadPoolPerTraversal {
            /// The base name of the threads we create as part of the thread-pool.
            thread_name: &'static str,
        },
    }
}
23
/// Directory traversal backed by the `jwalk` crate, which is able to walk directories using multiple threads.
///
/// This flavor is compiled in if the `fs-walkdir-parallel` feature is enabled.
#[cfg(feature = "fs-walkdir-parallel")]
pub mod walkdir {
    use std::path::Path;

    pub use jwalk::{DirEntry as DirEntryGeneric, DirEntryIter as DirEntryIterGeneric, Error, WalkDir};

    pub use super::shared::Parallelism;

    /// An alias for an uncustomized directory entry to match the one of the non-parallel version offered by `walkdir`.
    pub type DirEntry = DirEntryGeneric<((), ())>;

    /// The Iterator yielding directory items
    pub type DirEntryIter = DirEntryIterGeneric<((), ())>;

    impl From<Parallelism> for jwalk::Parallelism {
        /// Translate our notion of parallelism into the one understood by `jwalk`.
        ///
        /// For [`Parallelism::ThreadPoolPerTraversal`] a dedicated thread pool is built with
        /// `min(logical cores, 16)` threads, each with a 128 KiB stack and named `"<thread_name> <index>"`.
        /// If that pool cannot be built, the conversion falls back to a serial traversal instead of panicking.
        fn from(v: Parallelism) -> Self {
            match v {
                Parallelism::Serial => jwalk::Parallelism::Serial,
                Parallelism::ThreadPoolPerTraversal { thread_name } => {
                    let pool = jwalk::rayon::ThreadPoolBuilder::new()
                        .num_threads(num_cpus::get().min(16))
                        .stack_size(128 * 1024)
                        .thread_name(move |idx| format!("{thread_name} {idx}"))
                        .build();
                    match pool {
                        Ok(pool) => jwalk::Parallelism::RayonExistingPool {
                            pool: pool.into(),
                            busy_timeout: None,
                        },
                        // The options set above are always valid, but `build()` is still fallible at runtime,
                        // e.g. when worker threads cannot be spawned. A serial walk visits the same
                        // entries, so degrade gracefully rather than aborting the caller.
                        Err(_) => jwalk::Parallelism::Serial,
                    }
                }
            }
        }
    }

    /// Instantiate a new directory iterator which will not skip hidden files, with the given level of `parallelism`.
    pub fn walkdir_new(root: impl AsRef<Path>, parallelism: Parallelism) -> WalkDir {
        WalkDir::new(root).skip_hidden(false).parallelism(parallelism.into())
    }

    /// Instantiate a new directory iterator which will not skip hidden files and is sorted, with the given level of `parallelism`.
    pub fn walkdir_sorted_new(root: impl AsRef<Path>, parallelism: Parallelism) -> WalkDir {
        WalkDir::new(root)
            .skip_hidden(false)
            .sort(true)
            .parallelism(parallelism.into())
    }
}
72
/// Directory traversal backed by the single-threaded `walkdir` crate.
///
/// This flavor is compiled in if the `walkdir` feature is enabled while `fs-walkdir-parallel` is not.
#[cfg(all(feature = "walkdir", not(feature = "fs-walkdir-parallel")))]
pub mod walkdir {
    use std::path::Path;

    pub use walkdir::{DirEntry, Error, WalkDir};

    pub use super::shared::Parallelism;

    /// The Iterator yielding directory items
    pub type DirEntryIter = walkdir::IntoIter;

    /// Create a directory iterator rooted at `root` which does not skip hidden files.
    ///
    /// The `Parallelism` argument only exists to mirror the signature of the parallel flavor
    /// and is ignored here, as the traversal is always serial.
    pub fn walkdir_new(root: impl AsRef<Path>, _parallelism: Parallelism) -> WalkDir {
        WalkDir::new(root)
    }

    /// Create a directory iterator rooted at `root` which does not skip hidden files and yields entries
    /// sorted by file name.
    ///
    /// The `Parallelism` argument only exists to mirror the signature of the parallel flavor
    /// and is ignored here, as the traversal is always serial.
    pub fn walkdir_sorted_new(root: impl AsRef<Path>, _parallelism: Parallelism) -> WalkDir {
        let unsorted = WalkDir::new(root);
        unsorted.sort_by_file_name()
    }
}
95
96#[cfg(any(feature = "walkdir", feature = "fs-walkdir-parallel"))]
97pub use self::walkdir::{walkdir_new, walkdir_sorted_new, WalkDir};
98
99/// Prepare open options which won't follow symlinks when the file is opened.
100///
101/// Note: only effective on unix currently.
102pub fn open_options_no_follow() -> std::fs::OpenOptions {
103 #[cfg_attr(not(unix), allow(unused_mut))]
104 let mut options = std::fs::OpenOptions::new();
105 #[cfg(unix)]
106 {
107 /// Make sure that it's impossible to follow through to the target of symlinks.
108 /// Note that this will still follow symlinks in the path, which is what we assume
109 /// has been checked separately.
110 use std::os::unix::fs::OpenOptionsExt;
111 options.custom_flags(libc::O_NOFOLLOW);
112 }
113 options
114}
115
116mod snapshot {
117 use std::ops::Deref;
118
119 use crate::threading::{get_mut, get_ref, MutableOnDemand, OwnShared};
120
121 /// A structure holding enough information to reload a value if its on-disk representation changes as determined by its modified time.
122 #[derive(Debug)]
123 pub struct Snapshot<T: std::fmt::Debug> {
124 value: T,
125 modified: std::time::SystemTime,
126 }
127
128 impl<T: Clone + std::fmt::Debug> Clone for Snapshot<T> {
129 fn clone(&self) -> Self {
130 Self {
131 value: self.value.clone(),
132 modified: self.modified,
133 }
134 }
135 }
136
137 /// A snapshot of a resource which is up-to-date in the moment it is retrieved.
138 pub type SharedSnapshot<T> = OwnShared<Snapshot<T>>;
139
140 /// Use this type for fields in structs that are to store the [`Snapshot`], typically behind an [`OwnShared`].
141 ///
142 /// Note that the resource itself is behind another [`OwnShared`] to allow it to be used without holding any kind of lock, hence
143 /// without blocking updates while it is used.
144 #[derive(Debug, Default)]
145 pub struct MutableSnapshot<T: std::fmt::Debug>(pub MutableOnDemand<Option<SharedSnapshot<T>>>);
146
147 impl<T: std::fmt::Debug> Deref for Snapshot<T> {
148 type Target = T;
149
150 fn deref(&self) -> &Self::Target {
151 &self.value
152 }
153 }
154
155 impl<T: std::fmt::Debug> Deref for MutableSnapshot<T> {
156 type Target = MutableOnDemand<Option<SharedSnapshot<T>>>;
157
158 fn deref(&self) -> &Self::Target {
159 &self.0
160 }
161 }
162
163 impl<T: std::fmt::Debug> MutableSnapshot<T> {
164 /// Create a new instance of this type.
165 ///
166 /// Useful in case `Default::default()` isn't working for some reason.
167 pub fn new() -> Self {
168 MutableSnapshot(MutableOnDemand::new(None))
169 }
170
171 /// Refresh `state` forcefully by re-`open`ing the resource. Note that `open()` returns `None` if the resource isn't
172 /// present on disk, and that it's critical that the modified time is obtained _before_ opening the resource.
173 pub fn force_refresh<E>(
174 &self,
175 open: impl FnOnce() -> Result<Option<(std::time::SystemTime, T)>, E>,
176 ) -> Result<(), E> {
177 let mut state = get_mut(&self.0);
178 *state = open()?.map(|(modified, value)| OwnShared::new(Snapshot { value, modified }));
179 Ok(())
180 }
181
182 /// Assure that the resource in `state` is up-to-date by comparing the `current_modification_time` with the one we know in `state`
183 /// and by acting accordingly.
184 /// Returns the potentially updated/reloaded resource if it is still present on disk, which then represents a snapshot that is up-to-date
185 /// in that very moment, or `None` if the underlying file doesn't exist.
186 ///
187 /// Note that even though this is racy, each time a request is made there is a chance to see the actual state.
188 pub fn recent_snapshot<E>(
189 &self,
190 mut current_modification_time: impl FnMut() -> Option<std::time::SystemTime>,
191 open: impl FnOnce() -> Result<Option<T>, E>,
192 ) -> Result<Option<SharedSnapshot<T>>, E> {
193 let state = get_ref(self);
194 let recent_modification = current_modification_time();
195 let buffer = match (&*state, recent_modification) {
196 (None, None) => (*state).clone(),
197 (Some(_), None) => {
198 drop(state);
199 let mut state = get_mut(self);
200 *state = None;
201 (*state).clone()
202 }
203 (Some(snapshot), Some(modified_time)) => {
204 if snapshot.modified < modified_time {
205 drop(state);
206 let mut state = get_mut(self);
207
208 if let (Some(_snapshot), Some(modified_time)) = (&*state, current_modification_time()) {
209 *state = open()?.map(|value| {
210 OwnShared::new(Snapshot {
211 value,
212 modified: modified_time,
213 })
214 });
215 }
216
217 (*state).clone()
218 } else {
219 // Note that this relies on sub-section precision or else is a race when the packed file was just changed.
220 // It's nothing we can know though, so… up to the caller unfortunately.
221 Some(snapshot.clone())
222 }
223 }
224 (None, Some(_modified_time)) => {
225 drop(state);
226 let mut state = get_mut(self);
227 // Still in the same situation? If so, load the buffer. This compensates for the trampling herd
228 // during lazy-loading at the expense of another mtime check.
229 if let (None, Some(modified_time)) = (&*state, current_modification_time()) {
230 *state = open()?.map(|value| {
231 OwnShared::new(Snapshot {
232 value,
233 modified: modified_time,
234 })
235 });
236 }
237 (*state).clone()
238 }
239 };
240 Ok(buffer)
241 }
242 }
243}
244pub use snapshot::{MutableSnapshot, SharedSnapshot, Snapshot};