luminol_filesystem/
path_cache.rs

1// Copyright (C) 2024 Melody Madeline Lyons
2//
3// This file is part of Luminol.
4//
5// Luminol is free software: you can redistribute it and/or modify
6// it under the terms of the GNU General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9//
10// Luminol is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13// GNU General Public License for more details.
14//
15// You should have received a copy of the GNU General Public License
16// along with Luminol.  If not, see <http://www.gnu.org/licenses/>.
17
18use color_eyre::eyre::WrapErr;
19use itertools::Itertools;
20
21use crate::{DirEntry, Error, FileSystem as FileSystemTrait, Metadata, OpenFlags, Result};
22
23const TRIE_SUFFIX: &str = "\0";
24
25#[derive(Debug, Clone)]
26struct CactusNode {
27    /// The path component stored in this cactus stack node.
28    value: String,
29    /// The index of the next node within the cactus stack, or `None` if there is no next node.
30    next: Option<usize>,
31    /// One more than the number of times you need to follow `next` until you get `None`.
32    len: usize,
33}
34
35/// This cache stores the lowercased versions of paths and their corresponding original paths.
36/// Given a lowercased path, for example "data/mapinfos", you can find the original path by first
37/// appending a forward slash followed by `TRIE_SUFFIX` to the end of the path, then looking up the
38/// file at that path in `trie` and then looking up the lowercased file extension.
39/// This gives you the index of a node in `cactus`, which stores the
40/// original path. To recover the original path, follow the chain of cactus stack nodes by
41/// following the `next` field on the nodes. This gives you the path components of the original
42/// path in reverse order.
43#[derive(Debug, Default, Clone)]
44struct Cache {
45    trie: crate::FileSystemTrie<qp_trie::Trie<qp_trie::wrapper::BString, usize>>,
46    cactus: slab::Slab<CactusNode>,
47}
48
49#[derive(Debug)]
50pub struct FileSystem<F> {
51    fs: F,
52    cache: parking_lot::RwLock<Cache>,
53}
54
55impl Cache {
56    fn get_path_from_cactus_index(&self, index: usize) -> camino::Utf8PathBuf {
57        let Some(node) = self.cactus.get(index) else {
58            return Default::default();
59        };
60
61        let mut vec = Vec::with_capacity(node.len);
62
63        let mut node = Some(node);
64        while let Some(n) = node {
65            vec.push(&n.value);
66            node = n.next.and_then(|next| self.cactus.get(next));
67        }
68
69        vec.iter().rev().join(std::path::MAIN_SEPARATOR_STR).into()
70    }
71
72    /// Gets the original, case-sensitive version of the given case-insensitive path from the underlying
73    /// filesystem and puts it into the cache.
74    /// This method memoizes: if we want to insert "this/is/a/path" and the cache already contains
75    /// the case-sensitive version of the path "this/is", we will only scan the underlying filesystem
76    /// for the case-sensitive names of the remaining two components in the path.
77    fn regen(
78        &mut self,
79        fs: &impl FileSystemTrait,
80        path: impl AsRef<camino::Utf8Path>,
81    ) -> crate::Result<()> {
82        let path = path.as_ref();
83
84        // We don't need to do anything if there already is a path in the trie matching the given
85        // path
86        if self.desensitize(path).is_some() {
87            return Ok(());
88        }
89
90        let extension = path.extension().unwrap_or_default().to_string();
91        let mut path = to_lowercase(path);
92        path.set_extension("");
93
94        // If there is a matching path with a different file extension than this one, we may need
95        // to add this new path to the extension trie
96        if self.trie.contains_file(with_trie_suffix(&path)) {
97            if let Some(mut desensitized_path) = self
98                .trie
99                .get_file(with_trie_suffix(&path))
100                .unwrap()
101                .values()
102                .next()
103                .map(|&cactus_index| self.get_path_from_cactus_index(cactus_index))
104            {
105                desensitized_path.set_extension(&extension);
106                if fs.exists(&desensitized_path)? {
107                    let extension_trie = self.trie.get_file_mut(with_trie_suffix(&path)).unwrap();
108                    let sibling = self
109                        .cactus
110                        .get(*extension_trie.values().next().unwrap())
111                        .unwrap();
112                    let cactus_index = self.cactus.insert(CactusNode {
113                        value: desensitized_path.file_name().unwrap_or_default().into(),
114                        ..*sibling
115                    });
116                    extension_trie.insert_str(&extension, cactus_index);
117                    return Ok(());
118                }
119            }
120        }
121
122        let extension = extension.to_lowercase();
123
124        let prefix = self.trie.get_dir_prefix(&path);
125        let mut cactus_index = (!prefix.as_str().is_empty()).then(|| {
126            let extension_trie = self.trie.get_file(with_trie_suffix(prefix)).unwrap();
127            *extension_trie
128                .get_str(&extension)
129                .unwrap_or(extension_trie.values().next().unwrap())
130        });
131        let mut len = prefix.iter().count();
132
133        // Get the longest prefix of the path that is in the trie, convert it to lowercase and
134        // remove file extensions
135        let mut lower_string = prefix.to_string();
136        if let Some(additional) = path
137            .as_str()
138            .bytes()
139            .len()
140            .checked_sub(lower_string.bytes().len())
141        {
142            lower_string.reserve(additional);
143        }
144
145        // This is the same thing as `lower_string` except with the actual letter casing from the
146        // filesystem and without removing file extensions
147        let mut original_string = cactus_index.map_or_else(Default::default, |i| {
148            self.get_path_from_cactus_index(i).to_string()
149        });
150        if let Some(additional) = path
151            .as_str()
152            .bytes()
153            .len()
154            .checked_sub(original_string.bytes().len())
155        {
156            original_string.reserve(additional);
157        }
158
159        // Iterate over the remaining path components that aren't present in
160        // `lower_string`/`original_string`
161        for name in path.strip_prefix(prefix).unwrap().iter() {
162            let entries = fs
163                .read_dir(&original_string)
164                .wrap_err_with(|| format!("While regenerating cache for path {path:?}"))?;
165            len += 1;
166
167            let mut original_name = None;
168            let mut new_cactus_index = 0;
169            for entry in entries.into_iter() {
170                let entry_name = camino::Utf8Path::new(entry.file_name())
171                    .file_stem()
172                    .unwrap_or(entry.file_name())
173                    .to_lowercase();
174
175                let entry_extension = camino::Utf8Path::new(entry.file_name())
176                    .extension()
177                    .unwrap_or_default()
178                    .to_lowercase();
179
180                let extension_trie = self.trie.get_or_create_file_with_mut(
181                    if lower_string.is_empty() {
182                        with_trie_suffix(&entry_name)
183                    } else {
184                        format!("{lower_string}/{entry_name}/{TRIE_SUFFIX}").into()
185                    },
186                    Default::default,
187                );
188
189                let index = extension_trie
190                    .get_str(&entry_extension)
191                    .copied()
192                    .unwrap_or_else(|| {
193                        let index = self.cactus.insert(CactusNode {
194                            value: entry.file_name().to_string(),
195                            next: cactus_index,
196                            len,
197                        });
198                        extension_trie.insert_str(&entry_extension, index);
199                        index
200                    });
201
202                if entry_name == name {
203                    original_name = Some(entry.file_name().to_string());
204                    new_cactus_index = index;
205                }
206            }
207
208            let Some(original_name) = original_name else {
209                return Ok(());
210            };
211            if !lower_string.is_empty() {
212                lower_string.push('/');
213            }
214            lower_string.push_str(name);
215            if !original_string.is_empty() {
216                original_string.push(std::path::MAIN_SEPARATOR);
217            }
218            original_string.push_str(&original_name);
219            cactus_index = Some(new_cactus_index);
220        }
221
222        Ok(())
223    }
224
225    /// Gets the case-sensitive version of the given case-insensitive path from the cache.
226    /// The path has to already exist in the cache; you need to use `.regen` to insert paths into
227    /// the cache before this can get them.
228    fn desensitize(&self, path: impl AsRef<camino::Utf8Path>) -> Option<camino::Utf8PathBuf> {
229        let path = path.as_ref();
230        if path.as_str().is_empty() {
231            return Some(Default::default());
232        }
233        let mut path = to_lowercase(path);
234        let extension = path.extension().unwrap_or_default().to_string();
235        let path_clone = path.clone();
236        path.set_extension("");
237
238        // Try to search for the given path exactly (case-insensitive)
239        let maybe_exact_match =
240            self.trie
241                .get_file(with_trie_suffix(&path))
242                .and_then(|extension_trie| {
243                    extension_trie
244                        .get_str(&extension)
245                        .map(|&index| self.get_path_from_cactus_index(index))
246                });
247
248        maybe_exact_match.or_else(|| {
249            // If we didn't find anything the first time, try again using a '.*' glob pattern
250            // appended to the end (still case-insensitive)
251            let path = path_clone;
252            self.trie
253                .get_file(with_trie_suffix(path))
254                .and_then(|extension_trie| {
255                    extension_trie
256                        .values()
257                        .next()
258                        .map(|&index| self.get_path_from_cactus_index(index))
259                })
260        })
261    }
262}
263
264impl<F> FileSystem<F>
265where
266    F: FileSystemTrait,
267{
268    pub fn new(fs: F) -> Result<Self> {
269        let this = FileSystem {
270            fs,
271            cache: Default::default(),
272        };
273        Ok(this)
274    }
275
276    pub fn fs(&self) -> &F {
277        &self.fs
278    }
279
280    pub fn rebuild(&self) {
281        let mut cache = self.cache.write();
282        *cache = Default::default(); // FIXME we don't actually bother rebuilding anything, this is just a reset...
283    }
284
285    pub fn debug_ui(&self, ui: &mut egui::Ui) {
286        let cache = self.cache.read();
287
288        ui.with_layout(
289            egui::Layout {
290                cross_justify: true,
291                ..*ui.layout()
292            },
293            |ui| {
294                egui::ScrollArea::vertical()
295                    .id_source("luminol_path_cache_debug_ui")
296                    .show_rows(
297                        ui,
298                        ui.text_style_height(&egui::TextStyle::Body),
299                        cache.cactus.len(),
300                        |ui, rows| {
301                            for (_, (key, cactus_index)) in cache
302                                .trie
303                                .iter_prefix("")
304                                .unwrap()
305                                .filter_map(|(mut key, extension_trie)| {
306                                    (key.file_name() == Some(TRIE_SUFFIX)).then(|| {
307                                        key.pop();
308                                        extension_trie
309                                            .values()
310                                            .map(move |&cactus_index| (key.clone(), cactus_index))
311                                    })
312                                })
313                                .flatten()
314                                .enumerate()
315                                .filter(|(row, _)| rows.contains(row))
316                            {
317                                ui.add(
318                                    egui::Label::new(format!(
319                                        "{key} ➡ {}",
320                                        cache.get_path_from_cactus_index(cactus_index),
321                                    ))
322                                    .truncate(),
323                                );
324                            }
325                        },
326                    );
327            },
328        );
329    }
330
331    /// Finds the correct letter casing and file extension for the given RPG Maker-style
332    /// case-insensitive path.
333    ///
334    /// First this function will perform a case-insensitive search for the given path.
335    ///
336    /// If no file or folder at that path is found, this function searches a second time with a
337    /// '.*' glob pattern appended to the end of the path (e.g. if you're looking for "my/path",
338    /// this will also find stuff like "my/path.txt" or "my/path.json").
339    ///
340    /// If no match was found either time, returns `Err(NotExist)`.
341    pub fn desensitize(&self, path: impl AsRef<camino::Utf8Path>) -> Result<camino::Utf8PathBuf> {
342        let path = path.as_ref();
343        let mut cache = self.cache.write();
344        cache.regen(&self.fs, path)?;
345        cache.desensitize(path).ok_or(Error::NotExist.into())
346    }
347}
348
349pub fn to_lowercase(p: impl AsRef<camino::Utf8Path>) -> camino::Utf8PathBuf {
350    p.as_ref()
351        .as_str()
352        .to_lowercase()
353        .replace(std::path::MAIN_SEPARATOR, "/")
354        .into()
355}
356
357fn with_trie_suffix(path: impl AsRef<camino::Utf8Path>) -> camino::Utf8PathBuf {
358    let path = path.as_ref();
359    if path.as_str().is_empty() {
360        TRIE_SUFFIX.into()
361    } else {
362        format!("{path}/{TRIE_SUFFIX}").into()
363    }
364}
365
366impl<F> FileSystemTrait for FileSystem<F>
367where
368    F: FileSystemTrait,
369{
370    type File = F::File;
371
372    fn open_file(
373        &self,
374        path: impl AsRef<camino::Utf8Path>,
375        flags: OpenFlags,
376    ) -> Result<Self::File> {
377        let mut cache = self.cache.write();
378        let path = path.as_ref();
379        let c = format!("While opening file {path:?} in a path cache");
380        cache.regen(&self.fs, path).wrap_err_with(|| c.clone())?;
381
382        if flags.contains(OpenFlags::Create) && cache.desensitize(path).is_none() {
383            // If `OpenFlags::Create` was passed via `flags` and the given path doesn't exist in
384            // the cache, then it must be the case that the path doesn't exist because we just
385            // called `.regen` to attempt to insert the path into the cache a few lines ago. So we
386            // need to create the file.
387
388            // Use the path cache to get the desensitized version of the path to the parent
389            // directory of the new file we need to create. If the parent directory doesn't exist
390            // in the cache either then the parent directory doesn't exist yet, so error out with a
391            // "does not exist" error because we don't recursively create parent directories.
392            let parent_path = cache
393                .desensitize(
394                    path.parent()
395                        .ok_or(Error::NotExist)
396                        .wrap_err_with(|| c.clone())?,
397                )
398                .ok_or(Error::NotExist)
399                .wrap_err_with(|| c.clone())?;
400
401            // Create the file in the parent directory with the filename at the end of the original
402            // path.
403            let path = parent_path.join(path.file_name().unwrap());
404            let file = self
405                .fs
406                .open_file(&path, flags)
407                .wrap_err_with(|| c.clone())?;
408
409            // Add the new file to the path cache.
410            cache.regen(&self.fs, &path).wrap_err_with(|| c.clone())?;
411
412            Ok(file)
413        } else {
414            self.fs
415                .open_file(
416                    cache
417                        .desensitize(path)
418                        .ok_or(Error::NotExist)
419                        .wrap_err_with(|| c.clone())?,
420                    flags,
421                )
422                .wrap_err_with(|| c.clone())
423        }
424    }
425
426    fn metadata(&self, path: impl AsRef<camino::Utf8Path>) -> Result<Metadata> {
427        let mut cache = self.cache.write();
428        let path = path.as_ref();
429        let c = format!("While getting metadata for {path:?} in a path cache");
430        cache.regen(&self.fs, path).wrap_err_with(|| c.clone())?;
431
432        let path = cache
433            .desensitize(path)
434            .ok_or(Error::NotExist)
435            .wrap_err_with(|| c.clone())?;
436        self.fs.metadata(path).wrap_err_with(|| c.clone())
437    }
438
439    fn rename(
440        &self,
441        from: impl AsRef<camino::Utf8Path>,
442        to: impl AsRef<camino::Utf8Path>,
443    ) -> Result<()> {
444        let mut cache = self.cache.write();
445        let c = format!(
446            "While renaming {:?} to {:?} in a path cache",
447            from.as_ref(),
448            to.as_ref()
449        );
450        cache
451            .regen(&self.fs, from.as_ref())
452            .wrap_err_with(|| c.clone())?;
453        let from = cache
454            .desensitize(from)
455            .ok_or(Error::NotExist)
456            .wrap_err_with(|| c.clone())?;
457
458        self.fs.rename(&from, to).wrap_err_with(|| c.clone())?;
459
460        {
461            let cache = &mut *cache;
462            for extension_trie in cache.trie.iter_prefix(&from).unwrap().map(|(_, t)| t) {
463                for index in extension_trie.values().copied() {
464                    cache.cactus.remove(index);
465                }
466            }
467            cache.trie.remove_dir(&from);
468        }
469
470        Ok(())
471    }
472
473    fn exists(&self, path: impl AsRef<camino::Utf8Path>) -> Result<bool> {
474        let mut cache = self.cache.write();
475        let path = path.as_ref();
476        let c = format!("While checking if {path:?} exists in a path cache");
477        cache.regen(&self.fs, path).wrap_err_with(|| c.clone())?;
478        Ok(cache.desensitize(path).is_some())
479    }
480
481    fn create_dir(&self, path: impl AsRef<camino::Utf8Path>) -> Result<()> {
482        let mut cache = self.cache.write();
483        let path = path.as_ref();
484        let c = format!("While creating directory {path:?} in a path cache");
485        cache.regen(&self.fs, path).wrap_err_with(|| c.clone())?;
486
487        let mut lower_path = to_lowercase(path);
488        let extension = lower_path.extension().unwrap_or_default().to_string();
489        lower_path.set_extension("");
490        let prefix = cache.trie.get_dir_prefix(lower_path);
491        let cactus_index = (!prefix.as_str().is_empty()).then(|| {
492            let extension_trie = cache.trie.get_file(with_trie_suffix(prefix)).unwrap();
493            *extension_trie
494                .get_str(&extension)
495                .unwrap_or(extension_trie.values().next().unwrap())
496        });
497        let original_prefix =
498            cactus_index.map_or_else(Default::default, |i| cache.get_path_from_cactus_index(i));
499        let len = original_prefix.iter().count();
500
501        self.fs
502            .create_dir(if len == 0 {
503                path.to_path_buf()
504            } else if len == path.iter().count() {
505                original_prefix
506            } else {
507                std::iter::once(original_prefix.as_str())
508                    .chain(path.iter().skip(len))
509                    .join(std::path::MAIN_SEPARATOR_STR)
510                    .into()
511            })
512            .wrap_err_with(|| c.clone())?;
513
514        Ok(())
515    }
516
517    fn remove_dir(&self, path: impl AsRef<camino::Utf8Path>) -> Result<()> {
518        let mut cache = self.cache.write();
519        let path = path.as_ref();
520        let c = format!("While removing directory {path:?} in a path cache");
521        cache.regen(&self.fs, path).wrap_err_with(|| c.clone())?;
522        let path = cache
523            .desensitize(path)
524            .ok_or(Error::NotExist)
525            .wrap_err_with(|| c.clone())?;
526
527        self.fs.remove_dir(&path).wrap_err_with(|| c.clone())?;
528
529        // Remove the directory and its contents from `cache.trie` and `cache.cactus`
530        {
531            let cache = &mut *cache;
532
533            let mut path = to_lowercase(path);
534            path.set_extension("");
535
536            if let Some(iter) = cache.trie.iter_prefix(&path) {
537                for extension_trie in iter.map(|(_, t)| t) {
538                    for index in extension_trie.values().copied() {
539                        cache.cactus.remove(index);
540                    }
541                }
542                cache.trie.remove_dir(&path);
543            }
544        }
545
546        Ok(())
547    }
548
549    fn remove_file(&self, path: impl AsRef<camino::Utf8Path>) -> Result<()> {
550        let mut cache = self.cache.write();
551        let path = path.as_ref();
552        let c = format!("While removing file {path:?} in a path cache");
553        cache.regen(&self.fs, path).wrap_err_with(|| c.clone())?;
554        let path = cache
555            .desensitize(path)
556            .ok_or(Error::NotExist)
557            .wrap_err_with(|| c.clone())?;
558
559        self.fs.remove_file(&path).wrap_err_with(|| c.clone())?;
560
561        // Remove the file from `cache.trie` and `cache.cactus`
562        {
563            let cache = &mut *cache;
564
565            let mut path = to_lowercase(path);
566            let extension = path.extension().unwrap_or_default().to_string();
567            let path_clone = path.clone();
568            path.set_extension("");
569
570            // Remove by exact match
571            if let Some(extension_trie) = cache.trie.get_file_mut(with_trie_suffix(&path)) {
572                if let Some(&index) = extension_trie.get_str(&extension) {
573                    cache.cactus.remove(index);
574                    extension_trie.remove_str(&extension);
575                    if extension_trie.is_empty() {
576                        cache.trie.remove_dir(&path);
577                    }
578                    return Ok(());
579                }
580            }
581
582            // Remove with added '.*' glob pattern
583            let path = path_clone;
584            if let Some(extension_trie) = cache.trie.get_file_mut(with_trie_suffix(&path)) {
585                if let Some((key, &index)) = extension_trie.iter().next() {
586                    let key = key.to_owned();
587                    cache.cactus.remove(index);
588                    extension_trie.remove(&key);
589                }
590                if extension_trie.is_empty() {
591                    cache.trie.remove_dir(&path);
592                }
593            }
594        }
595
596        Ok(())
597    }
598
599    fn read_dir(&self, path: impl AsRef<camino::Utf8Path>) -> Result<Vec<DirEntry>> {
600        let mut cache = self.cache.write();
601        let path = path.as_ref();
602        let c = format!("While reading the contents of the directory {path:?} in a path cache");
603        cache.regen(&self.fs, path).wrap_err_with(|| c.clone())?;
604        let path = cache
605            .desensitize(path)
606            .ok_or(Error::NotExist)
607            .wrap_err_with(|| c.clone())?;
608        self.fs.read_dir(path).wrap_err_with(|| c.clone())
609    }
610}