tinymist_vfs/
lib.rs

1//! upstream of following files <https://github.com/rust-lang/rust-analyzer/tree/master/crates/vfs>
2//!   ::path_interner.rs -> path_interner.rs
3
4/// Provides ProxyAccessModel that makes access to JavaScript objects for
5/// browser compilation.
6#[cfg(feature = "browser")]
7pub mod browser;
8
9/// Provides SystemAccessModel that makes access to the local file system for
10/// system compilation.
11#[cfg(feature = "system")]
12pub mod system;
13
14/// Provides dummy access model.
15///
16/// Note: we can still perform compilation with dummy access model, since
17/// [`Vfs`] will make a overlay access model over the provided dummy access
18/// model.
19pub mod dummy;
20
21/// Provides snapshot models
22pub mod snapshot;
23pub use snapshot::*;
24use tinymist_std::hash::{FxDashMap, FxHashMap};
25
26/// Provides notify access model which retrieves file system events and changes
27/// from some notify backend.
28pub mod notify;
29pub use notify::{FilesystemEvent, MemoryEvent};
30/// Provides overlay access model which allows to shadow the underlying access
31/// model with memory contents.
32pub mod overlay;
33/// Provides resolve access model.
34pub mod resolve;
35/// Provides trace access model which traces the underlying access model.
36pub mod trace;
37mod utils;
38
39mod path_mapper;
40pub use path_mapper::{PathResolution, RootResolver, WorkspaceResolution, WorkspaceResolver};
41
42use core::fmt;
43use std::num::NonZeroUsize;
44use std::sync::OnceLock;
45use std::{path::Path, sync::Arc};
46
47use ecow::EcoVec;
48use parking_lot::Mutex;
49use rpds::RedBlackTreeMapSync;
50use typst::diag::{FileError, FileResult};
51use typst::foundations::Dict;
52use typst::syntax::Source;
53use typst::utils::LazyHash;
54
55use crate::notify::NotifyAccessModel;
56use crate::overlay::OverlayAccessModel;
57use crate::resolve::ResolveAccessModel;
58
59pub use tinymist_std::time::Time;
60pub use tinymist_std::ImmutPath;
61pub use typst::foundations::Bytes;
62pub use typst::syntax::FileId;
63
64/// Immutable prehashed reference to dictionary.
65pub type ImmutDict = Arc<LazyHash<Dict>>;
66
67/// A trait for accessing underlying file system.
68///
69/// This trait is simplified by [`Vfs`] and requires a minimal method set for
70/// typst compilation.
71pub trait PathAccessModel {
72    /// Clears the cache of the access model.
73    ///
74    /// This is called when the vfs is reset. See [`Vfs`]'s reset method for
75    /// more information.
76    fn reset(&mut self) {}
77
78    /// Returns the content of a file entry.
79    fn content(&self, src: &Path) -> FileResult<Bytes>;
80}
81
82/// A trait for accessing underlying file system.
83///
84/// This trait is simplified by [`Vfs`] and requires a minimal method set for
85/// typst compilation.
86pub trait AccessModel {
87    /// Clears the cache of the access model.
88    ///
89    /// This is called when the vfs is reset. See [`Vfs`]'s reset method for
90    /// more information.
91    fn reset(&mut self) {}
92
93    /// Returns the content of a file entry.
94    fn content(&self, src: FileId) -> (Option<ImmutPath>, FileResult<Bytes>);
95}
96
97type VfsPathAccessModel<M> = OverlayAccessModel<ImmutPath, NotifyAccessModel<M>>;
98/// we add notify access model here since notify access model doesn't introduce
99/// overheads by our observation
100type VfsAccessModel<M> = OverlayAccessModel<FileId, ResolveAccessModel<VfsPathAccessModel<M>>>;
101
102/// A trait to perform file system query.
103pub trait FsProvider {
104    /// Gets the file path corresponding to the given `id`.
105    fn file_path(&self, id: FileId) -> FileResult<PathResolution>;
106    /// Gets the file content corresponding to the given `id`.
107    fn read(&self, id: FileId) -> FileResult<Bytes>;
108    /// Gets the source code corresponding to the given `id`. It is preferred to
109    /// be used for source files so that parsing is reused across editions.
110    fn read_source(&self, id: FileId) -> FileResult<Source>;
111}
112
113struct SourceEntry {
114    last_accessed_rev: NonZeroUsize,
115    source: FileResult<Source>,
116}
117
118#[derive(Default)]
119struct SourceIdShard {
120    last_accessed_rev: usize,
121    recent_source: Option<Source>,
122    sources: FxHashMap<Bytes, SourceEntry>,
123}
124
125/// A source cache shared across VFS.
126#[derive(Default, Clone)]
127pub struct SourceCache {
128    /// The cache entries for each paths
129    cache_entries: Arc<FxDashMap<FileId, SourceIdShard>>,
130}
131
132impl SourceCache {
133    /// Evicts cache, given a current revision `curr`, and a threshold. The too
134    /// old cache entries will be evicted from the cache.
135    pub fn evict(&self, curr: NonZeroUsize, threshold: usize) {
136        self.cache_entries.retain(|_, shard| {
137            let diff = curr.get().saturating_sub(shard.last_accessed_rev);
138            if diff > threshold {
139                return false;
140            }
141
142            shard.sources.retain(|_, entry| {
143                let diff = curr.get().saturating_sub(entry.last_accessed_rev.get());
144                diff <= threshold
145            });
146
147            true
148        });
149    }
150}
151
152/// Creates a new `Vfs` harnessing over the given `access_model` specific for
153/// `reflexo_world::CompilerWorld`. With vfs, we can minimize the
154/// implementation overhead for [`AccessModel`] trait.
155pub struct Vfs<M: PathAccessModel + Sized> {
156    source_cache: SourceCache,
157    managed: Arc<Mutex<EntryMap>>,
158    paths: Arc<Mutex<PathMap>>,
159    revision: NonZeroUsize,
160    // access_model: TraceAccessModel<VfsAccessModel<M>>,
161    /// The wrapped access model.
162    access_model: VfsAccessModel<M>,
163}
164
165impl<M: PathAccessModel + Sized> fmt::Debug for Vfs<M> {
166    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
167        f.debug_struct("Vfs")
168            .field("revision", &self.revision)
169            .field("managed", &self.managed.lock().entries.size())
170            .field("paths", &self.paths.lock().paths.len())
171            .finish()
172    }
173}
174
175impl<M: PathAccessModel + Clone + Sized> Vfs<M> {
176    /// Gets current revision of the vfs.
177    pub fn revision(&self) -> NonZeroUsize {
178        self.revision
179    }
180
181    /// Performs snapshot with sharing cache and managed resource.
182    pub fn snapshot(&self) -> Self {
183        Self {
184            revision: self.revision,
185            source_cache: self.source_cache.clone(),
186            managed: self.managed.clone(),
187            paths: self.paths.clone(),
188            access_model: self.access_model.clone(),
189        }
190    }
191
192    /// Performs snapshot with sharing cache, but not the resources.
193    pub fn fork(&self) -> Self {
194        Self {
195            // todo: it is not correct to merely share source cache.
196            source_cache: self.source_cache.clone(),
197            managed: Arc::new(Mutex::new(EntryMap::default())),
198            paths: Arc::new(Mutex::new(PathMap::default())),
199            revision: NonZeroUsize::new(2).expect("initial revision is 2"),
200            access_model: self.access_model.clone(),
201        }
202    }
203
204    /// Detects whether the vfs is clean respecting a given revision and
205    /// `file_ids`.
206    pub fn is_clean_compile(&self, rev: usize, file_ids: &[FileId]) -> bool {
207        let mut m = self.managed.lock();
208        for id in file_ids {
209            let Some(entry) = m.entries.get_mut(id) else {
210                log::debug!("Vfs(dirty, {id:?}): file id not found");
211                return false;
212            };
213            if entry.changed_at > rev {
214                log::debug!("Vfs(dirty, {id:?}): rev {rev:?} => {:?}", entry.changed_at);
215                return false;
216            }
217            log::debug!(
218                "Vfs(clean, {id:?}, rev={rev}, changed_at={})",
219                entry.changed_at
220            );
221        }
222        true
223    }
224}
225
226impl<M: PathAccessModel + Sized> Vfs<M> {
227    /// Creates a new `Vfs` with a given `access_model`.
228    ///
229    /// Retrieving an [`AccessModel`], it will further wrap the access model
230    /// with [`OverlayAccessModel`] and [`NotifyAccessModel`]. This means that
231    /// you don't need to implement:
232    /// + overlay: allowing to shadow the underlying access model with memory
233    ///   contents, which is useful for a limited execution environment and
234    ///   instrumenting or overriding source files or packages.
235    /// + notify: regards problems of synchronizing with the file system when
236    ///   the vfs is watching the file system.
237    ///
238    /// See [`AccessModel`] for more information.
239    pub fn new(resolver: Arc<dyn RootResolver + Send + Sync>, access_model: M) -> Self {
240        let access_model = NotifyAccessModel::new(access_model);
241        let access_model = OverlayAccessModel::new(access_model);
242        let access_model = ResolveAccessModel {
243            resolver,
244            inner: access_model,
245        };
246        let access_model = OverlayAccessModel::new(access_model);
247
248        // If you want to trace the access model, uncomment the following line
249        // let access_model = TraceAccessModel::new(access_model);
250
251        Self {
252            source_cache: SourceCache::default(),
253            managed: Arc::default(),
254            paths: Arc::default(),
255            revision: NonZeroUsize::new(2).expect("initial revision is 2"),
256            access_model,
257        }
258    }
259
260    /// Resets all state.
261    pub fn reset_all(&mut self) {
262        self.reset_access_model();
263        self.reset_read();
264        self.take_source_cache();
265    }
266
267    /// Resets access model.
268    pub fn reset_access_model(&mut self) {
269        self.access_model.reset();
270    }
271
272    /// Resets all read caches. This can happen when:
273    /// - package paths are reconfigured.
274    /// - The root of the workspace is switched.
275    pub fn reset_read(&mut self) {
276        self.managed = Arc::default();
277        self.paths = Arc::default();
278    }
279
280    /// Clears the cache that is not touched for a long time.
281    pub fn evict(&mut self, threshold: usize) {
282        let mut m = self.managed.lock();
283        let rev = self.revision.get();
284        for (id, entry) in m.entries.clone().iter() {
285            let entry_rev = entry.bytes.get().map(|e| e.1).unwrap_or_default();
286            if entry_rev + threshold < rev {
287                m.entries.remove_mut(id);
288            }
289        }
290    }
291
292    /// Takes source cache. It also cleans the cache in the current vfs.
293    pub fn take_source_cache(&mut self) -> SourceCache {
294        std::mem::take(&mut self.source_cache)
295    }
296
297    /// Takes source cache for sharing.
298    pub fn clone_source_cache(&self) -> SourceCache {
299        self.source_cache.clone()
300    }
301
302    /// Resolve the real path for a file id.
303    pub fn file_path(&self, id: FileId) -> Result<PathResolution, FileError> {
304        self.access_model.inner.resolver.path_for_id(id)
305    }
306
307    /// Get paths to all the shadowing paths in [`OverlayAccessModel`].
308    pub fn shadow_paths(&self) -> Vec<ImmutPath> {
309        self.access_model.inner.inner.file_paths()
310    }
311
312    /// Get paths to all the shadowing file ids in [`OverlayAccessModel`].
313    ///
314    /// The in memory untitled files can have no path so
315    /// they only have file ids.
316    pub fn shadow_ids(&self) -> Vec<FileId> {
317        self.access_model.file_paths()
318    }
319
320    /// Returns the overall memory usage for the stored files.
321    pub fn memory_usage(&self) -> usize {
322        0
323    }
324
325    /// Obtains an object to revise. The object will update the original vfs
326    /// when it is dropped.
327    pub fn revise(&mut self) -> RevisingVfs<M> {
328        let managed = self.managed.lock().clone();
329        let paths = self.paths.lock().clone();
330        let goal_revision = self.revision.checked_add(1).expect("revision overflowed");
331
332        RevisingVfs {
333            managed,
334            paths,
335            inner: self,
336            goal_revision,
337            view_changed: false,
338        }
339    }
340
341    /// Obtains an object to display.
342    pub fn display(&self) -> DisplayVfs<M> {
343        DisplayVfs { inner: self }
344    }
345
346    /// Reads a file by id.
347    pub fn read(&self, fid: FileId) -> FileResult<Bytes> {
348        let bytes = self.managed.lock().slot(fid, |entry| entry.bytes.clone());
349
350        self.read_content(&bytes, fid).clone()
351    }
352
353    /// Reads a source file by id. It is preferred to be used for source files
354    /// so that parsing is reused across editions.
355    pub fn source(&self, file_id: FileId) -> FileResult<Source> {
356        let (bytes, source) = self
357            .managed
358            .lock()
359            .slot(file_id, |entry| (entry.bytes.clone(), entry.source.clone()));
360
361        let source = source.get_or_init(|| {
362            let content = self
363                .read_content(&bytes, file_id)
364                .as_ref()
365                .map_err(Clone::clone)?;
366
367            let mut cache_entry = self.source_cache.cache_entries.entry(file_id).or_default();
368            if let Some(source) = cache_entry.sources.get(content) {
369                return source.source.clone();
370            }
371
372            let source = (|| {
373                let prev = cache_entry.recent_source.clone();
374                let content = from_utf8_or_bom(content).map_err(|_| FileError::InvalidUtf8)?;
375
376                let next = match prev {
377                    Some(mut prev) => {
378                        prev.replace(content);
379                        prev
380                    }
381                    None => Source::new(file_id, content.to_owned()),
382                };
383
384                let should_update = cache_entry.recent_source.is_none()
385                    || cache_entry.last_accessed_rev < self.revision.get();
386                if should_update {
387                    cache_entry.recent_source = Some(next.clone());
388                }
389
390                Ok(next)
391            })();
392
393            let entry = cache_entry
394                .sources
395                .entry(content.clone())
396                .or_insert_with(|| SourceEntry {
397                    last_accessed_rev: self.revision,
398                    source: source.clone(),
399                });
400
401            if entry.last_accessed_rev < self.revision {
402                entry.last_accessed_rev = self.revision;
403            }
404
405            source
406        });
407
408        source.clone()
409    }
410
411    /// Reads and caches content of a file.
412    fn read_content<'a>(&self, bytes: &'a BytesQuery, fid: FileId) -> &'a FileResult<Bytes> {
413        &bytes
414            .get_or_init(|| {
415                let (path, content) = self.access_model.content(fid);
416                if let Some(path) = path.as_ref() {
417                    self.paths.lock().insert(path, fid, self.revision);
418                }
419
420                (path, self.revision.get(), content)
421            })
422            .2
423    }
424}
425
426/// A display wrapper for [`Vfs`].
427pub struct DisplayVfs<'a, M: PathAccessModel + Sized> {
428    inner: &'a Vfs<M>,
429}
430
431impl<M: PathAccessModel + Sized> fmt::Debug for DisplayVfs<'_, M> {
432    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
433        f.debug_struct("Vfs")
434            .field("revision", &self.inner.revision)
435            .field("managed", &self.inner.managed.lock().display())
436            .field("paths", &self.inner.paths.lock().display())
437            .finish()
438    }
439}
440
441/// A revising wrapper for [`Vfs`].
442pub struct RevisingVfs<'a, M: PathAccessModel + Sized> {
443    inner: &'a mut Vfs<M>,
444    managed: EntryMap,
445    paths: PathMap,
446    goal_revision: NonZeroUsize,
447    view_changed: bool,
448}
449
450impl<M: PathAccessModel + Sized> Drop for RevisingVfs<'_, M> {
451    fn drop(&mut self) {
452        if self.view_changed {
453            self.inner.managed = Arc::new(Mutex::new(std::mem::take(&mut self.managed)));
454            self.inner.paths = Arc::new(Mutex::new(std::mem::take(&mut self.paths)));
455            let revision = &mut self.inner.revision;
456            *revision = self.goal_revision;
457        }
458    }
459}
460
461impl<M: PathAccessModel + Sized> RevisingVfs<'_, M> {
462    /// Returns the underlying vfs.
463    pub fn vfs(&mut self) -> &mut Vfs<M> {
464        self.inner
465    }
466
467    fn am(&mut self) -> &mut VfsAccessModel<M> {
468        &mut self.inner.access_model
469    }
470
471    fn invalidate_path(&mut self, path: &Path) {
472        if let Some(fids) = self.paths.get(path) {
473            if fids.is_empty() {
474                return;
475            }
476
477            self.view_changed = true;
478            for fid in fids.clone() {
479                self.invalidate_file_id(fid);
480            }
481        }
482    }
483
484    fn invalidate_file_id(&mut self, file_id: FileId) {
485        self.view_changed = true;
486        self.managed.slot(file_id, |e| {
487            e.changed_at = self.goal_revision.get();
488            e.bytes = Arc::default();
489            e.source = Arc::default();
490        });
491    }
492
493    /// Reset the shadowing files in [`OverlayAccessModel`].
494    pub fn reset_shadow(&mut self) {
495        for path in self.am().inner.inner.file_paths() {
496            self.invalidate_path(&path);
497        }
498        for fid in self.am().file_paths() {
499            self.invalidate_file_id(fid);
500        }
501
502        self.am().clear_shadow();
503        self.am().inner.inner.clear_shadow();
504    }
505
506    /// Adds a shadowing file to the [`OverlayAccessModel`].
507    pub fn map_shadow(&mut self, path: &Path, snap: FileSnapshot) -> FileResult<()> {
508        self.view_changed = true;
509        self.invalidate_path(path);
510        self.am().inner.inner.add_file(path, snap, |c| c.into());
511
512        Ok(())
513    }
514
515    /// Removes a shadowing file from the [`OverlayAccessModel`].
516    pub fn unmap_shadow(&mut self, path: &Path) -> FileResult<()> {
517        self.view_changed = true;
518        self.invalidate_path(path);
519        self.am().inner.inner.remove_file(path);
520
521        Ok(())
522    }
523
524    /// Adds a shadowing file to the [`OverlayAccessModel`] by file id.
525    pub fn map_shadow_by_id(&mut self, file_id: FileId, snap: FileSnapshot) -> FileResult<()> {
526        self.view_changed = true;
527        self.invalidate_file_id(file_id);
528        self.am().add_file(&file_id, snap, |c| *c);
529
530        Ok(())
531    }
532
533    /// Removes a shadowing file from the [`OverlayAccessModel`] by file id.
534    pub fn remove_shadow_by_id(&mut self, file_id: FileId) {
535        self.view_changed = true;
536        self.invalidate_file_id(file_id);
537        self.am().remove_file(&file_id);
538    }
539
540    /// Notifies the access model with a filesystem event.
541    ///
542    /// See [`NotifyAccessModel`] for more information.
543    pub fn notify_fs_event(&mut self, event: FilesystemEvent) {
544        self.notify_fs_changes(event.split().0);
545    }
546    /// Notifies the access model with a filesystem changes.
547    ///
548    /// See [`NotifyAccessModel`] for more information.
549    pub fn notify_fs_changes(&mut self, event: FileChangeSet) {
550        for path in &event.removes {
551            self.invalidate_path(path);
552        }
553        for (path, _) in &event.inserts {
554            self.invalidate_path(path);
555        }
556
557        self.am().inner.inner.inner.notify(event);
558    }
559}
560
561type BytesQuery = Arc<OnceLock<(Option<ImmutPath>, usize, FileResult<Bytes>)>>;
562
563#[derive(Debug, Clone, Default)]
564struct VfsEntry {
565    changed_at: usize,
566    bytes: BytesQuery,
567    source: Arc<OnceLock<FileResult<Source>>>,
568}
569
570#[derive(Debug, Clone, Default)]
571struct EntryMap {
572    entries: RedBlackTreeMapSync<FileId, VfsEntry>,
573}
574
575impl EntryMap {
576    /// Read a slot.
577    #[inline(always)]
578    fn slot<T>(&mut self, path: FileId, f: impl FnOnce(&mut VfsEntry) -> T) -> T {
579        if let Some(entry) = self.entries.get_mut(&path) {
580            f(entry)
581        } else {
582            let mut entry = VfsEntry::default();
583            let res = f(&mut entry);
584            self.entries.insert_mut(path, entry);
585            res
586        }
587    }
588
589    fn display(&self) -> DisplayEntryMap {
590        DisplayEntryMap { map: self }
591    }
592}
593
594/// A display wrapper for [`EntryMap`].
595pub struct DisplayEntryMap<'a> {
596    map: &'a EntryMap,
597}
598
599impl fmt::Debug for DisplayEntryMap<'_> {
600    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
601        f.debug_map().entries(self.map.entries.iter()).finish()
602    }
603}
604
605#[derive(Debug, Clone, Default)]
606struct PathMap {
607    paths: FxHashMap<ImmutPath, EcoVec<FileId>>,
608    file_ids: FxHashMap<FileId, (ImmutPath, NonZeroUsize)>,
609}
610
611impl PathMap {
612    fn insert(&mut self, next: &ImmutPath, fid: FileId, rev: NonZeroUsize) {
613        use std::collections::hash_map::Entry;
614        let rev_entry = self.file_ids.entry(fid);
615
616        match rev_entry {
617            Entry::Occupied(mut entry) => {
618                let (prev, prev_rev) = entry.get_mut();
619                if prev != next {
620                    if *prev_rev == rev {
621                        log::warn!("Vfs: {fid:?} is changed in rev({rev:?}), {prev:?} -> {next:?}");
622                    }
623
624                    if let Some(fids) = self.paths.get_mut(prev) {
625                        fids.retain(|f| *f != fid);
626                    }
627
628                    *prev = next.clone();
629                    *prev_rev = rev;
630
631                    self.paths.entry(next.clone()).or_default().push(fid);
632                }
633            }
634            Entry::Vacant(entry) => {
635                entry.insert((next.clone(), rev));
636                self.paths.entry(next.clone()).or_default().push(fid);
637            }
638        }
639    }
640
641    fn get(&mut self, path: &Path) -> Option<&EcoVec<FileId>> {
642        self.paths.get(path)
643    }
644
645    fn display(&self) -> DisplayPathMap {
646        DisplayPathMap { map: self }
647    }
648}
649
650/// A display wrapper for [`PathMap`].
651pub struct DisplayPathMap<'a> {
652    map: &'a PathMap,
653}
654
655impl fmt::Debug for DisplayPathMap<'_> {
656    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
657        f.debug_map().entries(self.map.paths.iter()).finish()
658    }
659}
660
661/// Convert a byte slice to a string, removing UTF-8 BOM if present.
662fn from_utf8_or_bom(buf: &[u8]) -> FileResult<&str> {
663    Ok(std::str::from_utf8(if buf.starts_with(b"\xef\xbb\xbf") {
664        // remove UTF-8 BOM
665        &buf[3..]
666    } else {
667        // Assume UTF-8
668        buf
669    })?)
670}
671
672#[cfg(test)]
673mod tests {
674    fn is_send<T: Send>() {}
675    fn is_sync<T: Sync>() {}
676
677    #[test]
678    fn test_vfs_send_sync() {
679        is_send::<super::Vfs<super::dummy::DummyAccessModel>>();
680        is_sync::<super::Vfs<super::dummy::DummyAccessModel>>();
681    }
682}