tinymist_vfs/
lib.rs

1//! upstream of following files <https://github.com/rust-lang/rust-analyzer/tree/master/crates/vfs>
2//!   ::path_interner.rs -> path_interner.rs
3
4#![allow(missing_docs)]
5
6/// Provides ProxyAccessModel that makes access to JavaScript objects for
7/// browser compilation.
8#[cfg(feature = "browser")]
9pub mod browser;
10
11/// Provides SystemAccessModel that makes access to the local file system for
12/// system compilation.
13#[cfg(feature = "system")]
14pub mod system;
15
16/// Provides dummy access model.
17///
18/// Note: we can still perform compilation with dummy access model, since
19/// [`Vfs`] will make a overlay access model over the provided dummy access
20/// model.
21pub mod dummy;
22
23/// Provides snapshot models
24pub mod snapshot;
25pub use snapshot::*;
26use tinymist_std::hash::{FxDashMap, FxHashMap};
27
28/// Provides notify access model which retrieves file system events and changes
29/// from some notify backend.
30pub mod notify;
31pub use notify::{FilesystemEvent, MemoryEvent};
32/// Provides overlay access model which allows to shadow the underlying access
33/// model with memory contents.
34pub mod overlay;
35/// Provides resolve access model.
36pub mod resolve;
37/// Provides trace access model which traces the underlying access model.
38pub mod trace;
39mod utils;
40
41mod path_mapper;
42pub use path_mapper::{PathResolution, RootResolver, WorkspaceResolution, WorkspaceResolver};
43
44use core::fmt;
45use std::num::NonZeroUsize;
46use std::sync::OnceLock;
47use std::{path::Path, sync::Arc};
48
49use ecow::EcoVec;
50use parking_lot::{Mutex, RwLock};
51use rpds::RedBlackTreeMapSync;
52use typst::diag::{FileError, FileResult};
53use typst::foundations::Dict;
54use typst::syntax::Source;
55use typst::utils::LazyHash;
56
57use crate::notify::NotifyAccessModel;
58use crate::overlay::OverlayAccessModel;
59use crate::resolve::ResolveAccessModel;
60
61pub use tinymist_std::time::Time;
62pub use tinymist_std::ImmutPath;
63pub use typst::foundations::Bytes;
64pub use typst::syntax::FileId as TypstFileId;
65
66/// Handle to a file in [`Vfs`]
67pub type FileId = TypstFileId;
68/// Immutable prehashed reference to dictionary.
69pub type ImmutDict = Arc<LazyHash<Dict>>;
70
71/// A trait for accessing underlying file system.
72///
73/// This trait is simplified by [`Vfs`] and requires a minimal method set for
74/// typst compilation.
75pub trait PathAccessModel {
76    /// Clear the cache of the access model.
77    ///
78    /// This is called when the vfs is reset. See [`Vfs`]'s reset method for
79    /// more information.
80    fn reset(&mut self) {}
81
82    /// Return the content of a file entry.
83    fn content(&self, src: &Path) -> FileResult<Bytes>;
84}
85
86/// A trait for accessing underlying file system.
87///
88/// This trait is simplified by [`Vfs`] and requires a minimal method set for
89/// typst compilation.
90pub trait AccessModel {
91    /// Clear the cache of the access model.
92    ///
93    /// This is called when the vfs is reset. See [`Vfs`]'s reset method for
94    /// more information.
95    fn reset(&mut self) {}
96
97    /// Return the content of a file entry.
98    fn content(&self, src: TypstFileId) -> (Option<ImmutPath>, FileResult<Bytes>);
99}
100
101#[derive(Clone)]
102pub struct SharedAccessModel<M> {
103    pub inner: Arc<RwLock<M>>,
104}
105
106impl<M> SharedAccessModel<M> {
107    pub fn new(inner: M) -> Self {
108        Self {
109            inner: Arc::new(RwLock::new(inner)),
110        }
111    }
112}
113
114impl<M> PathAccessModel for SharedAccessModel<M>
115where
116    M: PathAccessModel,
117{
118    #[inline]
119    fn reset(&mut self) {
120        self.inner.write().reset();
121    }
122
123    fn content(&self, src: &Path) -> FileResult<Bytes> {
124        self.inner.read().content(src)
125    }
126}
127
128type VfsPathAccessModel<M> = OverlayAccessModel<ImmutPath, NotifyAccessModel<SharedAccessModel<M>>>;
129/// we add notify access model here since notify access model doesn't introduce
130/// overheads by our observation
131type VfsAccessModel<M> = OverlayAccessModel<TypstFileId, ResolveAccessModel<VfsPathAccessModel<M>>>;
132
133pub trait FsProvider {
134    /// Arbitrary one of file path corresponding to the given `id`.
135    fn file_path(&self, id: TypstFileId) -> FileResult<PathResolution>;
136
137    fn read(&self, id: TypstFileId) -> FileResult<Bytes>;
138    fn read_source(&self, id: TypstFileId) -> FileResult<Source>;
139}
140
141struct SourceEntry {
142    last_accessed_rev: NonZeroUsize,
143    source: FileResult<Source>,
144}
145
146#[derive(Default)]
147struct SourceIdShard {
148    last_accessed_rev: usize,
149    recent_source: Option<Source>,
150    sources: FxHashMap<Bytes, SourceEntry>,
151}
152
153#[derive(Default, Clone)]
154pub struct SourceCache {
155    /// The cache entries for each paths
156    cache_entries: Arc<FxDashMap<TypstFileId, SourceIdShard>>,
157}
158
159impl SourceCache {
160    pub fn evict(&self, curr: NonZeroUsize, threshold: usize) {
161        self.cache_entries.retain(|_, shard| {
162            let diff = curr.get().saturating_sub(shard.last_accessed_rev);
163            if diff > threshold {
164                return false;
165            }
166
167            shard.sources.retain(|_, entry| {
168                let diff = curr.get().saturating_sub(entry.last_accessed_rev.get());
169                diff <= threshold
170            });
171
172            true
173        });
174    }
175}
176
177/// Create a new `Vfs` harnessing over the given `access_model` specific for
178/// `reflexo_world::CompilerWorld`. With vfs, we can minimize the
179/// implementation overhead for [`AccessModel`] trait.
180pub struct Vfs<M: PathAccessModel + Sized> {
181    source_cache: SourceCache,
182    // The slots for all the files during a single lifecycle.
183    // pub slots: Arc<Mutex<FxHashMap<TypstFileId, SourceCache>>>,
184    managed: Arc<Mutex<EntryMap>>,
185    paths: Arc<Mutex<PathMap>>,
186    revision: NonZeroUsize,
187    // access_model: TraceAccessModel<VfsAccessModel<M>>,
188    /// The wrapped access model.
189    access_model: VfsAccessModel<M>,
190}
191
192impl<M: PathAccessModel + Sized> fmt::Debug for Vfs<M> {
193    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
194        f.debug_struct("Vfs")
195            .field("revision", &self.revision)
196            .field("managed", &self.managed.lock().entries.size())
197            .field("paths", &self.paths.lock().paths.len())
198            .finish()
199    }
200}
201
202impl<M: PathAccessModel + Clone + Sized> Vfs<M> {
203    pub fn revision(&self) -> NonZeroUsize {
204        self.revision
205    }
206
207    pub fn snapshot(&self) -> Self {
208        Self {
209            source_cache: self.source_cache.clone(),
210            managed: self.managed.clone(),
211            paths: self.paths.clone(),
212            revision: self.revision,
213            access_model: self.access_model.clone(),
214        }
215    }
216
217    pub fn fork(&self) -> Self {
218        Self {
219            source_cache: self.source_cache.clone(),
220            managed: Arc::new(Mutex::new(EntryMap::default())),
221            paths: Arc::new(Mutex::new(PathMap::default())),
222            revision: NonZeroUsize::new(2).expect("initial revision is 2"),
223            access_model: self.access_model.clone(),
224        }
225    }
226
227    pub fn is_clean_compile(&self, rev: usize, file_ids: &[FileId]) -> bool {
228        let mut m = self.managed.lock();
229        for id in file_ids {
230            let Some(entry) = m.entries.get_mut(id) else {
231                log::info!("Vfs(dirty, {id:?}): file id not found");
232                return false;
233            };
234            if entry.changed_at > rev {
235                log::info!("Vfs(dirty, {id:?}): rev {rev:?} => {:?}", entry.changed_at);
236                return false;
237            }
238            log::debug!(
239                "Vfs(clean, {id:?}, rev={rev}, changed_at={})",
240                entry.changed_at
241            );
242        }
243        true
244    }
245}
246
247impl<M: PathAccessModel + Sized> Vfs<M> {
248    /// Create a new `Vfs` with a given `access_model`.
249    ///
250    /// Retrieving an [`AccessModel`], it will further wrap the access model
251    /// with [`OverlayAccessModel`] and [`NotifyAccessModel`]. This means that
252    /// you don't need to implement:
253    /// + overlay: allowing to shadow the underlying access model with memory
254    ///   contents, which is useful for a limited execution environment and
255    ///   instrumenting or overriding source files or packages.
256    /// + notify: regards problems of synchronizing with the file system when
257    ///   the vfs is watching the file system.
258    ///
259    /// See [`AccessModel`] for more information.
260    pub fn new(resolver: Arc<dyn RootResolver + Send + Sync>, access_model: M) -> Self {
261        let access_model = SharedAccessModel::new(access_model);
262        let access_model = NotifyAccessModel::new(access_model);
263        let access_model = OverlayAccessModel::new(access_model);
264        let access_model = ResolveAccessModel {
265            resolver,
266            inner: access_model,
267        };
268        let access_model = OverlayAccessModel::new(access_model);
269
270        // If you want to trace the access model, uncomment the following line
271        // let access_model = TraceAccessModel::new(access_model);
272
273        Self {
274            source_cache: SourceCache::default(),
275            managed: Arc::default(),
276            paths: Arc::default(),
277            revision: NonZeroUsize::new(2).expect("initial revision is 2"),
278            access_model,
279        }
280    }
281
282    /// Reset all state.
283    pub fn reset_all(&mut self) {
284        self.reset_access_model();
285        self.reset_mapping();
286        self.take_source_cache();
287    }
288
289    /// Reset access model.
290    pub fn reset_access_model(&mut self) {
291        self.access_model.reset();
292    }
293
294    /// Reset all possible caches.
295    pub fn reset_mapping(&mut self) {
296        self.revise().reset_cache();
297    }
298
299    /// Clear the cache that is not touched for a long time.
300    pub fn evict(&mut self, threshold: usize) {
301        let mut m = self.managed.lock();
302        let rev = self.revision.get();
303        for (id, entry) in m.entries.clone().iter() {
304            let entry_rev = entry.bytes.get().map(|e| e.1).unwrap_or_default();
305            if entry_rev + threshold < rev {
306                m.entries.remove_mut(id);
307            }
308        }
309    }
310
311    pub fn take_source_cache(&mut self) -> SourceCache {
312        std::mem::take(&mut self.source_cache)
313    }
314
315    pub fn clone_source_cache(&self) -> SourceCache {
316        self.source_cache.clone()
317    }
318
319    /// Resolve the real path for a file id.
320    pub fn file_path(&self, id: TypstFileId) -> Result<PathResolution, FileError> {
321        self.access_model.inner.resolver.path_for_id(id)
322    }
323
324    /// Get paths to all the shadowing paths in [`OverlayAccessModel`].
325    pub fn shadow_paths(&self) -> Vec<ImmutPath> {
326        self.access_model.inner.inner.file_paths()
327    }
328
329    /// Get paths to all the shadowing file ids in [`OverlayAccessModel`].
330    ///
331    /// The in memory untitled files can have no path so
332    /// they only have file ids.
333    pub fn shadow_ids(&self) -> Vec<TypstFileId> {
334        self.access_model.file_paths()
335    }
336
337    /// Returns the overall memory usage for the stored files.
338    pub fn memory_usage(&self) -> usize {
339        0
340    }
341
342    pub fn revise(&mut self) -> RevisingVfs<M> {
343        let managed = self.managed.lock().clone();
344        let paths = self.paths.lock().clone();
345        let goal_revision = self.revision.checked_add(1).expect("revision overflowed");
346
347        RevisingVfs {
348            managed,
349            paths,
350            inner: self,
351            goal_revision,
352            view_changed: false,
353        }
354    }
355
356    pub fn display(&self) -> DisplayVfs<M> {
357        DisplayVfs { inner: self }
358    }
359
360    /// Reads a file.
361    pub fn read(&self, fid: TypstFileId) -> FileResult<Bytes> {
362        let bytes = self.managed.lock().slot(fid, |entry| entry.bytes.clone());
363
364        self.read_content(&bytes, fid).clone()
365    }
366
367    /// Reads a source.
368    pub fn source(&self, file_id: TypstFileId) -> FileResult<Source> {
369        let (bytes, source) = self
370            .managed
371            .lock()
372            .slot(file_id, |entry| (entry.bytes.clone(), entry.source.clone()));
373
374        let source = source.get_or_init(|| {
375            let content = self
376                .read_content(&bytes, file_id)
377                .as_ref()
378                .map_err(Clone::clone)?;
379
380            let mut cache_entry = self.source_cache.cache_entries.entry(file_id).or_default();
381            if let Some(source) = cache_entry.sources.get(content) {
382                return source.source.clone();
383            }
384
385            let source = (|| {
386                let prev = cache_entry.recent_source.clone();
387                let content = from_utf8_or_bom(content).map_err(|_| FileError::InvalidUtf8)?;
388
389                let next = match prev {
390                    Some(mut prev) => {
391                        prev.replace(content);
392                        prev
393                    }
394                    None => Source::new(file_id, content.to_owned()),
395                };
396
397                let should_update = cache_entry.recent_source.is_none()
398                    || cache_entry.last_accessed_rev < self.revision.get();
399                if should_update {
400                    cache_entry.recent_source = Some(next.clone());
401                }
402
403                Ok(next)
404            })();
405
406            let entry = cache_entry
407                .sources
408                .entry(content.clone())
409                .or_insert_with(|| SourceEntry {
410                    last_accessed_rev: self.revision,
411                    source: source.clone(),
412                });
413
414            if entry.last_accessed_rev < self.revision {
415                entry.last_accessed_rev = self.revision;
416            }
417
418            source
419        });
420
421        source.clone()
422    }
423
424    /// Reads and caches content of a file.
425    fn read_content<'a>(&self, bytes: &'a BytesQuery, fid: TypstFileId) -> &'a FileResult<Bytes> {
426        &bytes
427            .get_or_init(|| {
428                let (path, content) = self.access_model.content(fid);
429                if let Some(path) = path.as_ref() {
430                    self.paths.lock().insert(path, fid, self.revision);
431                }
432
433                (path, self.revision.get(), content)
434            })
435            .2
436    }
437}
438
439pub struct DisplayVfs<'a, M: PathAccessModel + Sized> {
440    inner: &'a Vfs<M>,
441}
442
443impl<M: PathAccessModel + Sized> fmt::Debug for DisplayVfs<'_, M> {
444    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
445        f.debug_struct("Vfs")
446            .field("revision", &self.inner.revision)
447            .field("managed", &self.inner.managed.lock().display())
448            .field("paths", &self.inner.paths.lock().display())
449            .finish()
450    }
451}
452
453pub struct RevisingVfs<'a, M: PathAccessModel + Sized> {
454    inner: &'a mut Vfs<M>,
455    managed: EntryMap,
456    paths: PathMap,
457    goal_revision: NonZeroUsize,
458    view_changed: bool,
459}
460
461impl<M: PathAccessModel + Sized> Drop for RevisingVfs<'_, M> {
462    fn drop(&mut self) {
463        if self.view_changed {
464            self.inner.managed = Arc::new(Mutex::new(std::mem::take(&mut self.managed)));
465            self.inner.paths = Arc::new(Mutex::new(std::mem::take(&mut self.paths)));
466            let revision = &mut self.inner.revision;
467            *revision = self.goal_revision;
468        }
469    }
470}
471
472impl<M: PathAccessModel + Sized> RevisingVfs<'_, M> {
473    pub fn vfs(&mut self) -> &mut Vfs<M> {
474        self.inner
475    }
476
477    fn am(&mut self) -> &mut VfsAccessModel<M> {
478        &mut self.inner.access_model
479    }
480
481    fn invalidate_path(&mut self, path: &Path) {
482        if let Some(fids) = self.paths.get(path) {
483            if fids.is_empty() {
484                return;
485            }
486
487            self.view_changed = true;
488            for fid in fids.clone() {
489                self.invalidate_file_id(fid);
490            }
491        }
492    }
493
494    fn invalidate_file_id(&mut self, file_id: TypstFileId) {
495        self.view_changed = true;
496        self.managed.slot(file_id, |e| {
497            e.changed_at = self.goal_revision.get();
498            e.bytes = Arc::default();
499            e.source = Arc::default();
500        });
501    }
502
503    /// Reset the shadowing files in [`OverlayAccessModel`].
504    pub fn reset_shadow(&mut self) {
505        for path in self.am().inner.inner.file_paths() {
506            self.invalidate_path(&path);
507        }
508        for fid in self.am().file_paths() {
509            self.invalidate_file_id(fid);
510        }
511
512        self.am().clear_shadow();
513        self.am().inner.inner.clear_shadow();
514    }
515
516    /// Reset all caches. This can happen when:
517    /// - package paths are reconfigured.
518    /// - The root of the workspace is switched.
519    pub fn reset_cache(&mut self) {
520        self.view_changed = true;
521        self.managed = EntryMap::default();
522        self.paths = PathMap::default();
523    }
524
525    /// Add a shadowing file to the [`OverlayAccessModel`].
526    pub fn map_shadow(&mut self, path: &Path, snap: FileSnapshot) -> FileResult<()> {
527        self.view_changed = true;
528        self.invalidate_path(path);
529        self.am().inner.inner.add_file(path, snap, |c| c.into());
530
531        Ok(())
532    }
533
534    /// Remove a shadowing file from the [`OverlayAccessModel`].
535    pub fn unmap_shadow(&mut self, path: &Path) -> FileResult<()> {
536        self.view_changed = true;
537        self.invalidate_path(path);
538        self.am().inner.inner.remove_file(path);
539
540        Ok(())
541    }
542
543    /// Add a shadowing file to the [`OverlayAccessModel`] by file id.
544    pub fn map_shadow_by_id(&mut self, file_id: TypstFileId, snap: FileSnapshot) -> FileResult<()> {
545        self.view_changed = true;
546        self.invalidate_file_id(file_id);
547        self.am().add_file(&file_id, snap, |c| *c);
548
549        Ok(())
550    }
551
552    /// Remove a shadowing file from the [`OverlayAccessModel`] by file id.
553    pub fn remove_shadow_by_id(&mut self, file_id: TypstFileId) {
554        self.view_changed = true;
555        self.invalidate_file_id(file_id);
556        self.am().remove_file(&file_id);
557    }
558
559    /// Let the vfs notify the access model with a filesystem event.
560    ///
561    /// See [`NotifyAccessModel`] for more information.
562    pub fn notify_fs_event(&mut self, event: FilesystemEvent) {
563        self.notify_fs_changes(event.split().0);
564    }
565    /// Let the vfs notify the access model with a filesystem changes.
566    ///
567    /// See [`NotifyAccessModel`] for more information.
568    pub fn notify_fs_changes(&mut self, event: FileChangeSet) {
569        for path in &event.removes {
570            self.invalidate_path(path);
571        }
572        for (path, _) in &event.inserts {
573            self.invalidate_path(path);
574        }
575
576        self.am().inner.inner.inner.notify(event);
577    }
578}
579
580type BytesQuery = Arc<OnceLock<(Option<ImmutPath>, usize, FileResult<Bytes>)>>;
581
582#[derive(Debug, Clone, Default)]
583struct VfsEntry {
584    changed_at: usize,
585    bytes: BytesQuery,
586    source: Arc<OnceLock<FileResult<Source>>>,
587}
588
589#[derive(Debug, Clone, Default)]
590struct EntryMap {
591    entries: RedBlackTreeMapSync<TypstFileId, VfsEntry>,
592}
593
594impl EntryMap {
595    /// Read a slot.
596    #[inline(always)]
597    fn slot<T>(&mut self, path: TypstFileId, f: impl FnOnce(&mut VfsEntry) -> T) -> T {
598        if let Some(entry) = self.entries.get_mut(&path) {
599            f(entry)
600        } else {
601            let mut entry = VfsEntry::default();
602            let res = f(&mut entry);
603            self.entries.insert_mut(path, entry);
604            res
605        }
606    }
607
608    fn display(&self) -> DisplayEntryMap {
609        DisplayEntryMap { map: self }
610    }
611}
612
613pub struct DisplayEntryMap<'a> {
614    map: &'a EntryMap,
615}
616
617impl fmt::Debug for DisplayEntryMap<'_> {
618    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
619        f.debug_map().entries(self.map.entries.iter()).finish()
620    }
621}
622
623#[derive(Debug, Clone, Default)]
624struct PathMap {
625    paths: FxHashMap<ImmutPath, EcoVec<TypstFileId>>,
626    file_ids: FxHashMap<TypstFileId, (ImmutPath, NonZeroUsize)>,
627}
628
629impl PathMap {
630    fn insert(&mut self, next: &ImmutPath, fid: TypstFileId, rev: NonZeroUsize) {
631        use std::collections::hash_map::Entry;
632        let rev_entry = self.file_ids.entry(fid);
633
634        match rev_entry {
635            Entry::Occupied(mut entry) => {
636                let (prev, prev_rev) = entry.get_mut();
637                if prev != next {
638                    if *prev_rev == rev {
639                        log::warn!("Vfs: {fid:?} is changed in rev({rev:?}), {prev:?} -> {next:?}");
640                    }
641
642                    if let Some(fids) = self.paths.get_mut(prev) {
643                        fids.retain(|f| *f != fid);
644                    }
645
646                    *prev = next.clone();
647                    *prev_rev = rev;
648
649                    self.paths.entry(next.clone()).or_default().push(fid);
650                }
651            }
652            Entry::Vacant(entry) => {
653                entry.insert((next.clone(), rev));
654                self.paths.entry(next.clone()).or_default().push(fid);
655            }
656        }
657    }
658
659    fn get(&mut self, path: &Path) -> Option<&EcoVec<TypstFileId>> {
660        self.paths.get(path)
661    }
662
663    fn display(&self) -> DisplayPathMap {
664        DisplayPathMap { map: self }
665    }
666}
667
668pub struct DisplayPathMap<'a> {
669    map: &'a PathMap,
670}
671
672impl fmt::Debug for DisplayPathMap<'_> {
673    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
674        f.debug_map().entries(self.map.paths.iter()).finish()
675    }
676}
677
678/// Convert a byte slice to a string, removing UTF-8 BOM if present.
679fn from_utf8_or_bom(buf: &[u8]) -> FileResult<&str> {
680    Ok(std::str::from_utf8(if buf.starts_with(b"\xef\xbb\xbf") {
681        // remove UTF-8 BOM
682        &buf[3..]
683    } else {
684        // Assume UTF-8
685        buf
686    })?)
687}
688
689#[cfg(test)]
690mod tests {
691    fn is_send<T: Send>() {}
692    fn is_sync<T: Sync>() {}
693
694    #[test]
695    fn test_vfs_send_sync() {
696        is_send::<super::Vfs<super::dummy::DummyAccessModel>>();
697        is_sync::<super::Vfs<super::dummy::DummyAccessModel>>();
698    }
699}