reflexo_world/
source.rs

1// use std::sync::Arc;
2
3use core::fmt;
4use std::{num::NonZeroUsize, sync::Arc};
5
6use parking_lot::{Mutex, RwLock};
7use reflexo::hash::FxHashMap;
8use reflexo::{ImmutPath, QueryRef};
9use reflexo_vfs::{Bytes, FileId, FsProvider, TypstFileId};
10use typst::{
11    diag::{FileError, FileResult},
12    syntax::Source,
13};
14
15/// incrementally query a value from a self holding state
16type IncrQueryRef<S, E> = QueryRef<S, E, Option<S>>;
17
18type FileQuery<T> = QueryRef<T, FileError>;
19type IncrFileQuery<T> = IncrQueryRef<T, FileError>;
20
/// Implemented by cache entries that remember the revision in which they
/// were last accessed.
pub trait Revised {
    /// Returns the revision in which this value was last accessed.
    fn last_accessed_rev(&self) -> NonZeroUsize;
}
24
25pub struct SharedState<T> {
26    pub committed_revision: Option<usize>,
27    // todo: fine-grained lock
28    /// The cache entries for each paths
29    cache_entries: FxHashMap<TypstFileId, T>,
30}
31
32impl<T> fmt::Debug for SharedState<T> {
33    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
34        f.debug_struct("SharedState")
35            .field("committed_revision", &self.committed_revision)
36            .finish()
37    }
38}
39
40impl<T> Default for SharedState<T> {
41    fn default() -> Self {
42        SharedState {
43            committed_revision: None,
44            cache_entries: FxHashMap::default(),
45        }
46    }
47}
48
49impl<T: Revised> SharedState<T> {
50    fn gc(&mut self) {
51        let committed = self.committed_revision.unwrap_or(0);
52        self.cache_entries
53            .retain(|_, v| committed.saturating_sub(v.last_accessed_rev().get()) <= 30);
54    }
55}
56
57pub struct SourceCache {
58    last_accessed_rev: NonZeroUsize,
59    fid: FileId,
60    source: IncrFileQuery<Source>,
61    buffer: FileQuery<Bytes>,
62}
63
64impl fmt::Debug for SourceCache {
65    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
66        f.debug_struct("SourceCache").finish()
67    }
68}
69
70impl Revised for SourceCache {
71    fn last_accessed_rev(&self) -> NonZeroUsize {
72        self.last_accessed_rev
73    }
74}
75
76pub struct SourceState {
77    pub revision: NonZeroUsize,
78    pub slots: Arc<Mutex<FxHashMap<TypstFileId, SourceCache>>>,
79}
80
81impl SourceState {
82    pub fn commit_impl(self, state: &mut SharedState<SourceCache>) {
83        log::debug!("drop source db revision {}", self.revision);
84
85        if let Ok(slots) = Arc::try_unwrap(self.slots) {
86            // todo: utilize the committed revision is not zero
87            if state
88                .committed_revision
89                .map_or(false, |committed| committed >= self.revision.get())
90            {
91                return;
92            }
93
94            log::debug!("committing source db revision {}", self.revision);
95            state.committed_revision = Some(self.revision.get());
96            state.cache_entries = slots.into_inner();
97            state.gc();
98        }
99    }
100}
101
102#[derive(Clone)]
103pub struct SourceDb {
104    pub revision: NonZeroUsize,
105    pub shared: Arc<RwLock<SharedState<SourceCache>>>,
106    /// The slots for all the files during a single lifecycle.
107    pub slots: Arc<Mutex<FxHashMap<TypstFileId, SourceCache>>>,
108    /// Whether to reparse the file when it is changed.
109    /// Default to `true`.
110    pub do_reparse: bool,
111}
112
113impl fmt::Debug for SourceDb {
114    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
115        f.debug_struct("SourceDb").finish()
116    }
117}
118
119impl SourceDb {
120    pub fn take_state(&mut self) -> SourceState {
121        SourceState {
122            revision: self.revision,
123            slots: std::mem::take(&mut self.slots),
124        }
125    }
126
127    /// Set the `do_reparse` flag that indicates whether to reparsing the file
128    /// instead of creating a new [`Source`] when the file is changed.
129    /// Default to `true`.
130    ///
131    /// You usually want to set this flag to `true` for better performance.
132    /// However, one could disable this flag for debugging purpose.
133    pub fn set_do_reparse(&mut self, do_reparse: bool) {
134        self.do_reparse = do_reparse;
135    }
136
137    /// Returns the overall memory usage for the stored files.
138    pub fn memory_usage(&self) -> usize {
139        let mut w = self.slots.lock().len() * core::mem::size_of::<SourceCache>();
140        w += self
141            .slots
142            .lock()
143            .iter()
144            .map(|(_, slot)| {
145                slot.source
146                    .get_uninitialized()
147                    .and_then(|e| e.as_ref().ok())
148                    .map_or(16, |e| e.text().len() * 8)
149                    + slot
150                        .buffer
151                        .get_uninitialized()
152                        .and_then(|e| e.as_ref().ok())
153                        .map_or(16, |e| e.len())
154            })
155            .sum::<usize>();
156
157        w
158    }
159
160    /// Get all the files that are currently in the VFS.
161    ///
162    /// This is typically corresponds to the file dependencies of a single
163    /// compilation.
164    ///
165    /// When you don't reset the vfs for each compilation, this function will
166    /// still return remaining files from the previous compilation.
167    pub fn iter_dependencies_dyn<'a>(
168        &'a self,
169        p: &'a impl FsProvider,
170        f: &mut dyn FnMut(ImmutPath),
171    ) {
172        for slot in self.slots.lock().iter() {
173            f(p.file_path(slot.1.fid));
174        }
175    }
176
177    /// Get file content by path.
178    pub fn file(&self, id: TypstFileId, fid: FileId, p: &impl FsProvider) -> FileResult<Bytes> {
179        self.slot(id, fid, |slot| slot.buffer.compute(|| p.read(fid)).cloned())
180    }
181
182    /// Get source content by path and assign the source with a given typst
183    /// global file id.
184    ///
185    /// See `Vfs::resolve_with_f` for more information.
186    pub fn source(&self, id: TypstFileId, fid: FileId, p: &impl FsProvider) -> FileResult<Source> {
187        self.slot(id, fid, |slot| {
188            slot.source
189                .compute_with_context(|prev| {
190                    let content = p.read(fid)?;
191                    let next = from_utf8_or_bom(&content)?.to_owned();
192
193                    // otherwise reparse the source
194                    match prev {
195                        Some(mut source) if self.do_reparse => {
196                            source.replace(&next);
197                            Ok(source)
198                        }
199                        // Return a new source if we don't have a reparse feature or no prev
200                        _ => Ok(Source::new(id, next)),
201                    }
202                })
203                .cloned()
204        })
205    }
206
207    /// Insert a new slot into the vfs.
208    fn slot<T>(&self, id: TypstFileId, fid: FileId, f: impl FnOnce(&SourceCache) -> T) -> T {
209        let mut slots = self.slots.lock();
210        f(slots.entry(id).or_insert_with(|| {
211            let state = self.shared.read();
212            let cache_entry = state.cache_entries.get(&id);
213
214            cache_entry
215                .map(|e| SourceCache {
216                    last_accessed_rev: self.revision.max(e.last_accessed_rev),
217                    fid,
218                    source: IncrFileQuery::with_context(
219                        e.source
220                            .get_uninitialized()
221                            .cloned()
222                            .transpose()
223                            .ok()
224                            .flatten(),
225                    ),
226                    buffer: FileQuery::default(),
227                })
228                .unwrap_or_else(|| SourceCache {
229                    last_accessed_rev: self.revision,
230                    fid,
231                    source: IncrFileQuery::with_context(None),
232                    buffer: FileQuery::default(),
233                })
234        }))
235    }
236}
237
/// A cache that can be merged with another cache of the same type.
pub trait MergeCache: Sized {
    /// Merges `self` with another cache and returns the result.
    ///
    /// The default implementation returns `self` unchanged and drops the
    /// other cache.
    fn merge(self, _other: Self) -> Self {
        self
    }
}
243
/// A database for fonts. It currently has no fields.
pub struct FontDb {}
/// A database for packages. It currently has no fields.
pub struct PackageDb {}
246
247/// Convert a byte slice to a string, removing UTF-8 BOM if present.
248fn from_utf8_or_bom(buf: &[u8]) -> FileResult<&str> {
249    Ok(std::str::from_utf8(if buf.starts_with(b"\xef\xbb\xbf") {
250        // remove UTF-8 BOM
251        &buf[3..]
252    } else {
253        // Assume UTF-8
254        buf
255    })?)
256}