reflexo_world/
source.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
// use std::sync::Arc;

use core::fmt;
use std::{num::NonZeroUsize, sync::Arc};

use parking_lot::{Mutex, RwLock};
use reflexo::hash::FxHashMap;
use reflexo::{ImmutPath, QueryRef};
use reflexo_vfs::{Bytes, FileId, FsProvider, TypstFileId};
use typst::{
    diag::{FileError, FileResult},
    syntax::Source,
};

/// A query that is computed incrementally from its own previous state.
///
/// The context of the query is `Option<S>`, i.e. an optional previous value of
/// the same type as the result, which the computation may reuse.
type IncrQueryRef<S, E> = QueryRef<S, E, Option<S>>;

/// A query yielding a `T` or a [`FileError`].
type FileQuery<T> = QueryRef<T, FileError>;
/// An incremental query (see [`IncrQueryRef`]) yielding a `T` or a
/// [`FileError`].
type IncrFileQuery<T> = IncrQueryRef<T, FileError>;

/// A cache entry that records the revision at which it was last accessed.
///
/// `SharedState::gc` reads this value to decide which entries to retain.
pub trait Revised {
    /// Returns the revision at which this entry was last accessed.
    fn last_accessed_rev(&self) -> NonZeroUsize;
}

/// Cache state that outlives a single revision: the entries of the most
/// recently committed revision together with that revision number.
pub struct SharedState<T> {
    /// The latest revision that has been committed into this state, or `None`
    /// if nothing has been committed yet.
    pub committed_revision: Option<usize>,
    // todo: fine-grained lock
    /// The cache entries, keyed by typst file id.
    cache_entries: FxHashMap<TypstFileId, T>,
}

impl<T> fmt::Debug for SharedState<T> {
    /// Formats only `committed_revision`; the cache entries are left out, so
    /// `T` does not need to implement `Debug`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            committed_revision, ..
        } = self;

        let mut out = f.debug_struct("SharedState");
        out.field("committed_revision", committed_revision);
        out.finish()
    }
}

impl<T> Default for SharedState<T> {
    fn default() -> Self {
        SharedState {
            committed_revision: None,
            cache_entries: FxHashMap::default(),
        }
    }
}

impl<T: Revised> SharedState<T> {
    /// Drops every cache entry whose last access lies more than 30 revisions
    /// behind the committed revision.
    ///
    /// When nothing has been committed yet, the committed revision counts as
    /// `0`, so every entry is kept.
    fn gc(&mut self) {
        /// How many revisions an entry may lag behind before being evicted.
        const MAX_AGE: usize = 30;

        let committed = self.committed_revision.unwrap_or_default();
        self.cache_entries.retain(|_, entry| {
            // Saturating: an entry accessed at a newer revision has age zero.
            let age = committed.saturating_sub(entry.last_accessed_rev().get());
            age <= MAX_AGE
        });
    }
}

/// The cached data for a single file.
pub struct SourceCache {
    /// The revision at which this entry was last accessed.
    last_accessed_rev: NonZeroUsize,
    /// The file id used to ask the [`FsProvider`] for the file's path.
    fid: FileId,
    /// The parsed source, computed incrementally from the previously parsed
    /// source when one is available.
    source: IncrFileQuery<Source>,
    /// The raw content of the file.
    buffer: FileQuery<Bytes>,
}

impl fmt::Debug for SourceCache {
    /// Writes only the type name; none of the fields are printed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // A field-less `debug_struct` emits exactly the struct name.
        f.write_str("SourceCache")
    }
}

impl Revised for SourceCache {
    /// Returns the revision stored in the `last_accessed_rev` field.
    fn last_accessed_rev(&self) -> NonZeroUsize {
        self.last_accessed_rev
    }
}

/// The slots of a [`SourceDb`] detached from it (see `SourceDb::take_state`),
/// ready to be committed into a [`SharedState`].
pub struct SourceState {
    /// The revision of the database the slots were taken from.
    pub revision: NonZeroUsize,
    /// The per-file slots of that revision.
    pub slots: Arc<Mutex<FxHashMap<TypstFileId, SourceCache>>>,
}

impl SourceState {
    /// Commits the slots of this state into the shared `state`.
    ///
    /// Nothing is committed when
    /// - the slots are still referenced by another `Arc` handle, or
    /// - `state` already holds a revision that is at least as new as this one.
    ///
    /// Otherwise the cache entries of `state` are replaced by the slots of
    /// this state, the committed revision is updated, and stale entries are
    /// garbage collected.
    pub fn commit_impl(self, state: &mut SharedState<SourceCache>) {
        log::debug!("drop source db revision {}", self.revision);

        // Only the last owner of the slots may commit them.
        let slots = match Arc::try_unwrap(self.slots) {
            Ok(slots) => slots,
            Err(_) => return,
        };

        // todo: utilize the committed revision is not zero
        let revision = self.revision.get();
        if matches!(state.committed_revision, Some(committed) if committed >= revision) {
            return;
        }

        log::debug!("committing source db revision {}", self.revision);
        state.committed_revision = Some(revision);
        state.cache_entries = slots.into_inner();
        state.gc();
    }
}

/// Provides file contents and parsed sources for a single revision, backed by
/// a cache that is shared across revisions.
#[derive(Clone)]
pub struct SourceDb {
    /// The revision this database works at.
    pub revision: NonZeroUsize,
    /// The cache shared across revisions; it is consulted when a new slot is
    /// created.
    pub shared: Arc<RwLock<SharedState<SourceCache>>>,
    /// The slots for all the files during a single lifecycle.
    pub slots: Arc<Mutex<FxHashMap<TypstFileId, SourceCache>>>,
    /// Whether to reparse the file when it is changed.
    /// Defaults to `true`.
    pub do_reparse: bool,
}

impl fmt::Debug for SourceDb {
    /// Writes only the type name; none of the fields are printed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // A field-less `debug_struct` emits exactly the struct name.
        f.write_str("SourceDb")
    }
}

impl SourceDb {
    /// Detaches the slots of the current lifecycle from this database.
    ///
    /// The returned [`SourceState`] carries the current revision together with
    /// the slots; this database is left with a fresh, empty slot map.
    pub fn take_state(&mut self) -> SourceState {
        SourceState {
            revision: self.revision,
            slots: std::mem::take(&mut self.slots),
        }
    }

    /// Set the `do_reparse` flag that indicates whether to reparse the file
    /// instead of creating a new [`Source`] when the file is changed.
    /// Defaults to `true`.
    ///
    /// You usually want to set this flag to `true` for better performance.
    /// However, one could disable this flag for debugging purposes.
    pub fn set_do_reparse(&mut self, do_reparse: bool) {
        self.do_reparse = do_reparse;
    }

    /// Returns an estimate of the overall memory usage for the stored files.
    ///
    /// Every slot is charged with the size of a [`SourceCache`], plus eight
    /// times the text length of a successfully computed source and the length
    /// of a successfully computed buffer. A source or buffer that is not
    /// computed, or that holds an error, is charged with a flat 16 bytes.
    pub fn memory_usage(&self) -> usize {
        // Lock once so that the slot count and the per-slot sizes are taken
        // from the same snapshot of the map.
        let slots = self.slots.lock();

        let fixed = slots.len() * core::mem::size_of::<SourceCache>();
        let dynamic = slots
            .values()
            .map(|slot| {
                let source = slot
                    .source
                    .get_uninitialized()
                    .and_then(|e| e.as_ref().ok())
                    .map_or(16, |e| e.text().len() * 8);
                let buffer = slot
                    .buffer
                    .get_uninitialized()
                    .and_then(|e| e.as_ref().ok())
                    .map_or(16, |e| e.len());

                source + buffer
            })
            .sum::<usize>();

        fixed + dynamic
    }

    /// Get all the files that are currently in the VFS.
    ///
    /// This typically corresponds to the file dependencies of a single
    /// compilation.
    ///
    /// When you don't reset the vfs for each compilation, this function will
    /// still return remaining files from the previous compilation.
    ///
    /// `f` is called once per slot with the path that `p` reports for the
    /// slot's file id. The slot map stays locked while `f` runs.
    pub fn iter_dependencies_dyn<'a>(
        &'a self,
        p: &'a impl FsProvider,
        f: &mut dyn FnMut(ImmutPath),
    ) {
        for cache in self.slots.lock().values() {
            f(p.file_path(cache.fid));
        }
    }

    /// Get file content by path.
    ///
    /// The content is read through `p` if the slot for `id` has not computed
    /// its buffer yet; the slot's buffer query then supplies the result.
    pub fn file(&self, id: TypstFileId, fid: FileId, p: &impl FsProvider) -> FileResult<Bytes> {
        self.slot(id, fid, |slot| slot.buffer.compute(|| p.read(fid)).cloned())
    }

    /// Get source content by path and assign the source with a given typst
    /// global file id.
    ///
    /// See `Vfs::resolve_with_f` for more information.
    ///
    /// # Errors
    ///
    /// Returns the error of `p.read(fid)` if reading fails, or the error
    /// converted from the UTF-8 decoding failure if the content is not valid
    /// UTF-8.
    pub fn source(&self, id: TypstFileId, fid: FileId, p: &impl FsProvider) -> FileResult<Source> {
        self.slot(id, fid, |slot| {
            slot.source
                .compute_with_context(|prev| {
                    let content = p.read(fid)?;
                    let next = from_utf8_or_bom(&content)?;

                    match prev {
                        // Reuse the previous source and let it reparse in place;
                        // no owned copy of the text is needed here.
                        Some(mut source) if self.do_reparse => {
                            source.replace(next);
                            Ok(source)
                        }
                        // Return a new source if we don't have a reparse feature or no prev
                        _ => Ok(Source::new(id, next.to_owned())),
                    }
                })
                .cloned()
        })
    }

    /// Runs `f` on the slot of `id`, inserting a new slot first if there is
    /// none yet.
    ///
    /// A new slot starts with an empty buffer. If the shared cache has an
    /// entry for `id`, the new slot takes over its successfully computed
    /// source (if any) as the context for incremental computation, and its
    /// last accessed revision becomes the larger of the current revision and
    /// the cached one. Otherwise the slot starts without a previous source at
    /// the current revision.
    ///
    /// The slot map stays locked while `f` runs.
    fn slot<T>(&self, id: TypstFileId, fid: FileId, f: impl FnOnce(&SourceCache) -> T) -> T {
        let mut slots = self.slots.lock();
        let cache = slots.entry(id).or_insert_with(|| {
            let state = self.shared.read();

            match state.cache_entries.get(&id) {
                Some(e) => SourceCache {
                    last_accessed_rev: self.revision.max(e.last_accessed_rev),
                    fid,
                    // Only a successfully computed source is worth carrying
                    // over; a cached error is dropped without being cloned.
                    source: IncrFileQuery::with_context(
                        e.source
                            .get_uninitialized()
                            .and_then(|e| e.as_ref().ok())
                            .cloned(),
                    ),
                    buffer: FileQuery::default(),
                },
                None => SourceCache {
                    last_accessed_rev: self.revision,
                    fid,
                    source: IncrFileQuery::with_context(None),
                    buffer: FileQuery::default(),
                },
            }
        });

        f(cache)
    }
}

/// A cache that can be merged with another cache of the same type.
pub trait MergeCache: Sized {
    /// Merges `self` with `_other` and returns the result.
    ///
    /// The default implementation keeps `self` unchanged and discards
    /// `_other`.
    fn merge(self, _other: Self) -> Self {
        self
    }
}

/// An empty struct; no fields or behavior are defined for it in this file.
pub struct FontDb {}
/// An empty struct; no fields or behavior are defined for it in this file.
pub struct PackageDb {}

/// Decodes `buf` as UTF-8 text, skipping a leading UTF-8 byte order mark if
/// there is one.
///
/// The returned string borrows from `buf`. A UTF-8 decoding failure is
/// converted into the error type of [`FileResult`].
fn from_utf8_or_bom(buf: &[u8]) -> FileResult<&str> {
    /// The UTF-8 encoding of the byte order mark (U+FEFF).
    const UTF8_BOM: &[u8] = b"\xef\xbb\xbf";

    // Without a BOM the whole buffer is assumed to be UTF-8.
    let text = buf.strip_prefix(UTF8_BOM).unwrap_or(buf);
    Ok(std::str::from_utf8(text)?)
}