tytanic_core/world_builder/
file.rs

1use std::collections::HashMap;
2use std::fmt::Debug;
3use std::path::Path;
4use std::path::PathBuf;
5use std::sync::Mutex;
6use std::sync::MutexGuard;
7
8use ecow::eco_format;
9use typst::diag::FileError;
10use typst::diag::FileResult;
11use typst::diag::PackageError;
12use typst::foundations::Bytes;
13use typst::syntax::FileId;
14use typst::syntax::Source;
15use typst_kit::download::Progress;
16use typst_kit::package::PackageStorage;
17
18use super::ProvideFile;
19
/// Provides access to files from memory.
///
/// The files are kept in a map from [`FileId`] to [`VirtualFileSlot`] which is
/// wrapped in a [`Mutex`], so it can be reached through a shared reference.
#[derive(Debug)]
pub struct VirtualFileProvider {
    /// The stored file slots, keyed by their file id.
    slots: Mutex<HashMap<FileId, VirtualFileSlot>>,
}
25
26impl VirtualFileProvider {
27    /// Creates a new file provider with no files.
28    pub fn new() -> Self {
29        Self::from_slots(HashMap::new())
30    }
31
32    /// Creates a new file provider with the given file slots.
33    pub fn from_slots(slots: HashMap<FileId, VirtualFileSlot>) -> Self {
34        Self {
35            slots: Mutex::new(slots),
36        }
37    }
38}
39
40impl VirtualFileProvider {
41    /// The slots used to store file contents.
42    pub fn slots(&self) -> MutexGuard<'_, HashMap<FileId, VirtualFileSlot>> {
43        self.slots.lock().unwrap()
44    }
45
46    /// The slots used to store file contents.
47    pub fn slots_mut(&mut self) -> &mut HashMap<FileId, VirtualFileSlot> {
48        self.slots.get_mut().unwrap()
49    }
50}
51
52impl VirtualFileProvider {
53    /// Access the canonical slot for the given file id.
54    pub fn slot<T, F>(&self, id: FileId, f: F) -> FileResult<T>
55    where
56        F: FnOnce(&VirtualFileSlot) -> T,
57    {
58        let map = self.slots.lock().unwrap();
59        map.get(&id)
60            .map(f)
61            .ok_or_else(|| FileError::NotFound(id.vpath().as_rooted_path().to_owned()))
62    }
63}
64
65impl Default for VirtualFileProvider {
66    fn default() -> Self {
67        Self::new()
68    }
69}
70
71impl ProvideFile for VirtualFileProvider {
72    fn provide_source(&self, id: FileId, _progress: &mut dyn Progress) -> FileResult<Source> {
73        self.slot(id, |slot| slot.source())?
74            .ok_or_else(|| FileError::NotSource)
75    }
76
77    fn provide_bytes(&self, id: FileId, _progress: &mut dyn Progress) -> FileResult<Bytes> {
78        self.slot(id, |slot| slot.bytes())
79    }
80
81    fn reset_all(&self) {}
82}
83
/// Holds the processed data for a file ID.
///
/// Is eagerly populated with data (unlike [`FileSlot`]).
#[derive(Debug)]
pub struct VirtualFileSlot {
    /// The slot's file id.
    id: FileId,
    /// The source of the file, `None` if the slot was created from bytes only.
    source: Option<Source>,
    /// The raw bytes of the file.
    bytes: Bytes,
}
93
94impl VirtualFileSlot {
95    /// Create a new source file with the given source code.
96    pub fn from_source(source: Source) -> Self {
97        Self {
98            id: source.id(),
99            bytes: Bytes::new(source.text().as_bytes().to_vec()),
100            source: Some(source),
101        }
102    }
103
104    /// Create a new generic file with the given bytes.
105    pub fn from_bytes<T>(id: FileId, bytes: T) -> Self
106    where
107        T: AsRef<[u8]> + Send + Sync + 'static,
108    {
109        Self {
110            id,
111            bytes: Bytes::new(bytes),
112            source: None,
113        }
114    }
115}
116
117impl VirtualFileSlot {
118    /// The file id of this file.
119    pub fn id(&self) -> FileId {
120        self.id
121    }
122
123    /// The optional source of this file.
124    pub fn source(&self) -> Option<Source> {
125        self.source.clone()
126    }
127
128    /// The bytes of this file.
129    pub fn bytes(&self) -> Bytes {
130        self.bytes.clone()
131    }
132}
133
/// Provides access to files from the filesystem.
///
/// Slots are created on demand and kept in a map behind a [`Mutex`], so they
/// can be reached through a shared reference.
#[derive(Debug)]
pub struct FilesystemFileProvider {
    /// The project root.
    root: PathBuf,
    /// The file slots created so far, keyed by their file id.
    slots: Mutex<HashMap<FileId, FileSlot>>,
    /// The storage used to prepare packages, if one was given.
    package_storage: Option<PackageStorage>,
}
141
142impl FilesystemFileProvider {
143    /// Creates a new file provider for the given project root.
144    ///
145    /// The package storage will be used to download and prepare packages.
146    pub fn new<P>(root: P, package_storage: Option<PackageStorage>) -> Self
147    where
148        P: Into<PathBuf>,
149    {
150        Self {
151            root: root.into(),
152            slots: Mutex::new(HashMap::new()),
153            package_storage,
154        }
155    }
156}
157
158impl FilesystemFileProvider {
159    /// The project root.
160    pub fn root(&self) -> &Path {
161        &self.root
162    }
163
164    /// The slots used to store file contents.
165    pub fn slots(&self) -> MutexGuard<'_, HashMap<FileId, FileSlot>> {
166        self.slots.lock().unwrap()
167    }
168
169    /// The slots used to store file contents.
170    pub fn slots_mut(&mut self) -> &mut HashMap<FileId, FileSlot> {
171        self.slots.get_mut().unwrap()
172    }
173
174    /// The package storage if one is given.
175    pub fn package_storage(&self) -> Option<&PackageStorage> {
176        self.package_storage.as_ref()
177    }
178}
179
180impl FilesystemFileProvider {
181    /// Reset the slots for the next compilation.
182    pub fn reset_slots(&self) {
183        for slot in self.slots.lock().unwrap().values_mut() {
184            slot.reset();
185        }
186    }
187
188    /// Access the canonical slot for the given file id.
189    pub fn slot<F, T>(&self, id: FileId, f: F) -> T
190    where
191        F: FnOnce(&mut FileSlot) -> T,
192    {
193        let mut map = self.slots.lock().unwrap();
194        f(map.entry(id).or_insert_with(|| FileSlot::new(id)))
195    }
196}
197
198impl ProvideFile for FilesystemFileProvider {
199    fn provide_source(&self, id: FileId, progress: &mut dyn Progress) -> FileResult<Source> {
200        self.slot(id, |slot| {
201            slot.source(self.root(), self.package_storage(), progress)
202        })
203    }
204
205    fn provide_bytes(&self, id: FileId, progress: &mut dyn Progress) -> FileResult<Bytes> {
206        self.slot(id, |slot| {
207            slot.bytes(self.root(), self.package_storage(), progress)
208        })
209    }
210
211    fn reset_all(&self) {
212        self.reset_slots();
213    }
214}
215
/// Holds the processed data for a file ID.
///
/// Both fields can be populated if the file is both imported and read().
/// Each of them is kept in its own cell.
#[derive(Debug)]
pub struct FileSlot {
    /// The slot's file id.
    id: FileId,
    /// The lazily loaded and incrementally updated source file.
    source: SlotCell<Source>,
    /// The lazily loaded raw byte buffer.
    file: SlotCell<Bytes>,
}
228
229impl FileSlot {
230    /// Create a new file slot.
231    pub fn new(id: FileId) -> Self {
232        Self {
233            id,
234            file: SlotCell::new(),
235            source: SlotCell::new(),
236        }
237    }
238
239    /// Marks the file as not yet accessed in preparation of the next
240    /// compilation.
241    pub fn reset(&mut self) {
242        self.source.reset();
243        self.file.reset();
244    }
245
246    /// Retrieve the source for this file.
247    pub fn source(
248        &mut self,
249        root: &Path,
250        package_storage: Option<&PackageStorage>,
251        progress: &mut dyn Progress,
252    ) -> FileResult<Source> {
253        self.source.get_or_init(
254            || read(self.id, root, package_storage, progress),
255            |data, prev| {
256                let text = decode_utf8(&data)?;
257                if let Some(mut prev) = prev {
258                    prev.replace(text);
259                    Ok(prev)
260                } else {
261                    Ok(Source::new(self.id, text.into()))
262                }
263            },
264        )
265    }
266
267    /// Retrieve the file's bytes.
268    pub fn bytes(
269        &mut self,
270        root: &Path,
271        package_storage: Option<&PackageStorage>,
272        progress: &mut dyn Progress,
273    ) -> FileResult<Bytes> {
274        self.file.get_or_init(
275            || read(self.id, root, package_storage, progress),
276            |data, _| Ok(Bytes::new(data)),
277        )
278    }
279}
280
/// Lazily processes data for a file.
///
/// Keeps the processed result together with a fingerprint of the raw input and
/// a flag recording whether the cell was accessed in the current compilation.
#[derive(Debug)]
struct SlotCell<T> {
    /// The processed data, `None` as long as nothing has been processed.
    data: Option<FileResult<T>>,
    /// A hash of the raw file contents / access error.
    fingerprint: u128,
    /// Whether the slot has been accessed in the current compilation.
    accessed: bool,
}
291
292impl<T: Clone> SlotCell<T> {
293    /// Creates a new, empty cell.
294    fn new() -> Self {
295        Self {
296            data: None,
297            fingerprint: 0,
298            accessed: false,
299        }
300    }
301
302    /// Marks the cell as not yet accessed in preparation of the next
303    /// compilation.
304    fn reset(&mut self) {
305        self.accessed = false;
306    }
307
308    /// Gets the contents of the cell or initialize them.
309    fn get_or_init(
310        &mut self,
311        load: impl FnOnce() -> FileResult<Vec<u8>>,
312        f: impl FnOnce(Vec<u8>, Option<T>) -> FileResult<T>,
313    ) -> FileResult<T> {
314        // If we accessed the file already in this compilation, retrieve it.
315        if std::mem::replace(&mut self.accessed, true) {
316            if let Some(data) = &self.data {
317                return data.clone();
318            }
319        }
320
321        // Read and hash the file.
322        let result = load();
323        let fingerprint = typst::utils::hash128(&result);
324
325        // If the file contents didn't change, yield the old processed data.
326        if std::mem::replace(&mut self.fingerprint, fingerprint) == fingerprint {
327            if let Some(data) = &self.data {
328                return data.clone();
329            }
330        }
331
332        let prev = self.data.take().and_then(Result::ok);
333        let value = result.and_then(|data| f(data, prev));
334        self.data = Some(value.clone());
335
336        value
337    }
338}
339
340/// Resolves the path of a file id on the system, downloading a package if
341/// necessary.
342fn system_path(
343    root: &Path,
344    id: FileId,
345    package_storage: Option<&PackageStorage>,
346    progress: &mut dyn Progress,
347) -> FileResult<PathBuf> {
348    // Determine the root path relative to which the file path
349    // will be resolved.
350    let buf;
351    let mut root = root;
352
353    match (id.package(), package_storage) {
354        (Some(spec), Some(storage)) => {
355            tracing::trace!(?spec, "preparing package");
356            buf = storage.prepare_package(spec, progress)?;
357            root = &buf;
358        }
359        (Some(spec), None) => {
360            tracing::error!(?spec, "cannot prepare package, no package storage provided");
361            return Err(FileError::Package(PackageError::Other(Some(eco_format!(
362                "cannot access package {spec}"
363            )))));
364        }
365        (None, _) => {}
366    }
367
368    // Join the path to the root. If it tries to escape, deny
369    // access. Note: It can still escape via symlinks.
370    id.vpath().resolve(root).ok_or(FileError::AccessDenied)
371}
372
373/// Reads a file from a `FileId`.
374///
375/// If the ID represents stdin it will read from standard input,
376/// otherwise it gets the file path of the ID and reads the file from disk.
377fn read(
378    id: FileId,
379    root: &Path,
380    package_storage: Option<&PackageStorage>,
381    progress: &mut dyn Progress,
382) -> FileResult<Vec<u8>> {
383    read_from_disk(&system_path(root, id, package_storage, progress)?)
384}
385
386/// Read a file from disk.
387fn read_from_disk(path: &Path) -> FileResult<Vec<u8>> {
388    let f = |e| FileError::from_io(e, path);
389    if std::fs::metadata(path).map_err(f)?.is_dir() {
390        Err(FileError::IsDirectory)
391    } else {
392        std::fs::read(path).map_err(f)
393    }
394}
395
396/// Decode UTF-8 with an optional BOM.
397fn decode_utf8(buf: &[u8]) -> FileResult<&str> {
398    // Remove UTF-8 BOM.
399    Ok(std::str::from_utf8(
400        buf.strip_prefix(b"\xef\xbb\xbf").unwrap_or(buf),
401    )?)
402}