tytanic_core/world_builder/
file.rs

1use std::collections::HashMap;
2use std::fmt::Debug;
3use std::path::Path;
4use std::path::PathBuf;
5use std::sync::Mutex;
6use std::sync::MutexGuard;
7
8use ecow::eco_format;
9use typst::diag::FileError;
10use typst::diag::FileResult;
11use typst::diag::PackageError;
12use typst::foundations::Bytes;
13use typst::syntax::FileId;
14use typst::syntax::Source;
15use typst::syntax::package::PackageSpec;
16use typst_kit::download::Progress;
17use typst_kit::package::PackageStorage;
18
19use super::ProvideFile;
20
/// Provides access to files from memory.
///
/// Files are stored as [`VirtualFileSlot`]s keyed by their [`FileId`]. The
/// map is wrapped in a [`Mutex`] so that it can be accessed through a shared
/// reference.
#[derive(Debug)]
pub struct VirtualFileProvider {
    /// The in-memory files, keyed by file id.
    slots: Mutex<HashMap<FileId, VirtualFileSlot>>,
}
26
27impl VirtualFileProvider {
28    /// Creates a new file provider with no files.
29    pub fn new() -> Self {
30        Self::from_slots(HashMap::new())
31    }
32
33    /// Creates a new file provider with the given file slots.
34    pub fn from_slots(slots: HashMap<FileId, VirtualFileSlot>) -> Self {
35        Self {
36            slots: Mutex::new(slots),
37        }
38    }
39}
40
impl VirtualFileProvider {
    /// The slots used to store file contents.
    ///
    /// Locks the internal mutex; the map stays locked for as long as the
    /// returned guard is alive.
    ///
    /// # Panics
    ///
    /// Panics if the mutex guarding the slots is poisoned.
    pub fn slots(&self) -> MutexGuard<'_, HashMap<FileId, VirtualFileSlot>> {
        self.slots.lock().unwrap()
    }

    /// Mutable access to the slots used to store file contents.
    ///
    /// No locking is needed because the exclusive borrow of `self` already
    /// guarantees unique access.
    ///
    /// # Panics
    ///
    /// Panics if the mutex guarding the slots is poisoned.
    pub fn slots_mut(&mut self) -> &mut HashMap<FileId, VirtualFileSlot> {
        self.slots.get_mut().unwrap()
    }
}
52
53impl VirtualFileProvider {
54    /// Access the canonical slot for the given file id.
55    pub fn slot<T, F>(&self, id: FileId, f: F) -> FileResult<T>
56    where
57        F: FnOnce(&VirtualFileSlot) -> T,
58    {
59        let map = self.slots.lock().unwrap();
60        map.get(&id)
61            .map(f)
62            .ok_or_else(|| FileError::NotFound(id.vpath().as_rooted_path().to_owned()))
63    }
64}
65
impl Default for VirtualFileProvider {
    /// Creates a file provider with no files, same as
    /// [`VirtualFileProvider::new`].
    fn default() -> Self {
        Self::new()
    }
}
71
72impl ProvideFile for VirtualFileProvider {
73    fn provide_source(&self, id: FileId, _progress: &mut dyn Progress) -> FileResult<Source> {
74        self.slot(id, |slot| slot.source())?
75            .ok_or_else(|| FileError::NotSource)
76    }
77
78    fn provide_bytes(&self, id: FileId, _progress: &mut dyn Progress) -> FileResult<Bytes> {
79        self.slot(id, |slot| slot.bytes())
80    }
81
82    fn reset_all(&self) {}
83}
84
/// Holds the processed data for a file ID.
///
/// Is eagerly populated with data (unlike [`FileSlot`]).
#[derive(Debug)]
pub struct VirtualFileSlot {
    /// The id of the file this slot belongs to.
    id: FileId,
    /// The source of the file, present only if the slot was created from a
    /// source.
    source: Option<Source>,
    /// The raw contents of the file.
    bytes: Bytes,
}
94
95impl VirtualFileSlot {
96    /// Create a new source file with the given source code.
97    pub fn from_source(source: Source) -> Self {
98        Self {
99            id: source.id(),
100            bytes: Bytes::new(source.text().as_bytes().to_vec()),
101            source: Some(source),
102        }
103    }
104
105    /// Create a new generic file with the given bytes.
106    pub fn from_bytes<T>(id: FileId, bytes: T) -> Self
107    where
108        T: AsRef<[u8]> + Send + Sync + 'static,
109    {
110        Self {
111            id,
112            bytes: Bytes::new(bytes),
113            source: None,
114        }
115    }
116}
117
impl VirtualFileSlot {
    /// The file id of this file.
    pub fn id(&self) -> FileId {
        self.id
    }

    /// The optional source of this file.
    ///
    /// Returns a clone of the stored source, or `None` if the slot holds no
    /// source.
    pub fn source(&self) -> Option<Source> {
        self.source.clone()
    }

    /// The bytes of this file.
    ///
    /// Returns a clone of the stored bytes.
    pub fn bytes(&self) -> Bytes {
        self.bytes.clone()
    }
}
134
/// Provides access to files from the filesystem.
#[derive(Debug)]
pub struct FilesystemFileProvider {
    /// The project root.
    root: PathBuf,
    /// Maps package specs to the local roots they are re-routed to.
    overrides: HashMap<PackageSpec, PathBuf>,
    /// The slots used to store file contents, keyed by file id.
    slots: Mutex<HashMap<FileId, FileSlot>>,
    /// The package storage used to download and prepare packages, if one is
    /// given.
    package_storage: Option<PackageStorage>,
}
143
144impl FilesystemFileProvider {
145    /// Creates a new file provider for the given project root.
146    ///
147    /// The package storage will be used to download and prepare packages.
148    pub fn new<P>(root: P, package_storage: Option<PackageStorage>) -> Self
149    where
150        P: Into<PathBuf>,
151    {
152        Self {
153            root: root.into(),
154            overrides: HashMap::new(),
155            slots: Mutex::new(HashMap::new()),
156            package_storage,
157        }
158    }
159
160    /// Creates a new file provider for the given project root.
161    ///
162    /// The map of package specs to root paths can be used to re-route package
163    /// imports, pointing them to local roots instead.
164    ///
165    /// The package storage will be used to download and prepare packages.
166    pub fn with_overrides<P, I>(
167        root: P,
168        overrides: I,
169        package_storage: Option<PackageStorage>,
170    ) -> Self
171    where
172        P: Into<PathBuf>,
173        I: IntoIterator<Item = (PackageSpec, PathBuf)>,
174    {
175        Self {
176            root: root.into(),
177            overrides: HashMap::from_iter(overrides),
178            slots: Mutex::new(HashMap::new()),
179            package_storage,
180        }
181    }
182}
183
impl FilesystemFileProvider {
    /// The project root.
    pub fn root(&self) -> &Path {
        &self.root
    }

    /// The package spec overrides of this file provider.
    pub fn overrides(&self) -> &HashMap<PackageSpec, PathBuf> {
        &self.overrides
    }

    /// The slots used to store file contents.
    ///
    /// Locks the internal mutex; the map stays locked for as long as the
    /// returned guard is alive.
    ///
    /// # Panics
    ///
    /// Panics if the mutex guarding the slots is poisoned.
    pub fn slots(&self) -> MutexGuard<'_, HashMap<FileId, FileSlot>> {
        self.slots.lock().unwrap()
    }

    /// Mutable access to the slots used to store file contents.
    ///
    /// No locking is needed because the exclusive borrow of `self` already
    /// guarantees unique access.
    ///
    /// # Panics
    ///
    /// Panics if the mutex guarding the slots is poisoned.
    pub fn slots_mut(&mut self) -> &mut HashMap<FileId, FileSlot> {
        self.slots.get_mut().unwrap()
    }

    /// The package storage if one is given.
    pub fn package_storage(&self) -> Option<&PackageStorage> {
        self.package_storage.as_ref()
    }
}
210
211impl FilesystemFileProvider {
212    /// Reset the slots for the next compilation.
213    pub fn reset_slots(&self) {
214        for slot in self.slots.lock().unwrap().values_mut() {
215            slot.reset();
216        }
217    }
218
219    /// Access the canonical slot for the given file id.
220    pub fn slot<F, T>(&self, id: FileId, f: F) -> T
221    where
222        F: FnOnce(&mut FileSlot) -> T,
223    {
224        let mut map = self.slots.lock().unwrap();
225        f(map.entry(id).or_insert_with(|| FileSlot::new(id)))
226    }
227}
228
229impl ProvideFile for FilesystemFileProvider {
230    fn provide_source(&self, id: FileId, progress: &mut dyn Progress) -> FileResult<Source> {
231        self.slot(id, |slot| {
232            slot.source(
233                self.root(),
234                &self.overrides,
235                self.package_storage(),
236                progress,
237            )
238        })
239    }
240
241    fn provide_bytes(&self, id: FileId, progress: &mut dyn Progress) -> FileResult<Bytes> {
242        self.slot(id, |slot| {
243            slot.bytes(
244                self.root(),
245                &self.overrides,
246                self.package_storage(),
247                progress,
248            )
249        })
250    }
251
252    fn reset_all(&self) {
253        self.reset_slots();
254    }
255}
256
/// Holds the processed data for a file ID.
///
/// The source and the raw bytes are cached independently of each other. Both
/// fields can be populated if the file is both imported and read().
#[derive(Debug)]
pub struct FileSlot {
    /// The slot's file id.
    id: FileId,
    /// The lazily loaded and incrementally updated source file.
    source: SlotCell<Source>,
    /// The lazily loaded raw byte buffer.
    file: SlotCell<Bytes>,
}
269
270impl FileSlot {
271    /// Create a new file slot.
272    pub fn new(id: FileId) -> Self {
273        Self {
274            id,
275            file: SlotCell::new(),
276            source: SlotCell::new(),
277        }
278    }
279
280    /// Marks the file as not yet accessed in preparation of the next
281    /// compilation.
282    pub fn reset(&mut self) {
283        self.source.reset();
284        self.file.reset();
285    }
286
287    /// Retrieve the source for this file.
288    pub fn source(
289        &mut self,
290        root: &Path,
291        overrides: &HashMap<PackageSpec, PathBuf>,
292        package_storage: Option<&PackageStorage>,
293        progress: &mut dyn Progress,
294    ) -> FileResult<Source> {
295        self.source.get_or_init(
296            || read(self.id, root, overrides, package_storage, progress),
297            |data, prev| {
298                let text = decode_utf8(&data)?;
299                if let Some(mut prev) = prev {
300                    prev.replace(text);
301                    Ok(prev)
302                } else {
303                    Ok(Source::new(self.id, text.into()))
304                }
305            },
306        )
307    }
308
309    /// Retrieve the file's bytes.
310    pub fn bytes(
311        &mut self,
312        root: &Path,
313        overrides: &HashMap<PackageSpec, PathBuf>,
314        package_storage: Option<&PackageStorage>,
315        progress: &mut dyn Progress,
316    ) -> FileResult<Bytes> {
317        self.file.get_or_init(
318            || read(self.id, root, overrides, package_storage, progress),
319            |data, _| Ok(Bytes::new(data)),
320        )
321    }
322}
323
/// Lazily processes data for a file.
///
/// Caches the processed result (value or error) together with a hash of the
/// raw load result, so unchanged files do not have to be processed again.
#[derive(Debug)]
struct SlotCell<T> {
    /// The processed data, `None` until the cell is first initialized.
    data: Option<FileResult<T>>,
    /// A hash of the raw file contents / access error.
    fingerprint: u128,
    /// Whether the slot has been accessed in the current compilation.
    accessed: bool,
}
334
impl<T: Clone> SlotCell<T> {
    /// Creates a new, empty cell.
    fn new() -> Self {
        Self {
            data: None,
            fingerprint: 0,
            accessed: false,
        }
    }

    /// Marks the cell as not yet accessed in preparation of the next
    /// compilation.
    ///
    /// The cached data and fingerprint are kept, so the next access can
    /// still reuse the processed data if the loaded result is unchanged.
    fn reset(&mut self) {
        self.accessed = false;
    }

    /// Gets the contents of the cell or initializes them.
    ///
    /// `load` produces the raw file contents (or a load error). `f` turns
    /// freshly loaded contents into the processed value; it also receives
    /// the previously cached value if the last processing was successful.
    ///
    /// Returns a clone of the cached result, be it a value or an error.
    fn get_or_init(
        &mut self,
        load: impl FnOnce() -> FileResult<Vec<u8>>,
        f: impl FnOnce(Vec<u8>, Option<T>) -> FileResult<T>,
    ) -> FileResult<T> {
        // If we accessed the file already in this compilation, retrieve it.
        //
        // The replace marks the cell as accessed unconditionally and yields
        // the previous flag, `load` is not called on this path.
        if std::mem::replace(&mut self.accessed, true)
            && let Some(data) = &self.data
        {
            return data.clone();
        }

        // Read and hash the file.
        //
        // The whole result is hashed, so a different load error yields a
        // different fingerprint, too.
        let result = load();
        let fingerprint = typst::utils::hash128(&result);

        // If the file contents didn't change, yield the old processed data.
        //
        // The new fingerprint is stored either way. The check for cached
        // data ensures that a cell which was never initialized falls
        // through to the processing below.
        if std::mem::replace(&mut self.fingerprint, fingerprint) == fingerprint
            && let Some(data) = &self.data
        {
            return data.clone();
        }

        // Hand the previous value to `f` only if it was successful, a cached
        // error is dropped. `f` is not called if loading failed, the load
        // error is cached instead.
        let prev = self.data.take().and_then(Result::ok);
        let value = result.and_then(|data| f(data, prev));
        self.data = Some(value.clone());

        value
    }
}
382
383/// Resolves the path of a file id on the system, downloading a package if
384/// necessary.
385fn system_path(
386    root: &Path,
387    id: FileId,
388    overrides: &HashMap<PackageSpec, PathBuf>,
389    package_storage: Option<&PackageStorage>,
390    progress: &mut dyn Progress,
391) -> FileResult<PathBuf> {
392    // Determine the root path relative to which the file path
393    // will be resolved.
394    let buf;
395    let mut root = root;
396
397    if let Some(spec) = id.package() {
398        if let Some(local_root) = overrides.get(spec) {
399            tracing::trace!(?spec, ?local_root, "resolving self reference locally");
400            root = local_root;
401        } else if let Some(storage) = package_storage {
402            tracing::trace!(?spec, "preparing package");
403            buf = storage.prepare_package(spec, progress)?;
404            root = &buf;
405        } else {
406            tracing::error!(
407                ?spec,
408                "cannot prepare package, no package storage or local root provided"
409            );
410            return Err(FileError::Package(PackageError::Other(Some(eco_format!(
411                "cannot access package {spec}"
412            )))));
413        }
414    }
415
416    // Join the path to the root. If it tries to escape, deny
417    // access. Note: It can still escape via symlinks.
418    id.vpath().resolve(root).ok_or(FileError::AccessDenied)
419}
420
421/// Reads a file from a `FileId`.
422///
423/// If the ID represents stdin it will read from standard input,
424/// otherwise it gets the file path of the ID and reads the file from disk.
425fn read(
426    id: FileId,
427    root: &Path,
428    overrides: &HashMap<PackageSpec, PathBuf>,
429    package_storage: Option<&PackageStorage>,
430    progress: &mut dyn Progress,
431) -> FileResult<Vec<u8>> {
432    read_from_disk(&system_path(
433        root,
434        id,
435        overrides,
436        package_storage,
437        progress,
438    )?)
439}
440
441/// Read a file from disk.
442fn read_from_disk(path: &Path) -> FileResult<Vec<u8>> {
443    let f = |e| FileError::from_io(e, path);
444    if std::fs::metadata(path).map_err(f)?.is_dir() {
445        Err(FileError::IsDirectory)
446    } else {
447        std::fs::read(path).map_err(f)
448    }
449}
450
451/// Decode UTF-8 with an optional BOM.
452fn decode_utf8(buf: &[u8]) -> FileResult<&str> {
453    // Remove UTF-8 BOM.
454    Ok(std::str::from_utf8(
455        buf.strip_prefix(b"\xef\xbb\xbf").unwrap_or(buf),
456    )?)
457}
458
#[cfg(test)]
mod tests {
    use typst::syntax::VirtualPath;
    use typst::syntax::package::PackageVersion;
    use typst_kit::download::ProgressSink;
    use tytanic_utils::fs::TempTestEnv;

    use super::*;

    /// File ids without a package resolve against the provider root, file
    /// ids of an overridden package resolve against the override root.
    #[test]
    fn test_overrides() {
        TempTestEnv::run_no_check(
            |root| {
                root.setup_file("template/main.typ", "template-main")
                    .setup_file("template/lib.typ", "template-lib")
                    .setup_file("lib.typ", "src-lib")
            },
            |root| {
                let version = PackageVersion {
                    major: 0,
                    minor: 0,
                    patch: 1,
                };
                let spec = PackageSpec {
                    namespace: "preview".into(),
                    name: "self".into(),
                    version,
                };

                let files = FilesystemFileProvider::with_overrides(
                    root.join("template"),
                    [(spec.clone(), root.to_path_buf())],
                    None,
                );

                // Reads the source text of `path`, optionally inside a package.
                let read_text = |package: Option<PackageSpec>, path: &str| {
                    files
                        .provide_source(
                            FileId::new(package, VirtualPath::new(path)),
                            &mut ProgressSink,
                        )
                        .unwrap()
                        .text()
                        .to_owned()
                };

                // lib.typ is available inside the template
                assert_eq!(read_text(None, "lib.typ"), "template-lib");

                // main.typ is available inside the template
                assert_eq!(read_text(None, "main.typ"), "template-main");

                // lib.typ is also available from the project
                assert_eq!(read_text(Some(spec), "lib.typ"), "src-lib");
            },
        );
    }
}
531}