Skip to main content

tectonic_bundles/
cache.rs

1// Copyright 2017-2021 the Tectonic Project
2// Licensed under the MIT License.
3
4//! Local caching of bundle data.
5//!
6//! This module implements Tectonic’s local filesystem caching mechanism for TeX
7//! support files. To make a cachable bundle, wrap any [`CachableBundle`] with a
8//! [`BundleCache`].
9
10use crate::{Bundle, CachableBundle, FileIndex, FileInfo};
11use std::{
12    fs::{self, File},
13    io::{self, BufReader, Read, Write},
14    path::{Path, PathBuf},
15    process,
16    str::FromStr,
17};
18use tectonic_errors::{anyhow::Context, prelude::*};
19use tectonic_io_base::{
20    app_dirs,
21    digest::{self, DigestData},
22    InputHandle, InputOrigin, IoProvider, OpenResult,
23};
24use tectonic_status_base::StatusBackend;
25
26/// A convenience method to provide a better error message when writing to a created file.
27fn file_create_write<P, F, E>(path: P, write_fn: F) -> Result<()>
28where
29    P: AsRef<Path>,
30    F: FnOnce(&mut File) -> std::result::Result<(), E>,
31    E: std::error::Error + 'static + Sync + Send,
32{
33    let path = path.as_ref();
34    let mut f = atry!(
35        File::create(path);
36        ["couldn't open {} for writing", path.display()]
37    );
38    atry!(
39        write_fn(&mut f);
40        ["couldn't write to {}", path.display()]
41    );
42    Ok(())
43}
44
// Create a directory together with any missing parents, reporting failure
// through `atry!` with the path in the message.
//
// - `ensure_dir!(path)` is a statement.
// - `ensure_dir!(inline, path)` does the same and then evaluates to `path`,
//   so it can sit on the right-hand side of a `let` or in a `match` arm.
//
// `$path` is expanded more than once, so pass a cheap, side-effect-free
// expression.
macro_rules! ensure_dir {
    (inline, $path:expr) => {{
        ensure_dir!($path);
        $path
    }};

    ($path:expr) => {
        atry!(
            fs::create_dir_all(&$path);
            ["failed to create directory `{}` or one of its parents", $path.display()]
        );
    };
}
65
66/// A cache wrapper for another bundle.
67///
68/// This bundle implementation is the key to Tectonic’s ability to download TeX
69/// support files on the fly. This is usually used to wrap some kind of network-
70/// based bundle, but can be used with any struct that implements [`Bundle`].
71///
72/// The caching scheme here is designed so that a document build may avoid
73/// touching the network altogether if no new files need to be downloaded.
74pub struct BundleCache<'this, T> {
75    /// If true, only use cached files -- never connect to the backend.
76    ///
77    /// This option can be useful if we are operating disconnected from the
78    /// network (e.g., on an airplane). If you add a new figure to your
79    /// document, the engine will inquire about several related files that it
80    /// thinks might exist. Without this option, such an inquiry might require
81    /// Tectonic to hit the network, when the user knows for sure that the
82    /// bundle is not going to contain these files.
83    only_cached: bool,
84
85    /// The bundle we're wrapping. When files don't exist in the cache,
86    /// we'll get them from here.
87    bundle: Box<dyn CachableBundle<'this, T>>,
88
89    /// The root directory of this cache.
90    /// All other paths are subdirectories of this path.
91    cache_root: PathBuf,
92
93    // The hash of the bundle we're caching.
94    bundle_hash: DigestData,
95}
96
97impl<'this, T: FileIndex<'this>> BundleCache<'this, T> {
98    /// Make a new filesystem-backed cache from `bundle`.
99    ///
100    /// This method will fail if we can't connect to the bundle AND
101    /// we don't already have it in our cache.
102    /// Other than that, this method does not require network access.
103    pub fn new(
104        mut bundle: Box<dyn CachableBundle<'this, T>>,
105        only_cached: bool,
106        cache_root: Option<PathBuf>,
107    ) -> Result<Self> {
108        // If cache_root is none, use default location.
109        let cache_root = match cache_root {
110            None => app_dirs::get_user_cache_dir("bundles").context("while making cache root")?,
111            Some(p) => ensure_dir!(inline, p),
112        };
113
114        let hash_dir = ensure_dir!(inline, &cache_root.join("hashes"));
115        let hash_file = hash_dir.join(app_dirs::app_dirs2::sanitized(&bundle.get_location()));
116
117        let saved_hash = {
118            if !hash_file.exists() {
119                None
120            } else {
121                match File::open(&hash_file) {
122                    Err(e) => return Err(e.into()),
123                    Ok(f) => {
124                        let mut digest_text = String::with_capacity(digest::DIGEST_LEN);
125                        f.take(digest::DIGEST_LEN as u64)
126                            .read_to_string(&mut digest_text)
127                            .with_context(|| {
128                                format!("while reading hash from {hash_file:?} in cache")
129                            })?;
130                        Some(
131                            DigestData::from_str(&digest_text)
132                                .with_context(|| format!("while parsing hash `{digest_text}`"))?,
133                        )
134                    }
135                }
136            }
137        };
138
139        // ===== BEGIN AWARE REPORTS PATCH =====================================
140        // A warm cache must not phone home. Upstream replaces this whole block
141        // with an unconditional `let live_hash = bundle.get_digest();` followed
142        // by a `match (saved_hash, live_hash)`, so for a network bundle it does a
143        // remote round-trip on *every* open even when every needed file is already
144        // cached — a large per-invocation slowdown versus the pre-0.4 caching
145        // bundle, which short-circuited on a warm cache ("avoid connecting to the
146        // backend if at all possible"; see upstream issue #456: a warm cache
147        // should not contact the server). We restore that by matching on the
148        // cache state directly: a cached digest is trusted as-is (no network, no
149        // per-open freshness re-check); only a cold cache that is allowed to use
150        // the network fetches the live digest; an offline (`only_cached`) cold
151        // cache fails fast without connecting. Aware Reports carries this fix
152        // (commit msg / repo: tptools-rust) until it is upstreamed.
153        let bundle_hash: DigestData = match saved_hash {
154            Some(cached) => cached,
155            None if only_cached => bail!(
156                "bundle is not cached and offline mode forbids network access"
157            ),
158            None => {
159                let live = bundle
160                    .get_digest()
161                    .context("while fetching the bundle digest from the network")?;
162                file_create_write(&hash_file, |f| writeln!(f, "{}", &live.to_string()))
163                    .with_context(|| {
164                        format!("while writing bundle hash to {hash_file:?} in cache")
165                    })?;
166                live
167            }
168        };
169        // ===== END AWARE REPORTS PATCH =======================================
170
171        let bundle = BundleCache {
172            only_cached,
173            bundle,
174            cache_root,
175            bundle_hash,
176        };
177
178        // Right now, files are stored in
179        // `<root>/data/<bundle hash>/<file path>.
180        // This works for now, but may cause issues if we add multiple
181        // bundle formats with incompatible path schemes. We assume that
182        // all bundles with the same hash use the same path scheme,
183        // which is true for network TTB and fs TTB.
184        // Adding support for multiple formats of a single bundle hash
185        // shouldn't be too hard, but isn't necessary yet.
186        ensure_dir!(&bundle
187            .cache_root
188            .join(format!("data/{}", bundle.bundle_hash)));
189
190        Ok(bundle)
191    }
192
193    /// Build a cache path for the given bundle file
194    fn get_file_path(&self, info: &T::InfoType) -> PathBuf {
195        let mut out = self.cache_root.clone();
196        out.push(format!("data/{}", self.bundle_hash));
197        out.push(info.path());
198        out
199    }
200
201    /// Build a temporary path for the given bundle file
202    /// To ensure safety with multiple instances of tectonic,
203    /// files are first downloaded to a known-unique location, then renamed.
204    fn get_file_path_tmp(&self, info: &T::InfoType) -> PathBuf {
205        let mut out = self.cache_root.clone();
206        out.push(format!("data/{}", self.bundle_hash));
207        out.push(format!("{}-tmp-pid{}", info.path(), process::id()));
208        out
209    }
210
211    fn ensure_index(&mut self) -> Result<()> {
212        let target = self
213            .cache_root
214            .join(format!("data/{}.index", self.bundle_hash));
215
216        // We check for two things here:
217        // - that the bundle index is initialized
218        // - that the bundle index is cached.
219        //
220        // It would be nice to assume that the bundle index is never initialized
221        // before this function is called, but we can't do that. Unlike ttb,
222        // itar bundles cannot retrieve the bundle hash without loading the index.
223        if target.exists() {
224            if self.bundle.index().is_initialized() {
225                return Ok(());
226            }
227
228            // Initialize bundle index using cached file
229            let mut file = File::open(&target)
230                .with_context(|| format!("while opening index {target:?} in cache"))?;
231            self.bundle
232                .initialize_index(&mut file)
233                .with_context(|| format!("while inititalizing index using cached {target:?}"))?;
234        } else {
235            // Download index
236
237            // We first download to a temporary file, rename to target
238            // Makes sure that parallel runs of tectonic don't break the index
239            let tmp_target = self.cache_root.join(format!(
240                "data/{}.index-tmp-pid{}",
241                self.bundle_hash,
242                process::id()
243            ));
244
245            let mut reader = self
246                .bundle
247                .get_index_reader()
248                .context("while getting index reader")?;
249            let mut file = File::create(&tmp_target)
250                .with_context(|| format!("while creating index {tmp_target:?} in cache"))?;
251            io::copy(&mut reader, &mut file)
252                .with_context(|| format!("while writing index {tmp_target:?} in cache"))?;
253            drop(file);
254
255            fs::rename(&tmp_target, &target).with_context(|| {
256                format!("while renaming index {tmp_target:?} to {target:?} in cache")
257            })?;
258
259            if self.bundle.index().is_initialized() {
260                return Ok(());
261            }
262
263            let mut file = File::open(&target)
264                .with_context(|| format!("while opening index from {target:?} in cache"))?;
265            self.bundle
266                .initialize_index(&mut file)
267                .with_context(|| format!("while initializing index {target:?} in cache"))?;
268        }
269
270        Ok(())
271    }
272
273    /// Get a FileInfo from a name.
274    /// This returns (in_cache, info), where in_cache is true
275    /// if this file is already in our cache and can be retrieved
276    /// without touching the backing bundle.
277    fn get_fileinfo(&mut self, name: &str) -> OpenResult<(bool, T::InfoType)> {
278        if let Err(e) = self.ensure_index() {
279            return OpenResult::Err(e);
280        };
281
282        let info = match self.bundle.search(name) {
283            Some(i) => i,
284            None => return OpenResult::NotAvailable,
285        };
286
287        let target = self.get_file_path(&info);
288        OpenResult::Ok((target.exists(), info))
289    }
290
291    /// Fetch a file from the bundle backing this cache.
292    /// Returns a path to the file that was created.
293    fn fetch_file(
294        &mut self,
295        info: T::InfoType,
296        status: &mut dyn StatusBackend,
297    ) -> OpenResult<PathBuf> {
298        let target = self.get_file_path(&info);
299        match fs::create_dir_all(target.parent().unwrap()) {
300            Ok(()) => {}
301            Err(e) => return OpenResult::Err(e.into()),
302        };
303
304        // Already in the cache?
305        if target.exists() {
306            return OpenResult::Ok(target);
307        }
308
309        // No, it's not. Are we in cache-only mode?
310        if self.only_cached {
311            return OpenResult::NotAvailable;
312        }
313
314        // Get the file.
315        let mut handle = match self.bundle.open_fileinfo(&info, status) {
316            OpenResult::Ok(c) => c,
317            OpenResult::Err(e) => return OpenResult::Err(e),
318            OpenResult::NotAvailable => return OpenResult::NotAvailable,
319        };
320
321        // Download to a known-unique temporary location, then move.
322        // This prevents issues when running multiple processes.
323        let tmp_path = self.get_file_path_tmp(&info);
324        if let Err(e) = file_create_write(&tmp_path, |f| io::copy(&mut handle, f).map(|_| ())) {
325            return OpenResult::Err(e);
326        }
327        if let Err(e) = fs::rename(&tmp_path, &target) {
328            return OpenResult::Err(e.into());
329        };
330
331        OpenResult::Ok(target)
332    }
333}
334
335impl<'this, T: FileIndex<'this>> IoProvider for BundleCache<'this, T> {
336    fn input_open_name(
337        &mut self,
338        name: &str,
339        status: &mut dyn StatusBackend,
340    ) -> OpenResult<InputHandle> {
341        let path = match self.get_fileinfo(name) {
342            OpenResult::NotAvailable => return OpenResult::NotAvailable,
343            OpenResult::Err(e) => return OpenResult::Err(e),
344            OpenResult::Ok((true, f)) => self.get_file_path(&f),
345            OpenResult::Ok((false, f)) => match self.fetch_file(f, status) {
346                OpenResult::Ok(p) => p,
347                OpenResult::NotAvailable => return OpenResult::NotAvailable,
348                OpenResult::Err(e) => return OpenResult::Err(e),
349            },
350        };
351
352        let f = match File::open(path) {
353            Ok(f) => f,
354            Err(e) => return OpenResult::Err(e.into()),
355        };
356
357        OpenResult::Ok(InputHandle::new_read_only(
358            name,
359            BufReader::new(f),
360            InputOrigin::Other,
361        ))
362    }
363}
364
365impl<'this, T: FileIndex<'this>> Bundle for BundleCache<'this, T> {
366    fn get_digest(&mut self) -> Result<DigestData> {
367        Ok(self.bundle_hash)
368    }
369
370    fn all_files(&self) -> Vec<String> {
371        self.bundle.all_files()
372    }
373}