Skip to main content

wasmtime_internal_cache/
lib.rs

//! > **⚠️ Warning ⚠️**: this crate is an internal-only crate for the Wasmtime
//! > project and is not intended for general use. APIs are not strictly
//! > reviewed for safety and usage outside of Wasmtime may have bugs. If
//! > you're interested in using this feel free to file an issue on the
//! > Wasmtime repository to start a discussion about doing so, but otherwise
//! > be aware that your usage of this crate is not supported.
7
8use base64::Engine;
9use log::{debug, trace, warn};
10use serde::{Deserialize, Serialize};
11use sha2::{Digest, Sha256};
12use std::hash::Hash;
13use std::hash::Hasher;
14use std::io::Write;
15use std::path::{Path, PathBuf};
16use std::sync::Arc;
17use std::sync::atomic::{AtomicUsize, Ordering::SeqCst};
18use std::time::Duration;
19use std::{fs, io};
20use wasmtime_environ::error::Result;
21
22#[macro_use] // for tests
23mod config;
24mod worker;
25
26pub use config::{CacheConfig, create_new_config};
27use worker::Worker;
28
29/// Global configuration for how the cache is managed
30#[derive(Debug, Clone)]
31pub struct Cache {
32    config: CacheConfig,
33    worker: Worker,
34    state: Arc<CacheState>,
35}
36
// Generates public getters that forward to the method of the same name on
// `self.config`.
//
// Accepts one or more `name: Type` pairs separated by commas (a trailing comma
// is allowed), so both of these forms work inside an `impl` block:
//
//     generate_config_setting_getter!(cleanup_interval: Duration);
//     generate_config_setting_getter!(a: u64, b: i32);
//
// Each generated getter is documented as "Returns `name`.".
macro_rules! generate_config_setting_getter {
    ($($setting:ident: $setting_type:ty),+ $(,)?) => {
        $(
            #[doc = concat!("Returns ", "`", stringify!($setting), "`.")]
            pub fn $setting(&self) -> $setting_type {
                self.config.$setting()
            }
        )+
    };
}
45
46impl Cache {
47    /// Builds a [`Cache`] from the configuration and spawns the cache worker.
48    ///
49    /// If you want to load the cache configuration from a file, use [`CacheConfig::from_file`].
50    /// You can call [`CacheConfig::new`] for the default configuration.
51    ///
52    /// # Errors
53    /// Returns an error if the configuration is invalid.
54    pub fn new(mut config: CacheConfig) -> Result<Self> {
55        config.validate()?;
56        Ok(Self {
57            worker: Worker::start_new(&config),
58            config,
59            state: Default::default(),
60        })
61    }
62
63    /// Loads cache configuration specified at `path`.
64    ///
65    /// This method will read the file specified by `path` on the filesystem and
66    /// attempt to load cache configuration from it. This method can also fail
67    /// due to I/O errors, misconfiguration, syntax errors, etc. For expected
68    /// syntax in the configuration file see the [documentation online][docs].
69    ///
70    /// Passing in `None` loads cache configuration from the system default path.
71    /// This is located, for example, on Unix at `$HOME/.config/wasmtime/config.toml`
72    /// and is typically created with the `wasmtime config new` command.
73    ///
74    /// # Errors
75    ///
76    /// This method can fail due to any error that happens when loading the file
77    /// pointed to by `path` and attempting to load the cache configuration.
78    ///
79    /// [docs]: https://bytecodealliance.github.io/wasmtime/cli-cache.html
80    pub fn from_file(path: Option<&Path>) -> Result<Self> {
81        let config = CacheConfig::from_file(path)?;
82        Self::new(config)
83    }
84
85    generate_config_setting_getter!(worker_event_queue_size: u64);
86    generate_config_setting_getter!(baseline_compression_level: i32);
87    generate_config_setting_getter!(optimized_compression_level: i32);
88    generate_config_setting_getter!(optimized_compression_usage_counter_threshold: u64);
89    generate_config_setting_getter!(cleanup_interval: Duration);
90    generate_config_setting_getter!(optimizing_compression_task_timeout: Duration);
91    generate_config_setting_getter!(allowed_clock_drift_for_files_from_future: Duration);
92    generate_config_setting_getter!(file_count_soft_limit: u64);
93    generate_config_setting_getter!(files_total_size_soft_limit: u64);
94    generate_config_setting_getter!(file_count_limit_percent_if_deleting: u8);
95    generate_config_setting_getter!(files_total_size_limit_percent_if_deleting: u8);
96
97    /// Returns path to the cache directory.
98    pub fn directory(&self) -> &PathBuf {
99        &self
100            .config
101            .directory()
102            .expect("directory should be validated in Config::new")
103    }
104
105    #[cfg(test)]
106    fn worker(&self) -> &Worker {
107        &self.worker
108    }
109
110    /// Returns the number of cache hits seen so far
111    pub fn cache_hits(&self) -> usize {
112        self.state.hits.load(SeqCst)
113    }
114
115    /// Returns the number of cache misses seen so far
116    pub fn cache_misses(&self) -> usize {
117        self.state.misses.load(SeqCst)
118    }
119
120    pub(crate) fn on_cache_get_async(&self, path: impl AsRef<Path>) {
121        self.state.hits.fetch_add(1, SeqCst);
122        self.worker.on_cache_get_async(path)
123    }
124
125    pub(crate) fn on_cache_update_async(&self, path: impl AsRef<Path>) {
126        self.state.misses.fetch_add(1, SeqCst);
127        self.worker.on_cache_update_async(path)
128    }
129}
130
/// Hit/miss counters for a [`Cache`]; both start at zero via `Default`.
#[derive(Default, Debug)]
struct CacheState {
    /// Incremented by `Cache::on_cache_get_async`.
    hits: AtomicUsize,
    /// Incremented by `Cache::on_cache_update_async`.
    misses: AtomicUsize,
}
136
137/// Module level cache entry.
138pub struct ModuleCacheEntry<'cache>(Option<ModuleCacheEntryInner<'cache>>);
139
140struct ModuleCacheEntryInner<'cache> {
141    root_path: PathBuf,
142    cache: &'cache Cache,
143}
144
145struct Sha256Hasher(Sha256);
146
147impl<'cache> ModuleCacheEntry<'cache> {
148    /// Create the cache entry.
149    pub fn new(compiler_name: &str, cache: Option<&'cache Cache>) -> Self {
150        Self(cache.map(|cache| ModuleCacheEntryInner::new(compiler_name, cache)))
151    }
152
153    #[cfg(test)]
154    fn from_inner(inner: ModuleCacheEntryInner<'cache>) -> Self {
155        Self(Some(inner))
156    }
157
158    /// Gets cached data if state matches, otherwise calls `compute`.
159    ///
160    /// Data is automatically serialized/deserialized with `bincode`.
161    pub fn get_data<T, U, E>(&self, state: T, compute: fn(&T) -> Result<U, E>) -> Result<U, E>
162    where
163        T: Hash,
164        U: Serialize + for<'a> Deserialize<'a>,
165    {
166        self.get_data_raw(
167            &state,
168            compute,
169            |_state, data| postcard::to_allocvec(data).ok(),
170            |_state, data| postcard::from_bytes(&data).ok(),
171        )
172    }
173
174    /// Gets cached data if state matches, otherwise calls `compute`.
175    ///
176    /// If the cache is disabled or no cached data is found then `compute` is
177    /// called to calculate the data. If the data was found in cache it is
178    /// passed to `deserialize`, which if successful will be the returned value.
179    /// When computed the `serialize` function is used to generate the bytes
180    /// from the returned value.
181    pub fn get_data_raw<T, U, E>(
182        &self,
183        state: &T,
184        // NOTE: These are function pointers instead of closures so that they
185        // don't accidentally close over something not accounted in the cache.
186        compute: fn(&T) -> Result<U, E>,
187        serialize: fn(&T, &U) -> Option<Vec<u8>>,
188        deserialize: fn(&T, Vec<u8>) -> Option<U>,
189    ) -> Result<U, E>
190    where
191        T: Hash,
192    {
193        let inner = match &self.0 {
194            Some(inner) => inner,
195            None => return compute(state),
196        };
197
198        let mut hasher = Sha256Hasher(Sha256::new());
199        state.hash(&mut hasher);
200        let hash: [u8; 32] = hasher.0.finalize().into();
201        // standard encoding uses '/' which can't be used for filename
202        let hash = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(&hash);
203
204        if let Some(cached_val) = inner.get_data(&hash) {
205            if let Some(val) = deserialize(state, cached_val) {
206                let mod_cache_path = inner.root_path.join(&hash);
207                inner.cache.on_cache_get_async(&mod_cache_path); // call on success
208                return Ok(val);
209            }
210        }
211        let val_to_cache = compute(state)?;
212        if let Some(bytes) = serialize(state, &val_to_cache) {
213            if inner.update_data(&hash, &bytes).is_some() {
214                let mod_cache_path = inner.root_path.join(&hash);
215                inner.cache.on_cache_update_async(&mod_cache_path); // call on success
216            }
217        }
218        Ok(val_to_cache)
219    }
220}
221
222impl<'cache> ModuleCacheEntryInner<'cache> {
223    fn new(compiler_name: &str, cache: &'cache Cache) -> Self {
224        // For git builds (see `build.rs`), include the executable's mtime so
225        // successive local rebuilds don't share cached compilations from prior
226        // source states. crates.io builds rely on `COMPILER_VERSION` alone,
227        // which is stable across rebuilds.
228        let maybe_mtime = {
229            if env!("USE_MTIME") == "true" {
230                fn self_mtime() -> Option<String> {
231                    let path = std::env::current_exe().ok()?;
232                    let metadata = path.metadata().ok()?;
233                    let mtime = metadata.modified().ok()?;
234                    Some(match mtime.duration_since(std::time::UNIX_EPOCH) {
235                        Ok(dur) => format!("-{}", dur.as_millis()),
236                        Err(err) => format!("-m{}", err.duration().as_millis()),
237                    })
238                }
239                self_mtime().unwrap_or_else(|| "-no-mtime".to_string())
240            } else {
241                String::new()
242            }
243        };
244        let compiler_dir = format!(
245            "{comp_name}-{comp_ver}{maybe_mtime}",
246            comp_name = compiler_name,
247            comp_ver = env!("COMPILER_VERSION"),
248        );
249        let root_path = cache.directory().join("modules").join(compiler_dir);
250
251        Self { root_path, cache }
252    }
253
254    fn get_data(&self, hash: &str) -> Option<Vec<u8>> {
255        let mod_cache_path = self.root_path.join(hash);
256        trace!("get_data() for path: {}", mod_cache_path.display());
257        let compressed_cache_bytes = fs::read(&mod_cache_path).ok()?;
258        let cache_bytes = zstd::decode_all(&compressed_cache_bytes[..])
259            .map_err(|err| warn!("Failed to decompress cached code: {err}"))
260            .ok()?;
261        Some(cache_bytes)
262    }
263
264    fn update_data(&self, hash: &str, serialized_data: &[u8]) -> Option<()> {
265        let mod_cache_path = self.root_path.join(hash);
266        trace!("update_data() for path: {}", mod_cache_path.display());
267        let compressed_data = zstd::encode_all(
268            &serialized_data[..],
269            self.cache.baseline_compression_level(),
270        )
271        .map_err(|err| warn!("Failed to compress cached code: {err}"))
272        .ok()?;
273
274        // Optimize syscalls: first, try writing to disk. It should succeed in most cases.
275        // Otherwise, try creating the cache directory and retry writing to the file.
276        if fs_write_atomic(&mod_cache_path, "mod", &compressed_data).is_ok() {
277            return Some(());
278        }
279
280        debug!(
281            "Attempting to create the cache directory, because \
282             failed to write cached code to disk, path: {}",
283            mod_cache_path.display(),
284        );
285
286        let cache_dir = mod_cache_path.parent().unwrap();
287        fs::create_dir_all(cache_dir)
288            .map_err(|err| {
289                warn!(
290                    "Failed to create cache directory, path: {}, message: {}",
291                    cache_dir.display(),
292                    err
293                )
294            })
295            .ok()?;
296
297        match fs_write_atomic(&mod_cache_path, "mod", &compressed_data) {
298            Ok(_) => Some(()),
299            Err(err) => {
300                warn!(
301                    "Failed to write file with rename, target path: {}, err: {}",
302                    mod_cache_path.display(),
303                    err
304                );
305                None
306            }
307        }
308    }
309}
310
311impl Hasher for Sha256Hasher {
312    fn finish(&self) -> u64 {
313        panic!("Sha256Hasher doesn't support finish!");
314    }
315
316    fn write(&mut self, bytes: &[u8]) {
317        self.0.update(bytes);
318    }
319}
320
// Writes `contents` to `path` by first writing a sibling temporary file and
// then renaming it over `path`.
//
// The temporary file is `path` with its extension set to
// `wip-atomic-write-<reason>`.
//
// Assumption: path inside cache directory.
// Then, we don't have to use sound OS-specific exclusive file access.
// Note: there's no need to remove temporary file here - cleanup task will do it later.
//
// Errors: fails if the temporary file already exists or can't be created
// (e.g. the parent directory is missing), if writing fails, or if the rename
// fails. `path` is only touched by the final rename.
fn fs_write_atomic(path: &Path, reason: &str, contents: &[u8]) -> io::Result<()> {
    let lock_path = path.with_extension(format!("wip-atomic-write-{reason}"));
    {
        let mut file = fs::OpenOptions::new()
            .create_new(true) // atomic file creation (assumption: no one will open it without this flag)
            .write(true)
            .open(&lock_path)?;
        file.write_all(contents)?;
        // `file` goes out of scope here, closing it before the rename.
    }
    fs::rename(&lock_path, path) // atomic file rename
}
334
335#[cfg(test)]
336mod tests;