cosmwasm_vm/modules/
file_system_cache.rs

1use blake2::{digest::consts::U5, Blake2b, Digest};
2use std::fs;
3use std::hash::Hash;
4use std::io;
5use std::panic::catch_unwind;
6use std::path::{Path, PathBuf};
7use std::sync::OnceLock;
8use thiserror::Error;
9
10use wasmer::{DeserializeError, Module, Target};
11
12use cosmwasm_std::Checksum;
13
14use crate::errors::{VmError, VmResult};
15use crate::filesystem::mkdir_p;
16use crate::modules::current_wasmer_module_version;
17use crate::wasm_backend::make_runtime_engine;
18use crate::wasm_backend::COST_FUNCTION_HASH;
19use crate::Size;
20
21use super::cached_module::engine_size_estimate;
22use super::CachedModule;
23
/// Manually maintained version tag that is hashed into the module version discriminator.
/// Bumping it changes the directory modules are stored in, so modules written under
/// the previous value are no longer picked up.
/// You normally _do not_ need to change this value yourself.
///
/// One case where you might need to bump it yourself is when the memory layout of
/// some types in Rust [std] changes.
///
/// ---
///
/// Now follows the legacy documentation of this value:
///
/// ## Version history:
/// - **v1**:<br>
///   cosmwasm_vm < 1.0.0-beta5. This is working well up to Wasmer 2.0.0 as
///   [in wasmvm 1.0.0-beta2](https://github.com/CosmWasm/wasmvm/blob/v1.0.0-beta2/libwasmvm/Cargo.lock#L1412-L1413)
///   and [wasmvm 0.16.3](https://github.com/CosmWasm/wasmvm/blob/v0.16.3/libwasmvm/Cargo.lock#L1408-L1409).
///   Versions that ship with Wasmer 2.1.x such [as wasmvm 1.0.0-beta3](https://github.com/CosmWasm/wasmvm/blob/v1.0.0-beta3/libwasmvm/Cargo.lock#L1534-L1535)
///   to [wasmvm 1.0.0-beta5](https://github.com/CosmWasm/wasmvm/blob/v1.0.0-beta5/libwasmvm/Cargo.lock#L1530-L1531)
///   are broken, i.e. they will crash when reading older v1 modules.
/// - **v2**:<br>
///   Version for cosmwasm_vm 1.0.0-beta5 / wasmvm 1.0.0-beta6 that ships with Wasmer 2.1.1.
/// - **v3**:<br>
///   Version for Wasmer 2.2.0 which contains a [module breaking change to 2.1.x](https://github.com/wasmerio/wasmer/pull/2747).
/// - **v4**:<br>
///   Version for Wasmer 2.3.0 which contains a module breaking change to 2.2.0 that was not reflected in
///   the module header version (<https://github.com/wasmerio/wasmer/issues/3193>). In cosmwasm-vm 1.1.0-1.1.1
///   the old value "v3" is still used along with Wasmer 2.3.0 (bug). From cosmwasm 1.1.2 onwards, this is
///   fixed by bumping to "v4".
/// - **v5**:<br>
///   A change in memory layout of some types in Rust [std] caused
///   [issues with module deserialization](https://github.com/CosmWasm/wasmvm/issues/426).
///   To work around this, the version was bumped to "v5" here to invalidate these corrupt caches.
/// - **v6**:<br>
///   Version for cosmwasm_vm 1.3+ which adds a sub-folder with the target identifier for the modules.
/// - **v7**:<br>
///   New version because of Wasmer 2.3.0 -> 4 upgrade.
///   This internally changes how rkyv is used for module serialization, making compatibility unlikely.
/// - **v8**:<br>
///   New version because of Wasmer 4.1.2 -> 4.2.2 upgrade.
///   Module compatibility between Wasmer versions is not guaranteed.
/// - **v9**:<br>
///   New version because of Wasmer 4.2.2 -> 4.2.6 upgrade.
///   Module compatibility between Wasmer versions is not guaranteed.
/// - **v10**:<br>
///   New version because of Metering middleware change.
/// - **v20**:<br>
///   New version because of Wasmer 4.3.3 -> 4.3.7 upgrade.
///   Module compatibility between Wasmer versions is not guaranteed.
const MODULE_SERIALIZATION_VERSION: &str = "v20";
71
72/// Function that actually does the heavy lifting of creating the module version discriminator.
73///
74/// Separated for sanity tests because otherwise the `OnceLock` would cache the result.
75#[inline]
76fn raw_module_version_discriminator() -> String {
77    let hashes = [COST_FUNCTION_HASH];
78
79    let mut hasher = Blake2b::<U5>::new();
80
81    hasher.update(MODULE_SERIALIZATION_VERSION.as_bytes());
82    hasher.update(wasmer::VERSION.as_bytes());
83
84    for hash in hashes {
85        hasher.update(hash);
86    }
87
88    hex::encode(hasher.finalize())
89}
90
91/// This version __MUST__ change whenever the module system changes in a way
92/// that old stored modules would be corrupt when loaded in the new system.
93/// This needs to be done e.g. when switching between the jit/native engine.
94///
95/// By default, this derived by performing the following operation:
96///
97/// ```ignore
98/// BLAKE2(
99///   manual module version,
100///   wasmer version requirement,
101///   BLAKE2_512(cost_fn)
102/// )
103/// ```
104///
105/// If anything else changes, you must change the manual module version.
106///
107/// See https://github.com/wasmerio/wasmer/issues/2781 for more information
108/// on Wasmer's module stability concept.
109#[inline]
110fn module_version_discriminator() -> &'static str {
111    static DISCRIMINATOR: OnceLock<String> = OnceLock::new();
112
113    DISCRIMINATOR.get_or_init(raw_module_version_discriminator)
114}
115
/// A cache of compiled Wasm artifacts that lives in a directory on disk.
pub struct FileSystemCache {
    /// Directory in which the serialized modules of this cache are stored.
    modules_path: PathBuf,
    /// When set, modules are loaded from disk with Wasmer's `*_unchecked`
    /// functions instead of the checked ones.
    unchecked_modules: bool,
}
122
123/// An error type that hides system specific error information
124/// to ensure deterministic errors across operating systems.
125#[derive(Error, Debug)]
126pub enum NewFileSystemCacheError {
127    #[error("Could not get metadata of cache path")]
128    CouldntGetMetadata,
129    #[error("The supplied path is readonly")]
130    ReadonlyPath,
131    #[error("The supplied path already exists but is no directory")]
132    ExistsButNoDirectory,
133    #[error("Could not create cache path")]
134    CouldntCreatePath,
135}
136
137impl FileSystemCache {
138    /// Construct a new `FileSystemCache` around the specified directory.
139    /// The contents of the cache are stored in sub-versioned directories.
140    /// If `unchecked_modules` is set to true, it uses the `*_unchecked`
141    /// wasmer functions for loading modules from disk (no validity checks).
142    ///
143    /// # Safety
144    ///
145    /// This method is unsafe because there's no way to ensure the artifacts
146    /// stored in this cache haven't been corrupted or tampered with.
147    pub unsafe fn new(
148        base_path: impl Into<PathBuf>,
149        unchecked_modules: bool,
150    ) -> Result<Self, NewFileSystemCacheError> {
151        let base_path: PathBuf = base_path.into();
152        if base_path.exists() {
153            let metadata = base_path
154                .metadata()
155                .map_err(|_e| NewFileSystemCacheError::CouldntGetMetadata)?;
156            if !metadata.is_dir() {
157                return Err(NewFileSystemCacheError::ExistsButNoDirectory);
158            }
159            if metadata.permissions().readonly() {
160                return Err(NewFileSystemCacheError::ReadonlyPath);
161            }
162        } else {
163            // Create the directory and any parent directories if they don't yet exist.
164            mkdir_p(&base_path).map_err(|_e| NewFileSystemCacheError::CouldntCreatePath)?;
165        }
166
167        Ok(Self {
168            modules_path: modules_path(
169                &base_path,
170                current_wasmer_module_version(),
171                &Target::default(),
172            ),
173            unchecked_modules,
174        })
175    }
176
177    /// If `unchecked` is true, the cache will use the `*_unchecked` wasmer functions for
178    /// loading modules from disk.
179    pub fn set_module_unchecked(&mut self, unchecked: bool) {
180        self.unchecked_modules = unchecked;
181    }
182
183    /// Returns the path to the serialized module with the given checksum.
184    fn module_file(&self, checksum: &Checksum) -> PathBuf {
185        let mut path = self.modules_path.clone();
186        path.push(checksum.to_hex());
187        path.set_extension("module");
188        path
189    }
190
191    /// Loads a serialized module from the file system and returns a Module + Engine,
192    /// along with a size estimation for the pair.
193    pub fn load(
194        &self,
195        checksum: &Checksum,
196        memory_limit: Option<Size>,
197    ) -> VmResult<Option<CachedModule>> {
198        let file_path = self.module_file(checksum);
199
200        let engine = make_runtime_engine(memory_limit);
201        let result = if self.unchecked_modules {
202            unsafe { Module::deserialize_from_file_unchecked(&engine, &file_path) }
203        } else {
204            unsafe { Module::deserialize_from_file(&engine, &file_path) }
205        };
206        match result {
207            Ok(module) => {
208                let module_size = module_size(&file_path)?;
209                Ok(Some(CachedModule {
210                    module,
211                    engine,
212                    size_estimate: module_size + engine_size_estimate(),
213                }))
214            }
215            Err(DeserializeError::Io(err)) => match err.kind() {
216                io::ErrorKind::NotFound => Ok(None),
217                _ => Err(VmError::cache_err(format!(
218                    "Error opening module file: {err}"
219                ))),
220            },
221            Err(err) => Err(VmError::cache_err(format!(
222                "Error deserializing module: {err}"
223            ))),
224        }
225    }
226
227    /// Stores a serialized module to the file system. Returns the size of the serialized module.
228    pub fn store(&mut self, checksum: &Checksum, module: &Module) -> VmResult<usize> {
229        mkdir_p(&self.modules_path)
230            .map_err(|_e| VmError::cache_err("Error creating modules directory"))?;
231
232        let path = self.module_file(checksum);
233        catch_unwind(|| {
234            module
235                .serialize_to_file(&path)
236                .map_err(|e| VmError::cache_err(format!("Error writing module to disk: {e}")))
237        })
238        .map_err(|_| VmError::cache_err("Could not write module to disk"))??;
239        let module_size = module_size(&path)?;
240        Ok(module_size)
241    }
242
243    /// Removes a serialized module from the file system.
244    ///
245    /// Returns true if the file existed and false if the file did not exist.
246    pub fn remove(&mut self, checksum: &Checksum) -> VmResult<bool> {
247        let file_path = self.module_file(checksum);
248
249        if file_path.exists() {
250            fs::remove_file(file_path)
251                .map_err(|_e| VmError::cache_err("Error deleting module from disk"))?;
252            Ok(true)
253        } else {
254            Ok(false)
255        }
256    }
257}
258
259/// Returns the size of the module stored on disk
260fn module_size(module_path: &Path) -> VmResult<usize> {
261    let module_size: usize = module_path
262        .metadata()
263        .map_err(|_e| VmError::cache_err("Error getting file metadata"))? // ensure error message is not system specific
264        .len()
265        .try_into()
266        .expect("Could not convert file size to usize");
267    Ok(module_size)
268}
269
270/// Creates an identifier for the Wasmer `Target` that is used for
271/// cache invalidation. The output is reasonable human friendly to be usable
272/// in file path component.
273fn target_id(target: &Target) -> String {
274    // Use a custom Hasher implementation to avoid randomization.
275    let mut deterministic_hasher = crc32fast::Hasher::new();
276    target.hash(&mut deterministic_hasher);
277    let hash = deterministic_hasher.finalize();
278    format!("{}-{:08X}", target.triple(), hash) // print 4 byte hash as 8 hex characters
279}
280
281/// The path to the latest version of the modules.
282fn modules_path(base_path: &Path, wasmer_module_version: u32, target: &Target) -> PathBuf {
283    let version_dir = format!(
284        "{}-wasmer{wasmer_module_version}",
285        module_version_discriminator()
286    );
287    let target_dir = target_id(target);
288    base_path.join(version_dir).join(target_dir)
289}
290
#[cfg(test)]
mod tests {
    use super::*;
    use crate::wasm_backend::{compile, make_compiling_engine};
    use tempfile::TempDir;
    use wasmer::{imports, Instance as WasmerInstance, Store};
    use wasmer_middlewares::metering::set_remaining_points;

    const TESTING_MEMORY_LIMIT: Option<Size> = Some(Size::mebi(16));
    const TESTING_GAS_LIMIT: u64 = 500_000;

    /// Minimal module exporting `add_one(i32) -> i32`.
    const SOME_WAT: &str = r#"(module
        (type $t0 (func (param i32) (result i32)))
        (func $add_one (export "add_one") (type $t0) (param $p0 i32) (result i32)
            local.get $p0
            i32.const 1
            i32.add))
    "#;

    /// Target id expected for [`exotic_triple`] combined with the POPCNT CPU feature.
    const POPCNT_TARGET_ID: &str = "x86_64-nintendo-fuchsia-gnu-coff-719EEF18";

    /// Opens a cache (with checked module loading) rooted at the given temporary directory.
    fn open_cache(dir: &TempDir) -> FileSystemCache {
        unsafe { FileSystemCache::new(dir.path(), false).unwrap() }
    }

    /// Parses [`SOME_WAT`] and returns the Wasm bytecode together with its checksum.
    fn sample_contract() -> (Vec<u8>, Checksum) {
        let wasm = wat::parse_str(SOME_WAT).unwrap();
        let checksum = Checksum::generate(&wasm);
        (wasm, checksum)
    }

    /// A deliberately unusual triple so the expected ids do not depend on the host.
    fn exotic_triple() -> wasmer::Triple {
        wasmer::Triple {
            architecture: wasmer::Architecture::X86_64,
            vendor: target_lexicon::Vendor::Nintendo,
            operating_system: target_lexicon::OperatingSystem::Fuchsia,
            environment: target_lexicon::Environment::Gnu,
            binary_format: target_lexicon::BinaryFormat::Coff,
        }
    }

    #[test]
    fn file_system_cache_run() {
        let tmp_dir = TempDir::new().unwrap();
        let mut cache = open_cache(&tmp_dir);
        let (wasm, checksum) = sample_contract();

        // Nothing has been stored yet, so loading yields nothing.
        let before_store = cache.load(&checksum, TESTING_MEMORY_LIMIT).unwrap();
        assert!(before_store.is_none());

        // Compile and store the module.
        let compiling_engine = make_compiling_engine(TESTING_MEMORY_LIMIT);
        let module = compile(&compiling_engine, &wasm).unwrap();
        cache.store(&checksum, &module).unwrap();

        // Now the module can be loaded.
        let after_store = cache.load(&checksum, TESTING_MEMORY_LIMIT).unwrap();
        assert!(after_store.is_some());
        let CachedModule {
            module: cached_module,
            engine: runtime_engine,
            size_estimate,
        } = after_store.unwrap();

        let expected_size = module.serialize().unwrap().len() + 10240 /* engine size estimate */;
        assert_eq!(size_estimate, expected_size);

        // Make sure the loaded module is functional.
        // Strictly speaking this is beyond the cache API, but better safe than sorry.
        let mut store = Store::new(runtime_engine);
        let import_object = imports! {};
        let instance = WasmerInstance::new(&mut store, &cached_module, &import_object).unwrap();
        set_remaining_points(&mut store, &instance, TESTING_GAS_LIMIT);
        let add_one = instance.exports.get_function("add_one").unwrap();
        let result = add_one.call(&mut store, &[42.into()]).unwrap();
        assert_eq!(result[0].unwrap_i32(), 43);
    }

    #[test]
    fn file_system_cache_store_uses_expected_path() {
        let tmp_dir = TempDir::new().unwrap();
        let mut cache = open_cache(&tmp_dir);
        let (wasm, checksum) = sample_contract();

        // Compile and store the module.
        let engine = make_compiling_engine(TESTING_MEMORY_LIMIT);
        let module = compile(&engine, &wasm).unwrap();
        cache.store(&checksum, &module).unwrap();

        // The file must show up below the versioned directory, whatever the target folder is.
        let base = tmp_dir.path().to_string_lossy();
        let discriminator = raw_module_version_discriminator();
        let pattern = format!("{base}/{discriminator}-wasmer8/**/{checksum}.module");
        let mut matches = glob::glob(&pattern).expect("Failed to read glob pattern");
        let file_path = matches.next().unwrap().unwrap();
        let _serialized_module = fs::read(file_path).unwrap();
    }

    #[test]
    fn file_system_cache_remove_works() {
        let tmp_dir = TempDir::new().unwrap();
        let mut cache = open_cache(&tmp_dir);
        let (wasm, checksum) = sample_contract();

        // Compile and store the module.
        let compiling_engine = make_compiling_engine(TESTING_MEMORY_LIMIT);
        let module = compile(&compiling_engine, &wasm).unwrap();
        cache.store(&checksum, &module).unwrap();

        // The module is present.
        let loaded = cache.load(&checksum, TESTING_MEMORY_LIMIT).unwrap();
        assert!(loaded.is_some());

        // The first removal reports that the file existed.
        let existed = cache.remove(&checksum).unwrap();
        assert!(existed);

        // The module is gone.
        let loaded = cache.load(&checksum, TESTING_MEMORY_LIMIT).unwrap();
        assert!(loaded.is_none());

        // A second removal finds nothing to delete.
        let existed = cache.remove(&checksum).unwrap();
        assert!(!existed);
    }

    #[test]
    fn target_id_works() {
        // Same triple, different CPU features: only the hash part changes.
        let cases = [
            (wasmer::CpuFeature::POPCNT, POPCNT_TARGET_ID),
            (
                wasmer::CpuFeature::AVX512DQ,
                "x86_64-nintendo-fuchsia-gnu-coff-E3770FA3",
            ),
        ];
        for (feature, expected) in cases {
            let target = Target::new(exotic_triple(), feature.into());
            assert_eq!(target_id(&target), expected);
        }

        // Works for the current target (hashing is deterministic).
        let host = Target::default();
        assert_eq!(target_id(&host), target_id(&host));
    }

    #[test]
    fn modules_path_works() {
        let base = PathBuf::from("modules");
        let target = Target::new(exotic_triple(), wasmer::CpuFeature::POPCNT.into());
        let p = modules_path(&base, 17, &target);
        let discriminator = raw_module_version_discriminator();

        // `\` on Windows, `/` everywhere else.
        let sep = std::path::MAIN_SEPARATOR;
        let expected =
            format!("modules{sep}{discriminator}-wasmer17{sep}{POPCNT_TARGET_ID}");
        assert_eq!(p.as_os_str(), expected.as_str());
    }

    #[test]
    fn module_version_discriminator_stays_the_same() {
        let samples: Vec<String> = (0..4)
            .map(|_| raw_module_version_discriminator())
            .collect();

        for pair in samples.windows(2) {
            assert_eq!(pair[0], pair[1]);
        }
    }

    #[test]
    fn module_version_static() {
        assert_eq!(raw_module_version_discriminator(), "6c36aacf76");
    }
}