wasmtime_wizer/snapshot.rs
use crate::InstanceState;
use crate::info::ModuleContext;
use rayon::iter::{IntoParallelIterator, ParallelExtend, ParallelIterator};
use std::convert::TryFrom;
use std::ops::Range;

/// The maximum number of data segments that we will emit. Most
/// engines support more than this, but we want to leave some
/// headroom.
const MAX_DATA_SEGMENTS: usize = 10_000;

/// A "snapshot" of Wasm state after initialization has run, relative to its
/// default values.
pub struct Snapshot {
    /// Maps global index to its initialized value.
    ///
    /// Note that this only tracks defined mutable globals, not all globals.
    pub globals: Vec<(u32, SnapshotVal)>,

    /// A new minimum size for each memory (in units of pages).
    pub memory_mins: Vec<u64>,

    /// Segments of non-zero memory.
    pub data_segments: Vec<DataSegment>,
}

/// A value from a snapshot, currently a subset of wasm types that aren't
/// reference types.
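///
/// Note that floating-point values are represented by their raw bit patterns
/// (`u32`/`u64`) rather than as `f32`/`f64`.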
#[expect(missing_docs, reason = "self-describing variants")]
pub enum SnapshotVal {
    I32(i32),
    I64(i64),
    F32(u32),
    F64(u64),
    V128(u128),
}

/// A data segment initializer for a memory.
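///
/// For example (illustrative): `DataSegment { memory_index: 0, offset: 1024,
/// data: vec![1, 2, 3], is64: false }` describes an active data segment that
/// copies the bytes `1, 2, 3` to addresses `1024..1027` of memory 0 when the
/// pre-initialized module is instantiated.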
#[derive(Clone)]
pub struct DataSegment {
    /// The index of this data segment's memory.
    pub memory_index: u32,

    /// The initialized memory contents that this data segment was copied from.
    pub data: Vec<u8>,

    /// The offset within the memory that `data` should be copied to.
    pub offset: u64,

    /// Whether or not `memory_index` is a 64-bit memory.
    pub is64: bool,
}

/// Snapshot the given instance's globals and memories, relative to the Wasm
/// defaults.
//
// TODO: when we support reference types, we will have to snapshot tables.
pub async fn snapshot(module: &ModuleContext<'_>, ctx: &mut impl InstanceState) -> Snapshot {
    log::debug!("Snapshotting the initialized state");

    let globals = snapshot_globals(module, ctx).await;
    let (memory_mins, data_segments) = snapshot_memories(module, ctx).await;

    Snapshot {
        globals,
        memory_mins,
        data_segments,
    }
}

/// Get the initialized values of all globals.
async fn snapshot_globals(
    module: &ModuleContext<'_>,
    ctx: &mut impl InstanceState,
) -> Vec<(u32, SnapshotVal)> {
    log::debug!("Snapshotting global values");

    let mut ret = Vec::new();
    for (i, name) in module.defined_global_exports.as_ref().unwrap().iter() {
        let val = ctx.global_get(&name).await;
        ret.push((*i, val));
    }
    ret
}

#[derive(Clone)]
struct DataSegmentRange {
    memory_index: u32,
    range: Range<usize>,
}

impl DataSegmentRange {
    /// What is the gap between two consecutive data segments?
    ///
    /// `self` must be in front of `other` and they must not overlap with each
    /// other.
    fn gap(&self, other: &Self) -> usize {
        debug_assert_eq!(self.memory_index, other.memory_index);
        debug_assert!(self.range.end <= other.range.start);
        other.range.start - self.range.end
    }

    /// Merge two consecutive data segments.
    ///
    /// `self` must be in front of `other` and they must not overlap with each
    /// other.
    fn merge(&mut self, other: &Self) {
        debug_assert_eq!(self.memory_index, other.memory_index);
        debug_assert!(self.range.end <= other.range.start);
        self.range.end = other.range.end;
    }
}
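
// Illustrative example of the two helpers above: with
// `a = DataSegmentRange { memory_index: 0, range: 0..10 }` and
// `b = DataSegmentRange { memory_index: 0, range: 12..20 }`, `a.gap(&b)` is 2,
// and after `a.merge(&b)` the range of `a` becomes `0..20` (the two-byte gap
// is later emitted as zero bytes).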

/// Find the initialized minimum page size of each memory, as well as all
/// regions of non-zero memory.
async fn snapshot_memories(
    module: &ModuleContext<'_>,
    instance: &mut impl InstanceState,
) -> (Vec<u64>, Vec<DataSegment>) {
    log::debug!("Snapshotting memories");

    // Find and record non-zero regions of memory (in parallel).
    let mut memory_mins = vec![];
    let mut data_segments = vec![];
    let iter = module
        .defined_memories()
        .zip(module.defined_memory_exports.as_ref().unwrap());
    for ((memory_index, ty), name) in iter {
        instance
            .memory_contents(&name, |memory| {
                let page_size = 1 << ty.page_size_log2.unwrap_or(16);
                let num_wasm_pages = memory.len() / page_size;
                memory_mins.push(num_wasm_pages as u64);

                let memory_data = &memory[..];

                // Consider each Wasm page in parallel. Create data segments for each
                // region of non-zero memory.
                data_segments.par_extend((0..num_wasm_pages).into_par_iter().flat_map(|i| {
                    let page_end = (i + 1) * page_size;
                    let mut start = i * page_size;
                    let mut segments = vec![];
                    while start < page_end {
                        let nonzero = match memory_data[start..page_end]
                            .iter()
                            .position(|byte| *byte != 0)
                        {
                            None => break,
                            Some(i) => i,
                        };
                        start += nonzero;
                        let end = memory_data[start..page_end]
                            .iter()
                            .position(|byte| *byte == 0)
                            .map_or(page_end, |zero| start + zero);
                        segments.push(DataSegmentRange {
                            memory_index,
                            range: start..end,
                        });
                        start = end;
                    }
                    segments
                }));
            })
            .await;
    }
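
    // At this point `data_segments` holds one range per maximal run of
    // non-zero bytes within a single page. Illustrative example: a page whose
    // bytes are `0 0 A B 0 C 0 0` yields two ranges, one covering `A B` and
    // one covering `C`.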

    if data_segments.is_empty() {
        return (memory_mins, Vec::new());
    }

    // Sort data segments to enforce determinism in the face of the
    // parallelism above.
    data_segments.sort_by_key(|s| (s.memory_index, s.range.start));

    // Merge any contiguous segments (caused by spanning a Wasm page boundary,
    // and therefore created in separate logical threads above) or segments
    // that are within four bytes of each other. Four because this is the
    // minimum overhead of defining a new active data segment: one byte for the
    // memory index LEB, two for the memory offset init expression (one for the
    // `i32.const` opcode and another for the constant immediate LEB), and
    // finally one for the data length LEB.
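    //
    // Illustrative example: segments covering `0..10` and `13..20` of the same
    // memory are separated by a gap of 3 zero bytes; emitting them separately
    // costs at least 4 extra bytes of segment overhead, so merging them (and
    // encoding the 3 zero bytes as data) is the smaller encoding.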
    const MIN_ACTIVE_SEGMENT_OVERHEAD: usize = 4;
    let mut merged_data_segments = Vec::with_capacity(data_segments.len());
    merged_data_segments.push(data_segments[0].clone());
    for b in &data_segments[1..] {
        let a = merged_data_segments.last_mut().unwrap();

        // Only merge segments for the same memory.
        if a.memory_index != b.memory_index {
            merged_data_segments.push(b.clone());
            continue;
        }

        // Only merge segments if they are contiguous or if it is definitely
        // more size efficient than leaving them apart.
        let gap = a.gap(b);
        if gap > MIN_ACTIVE_SEGMENT_OVERHEAD {
            merged_data_segments.push(b.clone());
            continue;
        }

        // Okay, merge them together into `a` (so that the next iteration can
        // merge it with its predecessor) and then omit `b`!
        a.merge(b);
    }

    remove_excess_segments(&mut merged_data_segments);

    // With the final set of data segments now extract the actual data of each
    // memory, copying it into a `DataSegment`, to return the final list of
    // segments.
    //
    // Here the memories are iterated over again and, in tandem, the
    // `merged_data_segments` list is traversed to extract a `DataSegment` for
    // each range that `merged_data_segments` indicates. This relies on
    // `merged_data_segments` being a sorted list by `memory_index` at least.
    let mut final_data_segments = Vec::with_capacity(merged_data_segments.len());
    let mut merged = merged_data_segments.iter().peekable();
    let iter = module
        .defined_memories()
        .zip(module.defined_memory_exports.as_ref().unwrap());
    for ((memory_index, ty), name) in iter {
        instance
            .memory_contents(&name, |memory| {
                while let Some(segment) = merged.next_if(|s| s.memory_index == memory_index) {
                    final_data_segments.push(DataSegment {
                        memory_index,
                        data: memory[segment.range.clone()].to_vec(),
                        offset: segment.range.start.try_into().unwrap(),
                        is64: ty.memory64,
                    });
                }
            })
            .await;
    }
    assert!(merged.next().is_none());

    (memory_mins, final_data_segments)
}

/// Engines apply a limit on how many segments a module may contain, and Wizer
/// can run afoul of it. When that happens, we need to merge data segments
/// together until our number of data segments fits within the limit.
fn remove_excess_segments(merged_data_segments: &mut Vec<DataSegmentRange>) {
    if merged_data_segments.len() < MAX_DATA_SEGMENTS {
        return;
    }

    // We need to remove `excess` data segments.
    let excess = merged_data_segments.len() - MAX_DATA_SEGMENTS;

    #[derive(Clone, Copy, PartialEq, Eq)]
    struct GapIndex {
        gap: u32,
        // Use a `u32` instead of `usize` to fit `GapIndex` within a word on
        // 64-bit systems, using less memory.
        index: u32,
    }

    // Find the gaps between the end of one segment and the start of the next
    // (when both are in the same memory). We will merge the `excess` segment
    // pairs with the smallest gaps together. Because they are the smallest
    // gaps, this will bloat the size of our data segments the least.
    let mut smallest_gaps = Vec::with_capacity(merged_data_segments.len() - 1);
    for (index, w) in merged_data_segments.windows(2).enumerate() {
        if w[0].memory_index != w[1].memory_index {
            continue;
        }
        let gap = match u32::try_from(w[0].gap(&w[1])) {
            Ok(gap) => gap,
            // If the gap doesn't fit in a `u32` (roughly 4 GiB or more) then
            // don't consider these two data segments for merging; assume there
            // are enough other segments close enough together to still get
            // under the limit.
            Err(_) => continue,
        };
        let index = u32::try_from(index).unwrap();
        smallest_gaps.push(GapIndex { gap, index });
    }
    smallest_gaps.sort_unstable_by_key(|g| g.gap);
    smallest_gaps.truncate(excess);
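
    // Illustrative example: if `MAX_DATA_SEGMENTS` were 3 and we had five
    // segments in one memory with pairwise gaps of `[7, 1, 9, 2]`, then
    // `excess` would be 2 and the two pairs with gaps 1 and 2 would be chosen
    // for merging below.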

    // Now merge the chosen segments together in reverse index order so that
    // merging two segments doesn't mess up the index of the next segments we
    // will merge.
    smallest_gaps.sort_unstable_by(|a, b| a.index.cmp(&b.index).reverse());
    for GapIndex { index, .. } in smallest_gaps {
        let index = usize::try_from(index).unwrap();
        let [a, b] = merged_data_segments
            .get_disjoint_mut([index, index + 1])
            .unwrap();
        a.merge(b);

        // Okay to use `swap_remove` here because, even though it makes
        // `merged_data_segments` unsorted, the segments are still sorted within
        // the range `0..index` and future iterations will only operate within
        // that subregion because we are iterating from largest to smallest
        // indices.
        merged_data_segments.swap_remove(index + 1);
    }

    // Finally, sort the data segments again so that our output is
    // deterministic.
    merged_data_segments.sort_by_key(|s| (s.memory_index, s.range.start));
}