1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
//! Data segments within a wasm module.

use crate::emit::{Emit, EmitContext, Section};
use crate::ir::Value;
use crate::parse::IndicesToIds;
use crate::tombstone_arena::{Id, Tombstone, TombstoneArena};
use crate::{GlobalId, InitExpr, MemoryId, Module, Result, ValType};
use failure::{bail, ResultExt};

/// An identifier for a data segment (active or passive) stored in a module's
/// `ModuleData` arena.
pub type DataId = Id<Data>;

/// A data segment.
///
/// Every data segment has an associated value. This value gets copied into a
/// memory. It is either automatically copied into a specific memory at Wasm
/// instantiation time (active data segments) or dynamically copied into a
/// memory (or memories) via the `memory.init` instruction (passive data
/// segments). See the `kind` member and `DataKind` type for more details on the
/// active/passive distinction.
///
/// When a segment is deleted from its arena, its `value` is replaced with an
/// empty vector (see the `Tombstone` implementation for `Data`).
#[derive(Debug)]
pub struct Data {
    /// The id of this data segment; exposed read-only through `Data::id`.
    id: DataId,
    /// What kind of data segment is this? Passive or active?
    pub kind: DataKind,
    /// The data payload of this data segment.
    pub value: Vec<u8>,
}

/// The kind of data segment: passive or active.
///
/// `Passive` is also the kind given to placeholder segments that are allocated
/// before their contents have been parsed.
#[derive(Debug)]
pub enum DataKind {
    /// An active data segment that is automatically initialized at some address
    /// in a static memory. The target memory and address are described by the
    /// contained `ActiveData`.
    Active(ActiveData),
    /// A passive data segment that must be manually initialized at a dynamic
    /// address via the `memory.init` instruction (perhaps multiple times in
    /// multiple different memories) and then manually freed when it's no longer
    /// needed via the `data.drop` instruction.
    Passive,
}

/// The parts of a data segment that are only present in active data segments:
/// which memory is initialized, and where within it.
#[derive(Clone, Debug)]
pub struct ActiveData {
    /// The memory that this active data segment will be automatically
    /// initialized in.
    pub memory: MemoryId,
    /// The memory location where this active data segment will be automatically
    /// initialized: either an absolute address or the value of a global.
    pub location: ActiveDataLocation,
}

/// The memory location where an active data segment will be automatically
/// initialized.
///
/// When a data section is parsed, the segment's offset expression determines
/// the variant: a constant `i32` becomes `Absolute`, and a reference to a
/// global of type `i32` becomes `Relative`. Any other offset expression is
/// rejected with an error.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ActiveDataLocation {
    /// A static, absolute address within the memory. The parser stores the
    /// `i32` constant reinterpreted as a `u32`.
    Absolute(u32),
    /// A relative address (expressed as a global's value) within the memory.
    Relative(GlobalId),
}

impl Tombstone for Data {
    /// Releases the payload of a deleted data segment.
    ///
    /// After this call `value` is an empty vector; `id` and `kind` are left
    /// untouched.
    fn on_delete(&mut self) {
        // Swap in a fresh, empty vector and drop the old payload right away so
        // its buffer does not outlive the segment.
        drop(std::mem::replace(&mut self.value, Vec::new()));
    }
}

impl Data {
    /// Returns the id of this data segment.
    pub fn id(&self) -> DataId {
        self.id
    }

    /// Is this a passive data segment?
    ///
    /// Returns `true` for `DataKind::Passive` and `false` for
    /// `DataKind::Active`.
    pub fn is_passive(&self) -> bool {
        match &self.kind {
            DataKind::Passive => true,
            DataKind::Active(_) => false,
        }
    }
}

/// All data segments of a wasm module, both active and passive.
///
/// Active segments are copied into a memory automatically at instantiation
/// time; passive segments are used to initialize memories via the
/// `memory.init` instruction.
#[derive(Debug, Default)]
pub struct ModuleData {
    /// Arena owning every data segment of the module, indexed by `DataId`.
    arena: TombstoneArena<Data>,
}

impl ModuleData {
    /// Get a shared reference to the data segment associated with `id`.
    pub fn get(&self, id: DataId) -> &Data {
        &self.arena[id]
    }

    /// Get a mutable reference to the data segment associated with `id`.
    pub fn get_mut(&mut self, id: DataId) -> &mut Data {
        &mut self.arena[id]
    }

    /// Delete a data segment from this module.
    ///
    /// It is up to you to ensure that all references to the deleted segment are
    /// removed, e.g. `memory.init` and `data.drop` expressions.
    pub fn delete(&mut self, id: DataId) {
        self.arena.delete(id);
    }

    /// Get an iterator over shared references to this module's data segments.
    pub fn iter(&self) -> impl Iterator<Item = &Data> {
        self.arena.iter().map(|(_id, data)| data)
    }

    /// Assigns an output index to every data segment and, when required, emits
    /// the `DataCount` section holding the number of segments.
    ///
    /// Nothing is assigned or emitted when the arena is empty.
    // Note that this is in accordance with the upstream bulk memory proposal to
    // WebAssembly and isn't currently part of the WebAssembly standard.
    pub(crate) fn emit_data_count(&self, cx: &mut EmitContext) {
        #[cfg(feature = "parallel")]
        use rayon::iter::ParallelIterator;

        if self.arena.len() == 0 {
            return;
        }

        // Number the segments in iteration order, noting along the way whether
        // any of them is passive.
        let mut count = 0;
        let mut any_passive = false;
        for (index, data) in self.iter().enumerate() {
            cx.indices.set_data_index(data.id(), index as u32);
            any_passive |= data.is_passive();
            count = index + 1;
        }

        // We only emit the `DataCount` section if there are passive data
        // segments, or `data.drop` or `memory.init` instructions that use
        // (passive or active) data segments. Yes! You can use `data.drop` and
        // `memory.init` with active data segments, it just results in a runtime
        // error.
        //
        // The key is that we don't want to generate this section for MVP Wasm,
        // which has neither passive data segments, nor the `data.drop` and
        // `memory.init` instructions.
        let funcs = &cx.module.funcs;
        let needs_data_count = any_passive
            || maybe_parallel!(funcs.(iter_local | par_iter_local))
                .any(|(_, func)| !func.used_data_segments().is_empty());
        if !needs_data_count {
            return;
        }

        cx.start_section(Section::DataCount).encoder.usize(count);
    }
}

impl Module {
    /// Pre-allocates `count` placeholder data segments and records an id for
    /// each data index in `ids`.
    ///
    /// Per the comment in `parse_data`, this pre-allocation happens when the
    /// module has a `DataCount` section.
    ///
    /// Note that during function parsing all data indices less than `count` are
    /// considered valid, and it's only afterwards that we discover whether
    /// they're actually passive or not, and that property is checked during
    /// validation.
    pub(crate) fn reserve_data(&mut self, count: u32, ids: &mut IndicesToIds) {
        log::debug!("reserving space for {} data segments", count);
        for _ in 0..count {
            // NB: The `value` and `kind` below are placeholders; they are
            // updated once we actually parse the data segments.
            let placeholder = self.data.arena.alloc_with_id(|id| Data {
                id,
                kind: DataKind::Passive,
                value: Vec::new(),
            });
            ids.push_data(placeholder);
        }
    }

    /// Parses the raw wasm data section, filling in the `value` and `kind` of
    /// every data segment in this module.
    ///
    /// `data_count` is the segment count announced ahead of the data section,
    /// if any. When it is `Some`, segments are looked up in `ids` by index;
    /// when it is `None`, each segment is allocated here.
    ///
    /// # Errors
    ///
    /// Fails if `data_count` disagrees with the number of segments in the
    /// section, if a segment cannot be read, if a data or memory index cannot
    /// be resolved through `ids`, if an offset expression cannot be evaluated,
    /// or if an active segment's offset is neither an `i32` constant nor a
    /// global of type `i32`.
    pub(crate) fn parse_data(
        &mut self,
        section: wasmparser::DataSectionReader,
        ids: &IndicesToIds,
        data_count: Option<u32>,
    ) -> Result<()> {
        log::debug!("parse data section");
        match data_count {
            Some(count) if count != section.get_count() => {
                bail!("data count section mismatches actual data section");
            }
            _ => {}
        }

        for (index, entry) in section.into_iter().enumerate() {
            let segment = entry?;

            // If we had the `DataCount` section, then we already pre-allocated
            // a data segment. Otherwise, allocate one now.
            let id = match data_count {
                Some(_) => ids.get_data(index as u32)?,
                None => self.data.arena.alloc_with_id(|id| Data {
                    id,
                    value: Vec::new(),
                    kind: DataKind::Passive,
                }),
            };
            let data = self.data.get_mut(id);

            // Both kinds of segment carry a payload, and it is always the
            // first thing recorded for the segment.
            data.value = segment.data.to_vec();

            match segment.kind {
                wasmparser::DataKind::Passive => {
                    data.kind = DataKind::Passive;
                }
                wasmparser::DataKind::Active {
                    memory_index,
                    init_expr,
                } => {
                    // Register this segment with the memory it initializes.
                    let memory_id = ids.get_memory(memory_index)?;
                    self.memories
                        .get_mut(memory_id)
                        .data_segments
                        .insert(data.id);

                    let offset = InitExpr::eval(&init_expr, ids)
                        .with_context(|_e| format!("in segment {}", index))?;
                    let location = match offset {
                        InitExpr::Value(Value::I32(n)) => ActiveDataLocation::Absolute(n as u32),
                        InitExpr::Global(global)
                            if self.globals.get(global).ty == ValType::I32 =>
                        {
                            ActiveDataLocation::Relative(global)
                        }
                        _ => bail!("non-i32 constant in segment {}", index),
                    };
                    data.kind = DataKind::Active(ActiveData {
                        memory: memory_id,
                        location,
                    });
                }
            }
        }
        Ok(())
    }
}

impl Emit for ModuleData {
    /// Emits the data section: the segment count followed by one entry per
    /// data segment. Nothing is emitted when there are no segments.
    fn emit(&self, cx: &mut EmitContext) {
        log::debug!("emit data section");
        let segment_count = self.arena.len();
        if segment_count == 0 {
            return;
        }

        let mut cx = cx.start_section(Section::Data);
        cx.encoder.usize(segment_count);

        // The encodings here are with respect to the bulk memory proposal, but
        // should be backwards compatible with the current MVP WebAssembly spec
        // so long as only memory 0 is used.
        for data in self.iter() {
            let active = match &data.kind {
                DataKind::Passive => {
                    // Flag 0x01: passive segment, payload only.
                    cx.encoder.byte(0x01);
                    cx.encoder.bytes(&data.value);
                    continue;
                }
                DataKind::Active(active) => active,
            };

            // Flag 0x02 carries an explicit memory index; flag 0x00 implies
            // memory 0.
            let memory_index = cx.indices.get_memory_index(active.memory);
            if memory_index != 0 {
                cx.encoder.byte(0x02);
                cx.encoder.u32(memory_index);
            } else {
                cx.encoder.byte(0x00);
            }

            // The offset expression comes next, then the payload.
            let offset_expr = match active.location {
                ActiveDataLocation::Relative(global) => InitExpr::Global(global),
                ActiveDataLocation::Absolute(addr) => InitExpr::Value(Value::I32(addr as i32)),
            };
            offset_expr.emit(&mut cx);
            cx.encoder.bytes(&data.value);
        }
    }
}