1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
mod matching;
mod parse;

use matching::MatchingContext;
use parity_wasm::elements::*;
use parse::ModuleInfo;
use std::convert::TryFrom;
use thiserror::Error;

/// Transfers function names from a reference wasm binary onto an input wasm
/// binary.
///
/// Instances are created through [`Remapper::builder`] and executed with
/// [`Remapper::remap`]. Both binaries are borrowed for `'wasm`.
#[derive(Debug)]
pub struct Remapper<'wasm> {
    /// Binary representation of the wasm that should be remapped.
    input: &'wasm [u8],
    /// Wasm binary with debug names included, used as the source of names.
    reference: &'wasm [u8],
    /// Whether constants that appear to be pointers into a wasm's data section
    /// are ignored when comparing instructions. Not read in this file;
    /// presumably consumed by the `matching` module — confirm there. The
    /// misspelling (`ingore`) mirrors the public builder method name.
    ingore_constant_data_section_pointers: bool,
    /// Whether two functions must have the exact same locals to be considered a
    /// potential match. Not read in this file; presumably consumed by the
    /// `matching` module — confirm there.
    require_exact_function_locals: bool,
    /// How closely two functions have to match, on a scale of 0 to 1, to be
    /// considered a match. Forwarded to `MatchingContext::find_matches`.
    matching_threshold: f32,
}

impl<'wasm> Remapper<'wasm> {
    /// Returns a [`RemapperBuilder`] with no binaries set, both matching options
    /// enabled and no explicit matching threshold.
    pub fn builder() -> RemapperBuilder<'wasm> {
        RemapperBuilder {
            input: None,
            reference: None,
            ingore_constant_data_section_pointers: true,
            require_exact_function_locals: true,
            matching_threshold: None,
        }
    }

    /// Copies function names from the reference binary onto the input binary.
    ///
    /// Both binaries are deserialized, every function of the input is matched
    /// against the functions of the reference, and the names of the best
    /// matches are written into a name section that is inserted into a copy of
    /// the input module. The serialized copy and the name map are returned.
    ///
    /// # Errors
    ///
    /// * [`RemapperError::InvalidInputBinary`] if the input cannot be
    ///   deserialized.
    /// * [`RemapperError::InvalidReferenceBinary`] if the reference cannot be
    ///   deserialized or its names cannot be parsed.
    /// * [`RemapperError::Parse`] if either module cannot be converted into a
    ///   `ModuleInfo`.
    ///
    /// # Panics
    ///
    /// Panics if the name section cannot be built or inserted, or if the output
    /// module cannot be serialized.
    pub fn remap(&self) -> Result<RemapperOutput, RemapperError> {
        let input: Module = parity_wasm::deserialize_buffer(self.input)
            .map_err(|_| RemapperError::InvalidInputBinary)?;
        let reference: Module = parity_wasm::deserialize_buffer::<Module>(self.reference)
            .map_err(|_| RemapperError::InvalidReferenceBinary)?
            .parse_names()
            .map_err(|_| RemapperError::InvalidReferenceBinary)?;
        let input_info = ModuleInfo::try_from(&input)?;
        let reference_info = ModuleInfo::try_from(&reference)?;

        // The matching context receives the data regions of both modules as one
        // collection. Extending from a cloning iterator avoids allocating a
        // second, temporary copy of the reference's regions.
        let mut data_regions = input_info.data_regions.clone();
        data_regions.extend(reference_info.data_regions.iter().cloned());

        let match_ctx = MatchingContext::new(&data_regions, &self);
        let name_map = self.build_name_map(&input_info, &reference_info, match_ctx);
        // The map is cloned because it is both embedded in the output binary
        // and handed back to the caller.
        let name_section = self.build_name_section(&input, name_map.clone());

        let mut output_module = input.clone();
        output_module
            .insert_section(Section::Name(name_section))
            .expect("unable to insert custom name section into output module");
        let output_module_buf =
            parity_wasm::serialize(output_module).expect("unable to serialize output module");

        Ok(RemapperOutput {
            output: output_module_buf,
            names: name_map,
        })
    }

    /// Builds the map from input function ids to the names of their best
    /// matching reference functions.
    ///
    /// For each input function the candidates returned by
    /// `MatchingContext::find_matches` (called with `matching_threshold`) are
    /// scanned once and the candidate with the greatest weight is selected;
    /// when several candidates share that weight, the one returned first wins.
    /// An input function is left out of the map when it has no candidates or
    /// when its best candidate has no name.
    fn build_name_map(
        &self,
        input_info: &ModuleInfo,
        reference_info: &ModuleInfo,
        match_ctx: MatchingContext,
    ) -> NameMap {
        let mut name_map = NameMap::with_capacity(input_info.functions.len());

        for input_func in input_info.functions.iter() {
            let matches = match_ctx.find_matches(
                input_func,
                &reference_info.functions,
                self.matching_threshold,
            );

            let best_name = matches
                .iter()
                // A weight that is not comparable with itself (e.g. a NaN
                // float) cannot be ranked, so such candidates are skipped
                // rather than causing a panic.
                .filter(|(_, weight)| weight.partial_cmp(weight).is_some())
                // `min_by` with the comparison reversed yields the greatest
                // weight and keeps the earliest candidate on ties, which is
                // what a stable descending sort followed by `first()` gives.
                .min_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal))
                .and_then(|(func, _)| func.name.clone());

            if let Some(best_name) = best_name {
                name_map.insert(input_func.id, best_name);
            }
        }

        name_map
    }

    /// Converts `name_map` into a name section holding only a function name
    /// subsection.
    ///
    /// The map is serialized into a buffer and read back as a
    /// `FunctionNameSubsection` against `module`.
    ///
    /// # Panics
    ///
    /// Panics if either the serialization or the deserialization fails.
    fn build_name_section(&self, module: &Module, name_map: NameMap) -> NameSection {
        let mut buffer = Vec::new();
        name_map
            .serialize(&mut buffer)
            .expect("unable to build name section"); // This should never happen
        let name_subsection =
            FunctionNameSubsection::deserialize(module, &mut std::io::Cursor::new(buffer))
                .expect("unable to build name section");
        NameSection::new(None, Some(name_subsection), None)
    }
}

/// Builder collecting the binaries and options for a [`Remapper`].
///
/// Obtained from [`Remapper::builder`]; finish with `build`.
#[derive(Debug)]
pub struct RemapperBuilder<'wasm> {
    /// Wasm binary to remap; `None` until `input` is called.
    input: Option<&'wasm [u8]>,
    /// Wasm binary with debug names; `None` until `reference` is called.
    reference: Option<&'wasm [u8]>,
    /// Whether constants that appear to be data section pointers are ignored
    /// when comparing instructions. The misspelling (`ingore`) mirrors the
    /// public setter of the same name.
    ingore_constant_data_section_pointers: bool,
    /// Whether two functions need the exact same locals to be considered a
    /// potential match.
    require_exact_function_locals: bool,
    /// Matching threshold on a scale of 0 to 1; `build` falls back to `0.0`
    /// when this is `None`.
    matching_threshold: Option<f32>,
}

impl<'wasm> RemapperBuilder<'wasm> {
    /// The binary representation of the wasm that should be remapped.
    pub fn input(mut self, input: &'wasm [u8]) -> Self {
        self.input = Some(input);
        self
    }

    /// A wasm binary with debug names included that can be used as a reference.
    pub fn reference(mut self, reference: &'wasm [u8]) -> Self {
        self.reference = Some(reference);
        self
    }

    /// If constants that appear to be pointers into a wasm's data section should
    /// be ignored when comparing if two instructions match. Enabled by default.
    pub fn ingore_constant_data_section_pointers(mut self, enabled: bool) -> Self {
        self.ingore_constant_data_section_pointers = enabled;
        self
    }

    /// If two functions need to have the exact same locals for them to be considered
    /// a potential match. Enabled by default.
    pub fn require_exact_function_locals(mut self, enabled: bool) -> Self {
        self.require_exact_function_locals = enabled;
        self
    }

    /// How closely two functions have to match on a scale of 0 to 1 for them to be
    /// considered a match good enough to be remapped with.
    pub fn matching_threshold(mut self, threshold: f32) -> Self {
        self.matching_threshold = Some(threshold);
        self
    }

    /// Creates a [`Remapper`] from the collected binaries and options.
    ///
    /// The returned remapper borrows only the wasm buffers (lifetime `'wasm`),
    /// not the builder itself, so it can be stored after the builder has been
    /// dropped. A matching threshold that was never set defaults to `0.0`.
    ///
    /// # Errors
    ///
    /// * [`RemapperError::InvalidInputBinary`] if no input binary was set.
    /// * [`RemapperError::InvalidReferenceBinary`] if no reference binary was
    ///   set.
    pub fn build(&self) -> Result<Remapper<'wasm>, RemapperError> {
        Ok(Remapper {
            input: self.input.ok_or(RemapperError::InvalidInputBinary)?,
            reference: self
                .reference
                .ok_or(RemapperError::InvalidReferenceBinary)?,
            ingore_constant_data_section_pointers: self.ingore_constant_data_section_pointers,
            require_exact_function_locals: self.require_exact_function_locals,
            matching_threshold: self.matching_threshold.unwrap_or(0.0),
        })
    }
}

/// Result of a successful [`Remapper::remap`] call.
#[derive(Debug)]
pub struct RemapperOutput {
    /// A wasm binary with debug symbols added from the reference binary.
    pub output: Vec<u8>,
    /// A map of function ids to their new names in the output binary.
    pub names: NameMap,
}

/// Errors reported while building or running a [`Remapper`].
#[derive(Debug, Error)]
pub enum RemapperError {
    /// The input binary was not provided to the builder or could not be
    /// deserialized as a wasm module.
    #[error("input wasm not a valid wasm binary")]
    InvalidInputBinary,
    /// The reference binary was not provided to the builder, could not be
    /// deserialized as a wasm module, or its names could not be parsed.
    #[error("reference wasm not a valid wasm binary")]
    InvalidReferenceBinary,
    /// A `parse::ParseError` converted through `From`; `remap` surfaces it when
    /// turning a module into a `ModuleInfo` fails.
    #[error("unable to parse {0}")]
    Parse(#[from] parse::ParseError),
}

#[cfg(test)]
mod tests {

    use super::*;
    use std::fs;

    /// Loads the fixture `test-cases/<name>.wasm`, panicking if it cannot be read.
    fn read_wasm(name: &str) -> Vec<u8> {
        let path = format!("test-cases/{}.wasm", name);
        fs::read(path).expect("unable to open test wasm")
    }

    /// An empty input buffer is reported as an invalid input binary.
    #[test]
    fn test_invalid_input() {
        let reference = read_wasm("simple.reference");
        let outcome = Remapper::builder()
            .input(&[])
            .reference(&reference)
            .build()
            .unwrap()
            .remap();

        assert!(
            matches!(outcome, Err(RemapperError::InvalidInputBinary)),
            "unexpected result"
        );
    }

    /// An empty reference buffer is reported as an invalid reference binary.
    #[test]
    fn test_invalid_empty_reference() {
        let input = read_wasm("simple.input");
        let outcome = Remapper::builder()
            .input(&input)
            .reference(&[])
            .build()
            .unwrap()
            .remap();

        assert!(
            matches!(outcome, Err(RemapperError::InvalidReferenceBinary)),
            "unexpected result"
        );
    }

    /// Remapping the simple fixture names both functions and embeds the same
    /// names in the produced binary.
    #[test]
    fn test_remap_simple() {
        let input = read_wasm("simple.input");
        let reference = read_wasm("simple.reference");
        let remapped = Remapper::builder()
            .input(&input)
            .reference(&reference)
            .build()
            .unwrap()
            .remap()
            .unwrap();

        // The returned name map holds exactly the two expected names.
        assert_eq!(remapped.names.len(), 2);
        assert_eq!(remapped.names.get(0).unwrap(), "square");
        assert_eq!(remapped.names.get(1).unwrap(), "squareTen");

        // Re-read the produced binary and pull out its function names.
        let parsed_output = parity_wasm::deserialize_buffer::<Module>(&remapped.output)
            .expect("invalid output binary")
            .parse_names()
            .unwrap();
        let names_section = parsed_output
            .names_section()
            .expect("no name section in output module");
        let function_names = names_section
            .functions()
            .expect("no function name subsection");

        // The provided output names must match the actual names in the binary.
        assert_eq!(&remapped.names, function_names.names());
    }
}