1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
//! Wasm DWARF transformation utilites.
//!
//! To use this, see functions like [`emit_debugsections_image`] and the
//! [`types`] module.
//!
//! Many of the types exposed are equivilent to those in the `cranelift-codegen`
//! crate, but maintaining exact compatibilty with `cranelift-codegen` is not
//! a goal of this crate.
//!
//! Types may change in future releases if changes are needed to make this crate
//! more generic.
//!
//! If you're interested in integrating your runtime with debuggers and this
//! crate doesn't meet your needs, please file an issue and we can discuss how
//! to adapt this crate to behave as needed.
//!
//! More documentation to come in future releases.

#![allow(clippy::cast_ptr_alignment)]

use crate::types::{ModuleAddressMap, ModuleVmctxInfo, ValueLabelsRanges};
use anyhow::Error;
use faerie::{Artifact, Decl};
use more_asserts::assert_gt;
use target_lexicon::{BinaryFormat, Triple};

pub use crate::read_debuginfo::{read_debuginfo, DebugInfoData, WasmFileInfo};
pub use crate::transform::transform_dwarf;
pub use crate::write_debuginfo::{emit_dwarf, ResolvedSymbol, SymbolResolver};

mod gc;
mod read_debuginfo;
mod transform;
pub mod types;
mod write_debuginfo;

struct FunctionRelocResolver {}
impl SymbolResolver for FunctionRelocResolver {
    fn resolve_symbol(&self, symbol: usize, addend: i64) -> ResolvedSymbol {
        let name = format!("_wasm_function_{}", symbol);
        ResolvedSymbol::Reloc { name, addend }
    }
}

pub fn emit_debugsections(
    obj: &mut Artifact,
    vmctx_info: &ModuleVmctxInfo,
    pointer_width_bytes: u8,
    debuginfo_data: &DebugInfoData,
    at: &ModuleAddressMap,
    ranges: &ValueLabelsRanges,
) -> Result<(), Error> {
    let resolver = FunctionRelocResolver {};
    let dwarf = transform_dwarf(pointer_width_bytes, debuginfo_data, at, vmctx_info, ranges)?;
    emit_dwarf(obj, dwarf, &resolver)?;
    Ok(())
}

struct ImageRelocResolver<'a> {
    func_offsets: &'a Vec<u64>,
}

impl<'a> SymbolResolver for ImageRelocResolver<'a> {
    fn resolve_symbol(&self, symbol: usize, addend: i64) -> ResolvedSymbol {
        let func_start = self.func_offsets[symbol];
        ResolvedSymbol::PhysicalAddress(func_start + addend as u64)
    }
}

/// Top level function to get the debug information to give to a debugger.
pub fn emit_debugsections_image(
    triple: Triple,
    pointer_width_bytes: u8,
    debuginfo_data: &DebugInfoData,
    vmctx_info: &ModuleVmctxInfo,
    at: &ModuleAddressMap,
    ranges: &ValueLabelsRanges,
    funcs: &[(*const u8, usize)],
) -> Result<Vec<u8>, Error> {
    let func_offsets = &funcs
        .iter()
        .map(|(ptr, _)| *ptr as u64)
        .collect::<Vec<u64>>();
    let mut obj = Artifact::new(triple, String::from("module"));
    let resolver = ImageRelocResolver { func_offsets };
    let dwarf = transform_dwarf(pointer_width_bytes, debuginfo_data, at, vmctx_info, ranges)?;

    // Assuming all functions in the same code block, looking min/max of its range.
    assert_gt!(funcs.len(), 0);
    let mut segment_body: (usize, usize) = (!0, 0);
    for (body_ptr, body_len) in funcs {
        segment_body.0 = std::cmp::min(segment_body.0, *body_ptr as usize);
        segment_body.1 = std::cmp::max(segment_body.1, *body_ptr as usize + body_len);
    }
    let segment_body = (segment_body.0 as *const u8, segment_body.1 - segment_body.0);

    let body = unsafe { std::slice::from_raw_parts(segment_body.0, segment_body.1) };
    obj.declare_with("all", Decl::function(), body.to_vec())?;

    emit_dwarf(&mut obj, dwarf, &resolver)?;

    // LLDB is too "magical" about mach-o, generating elf
    let mut bytes = obj.emit_as(BinaryFormat::Elf)?;
    // elf is still missing details...
    convert_faerie_elf_to_loadable_file(&mut bytes, segment_body.0);

    // let mut file = ::std::fs::File::create(::std::path::Path::new("test.o")).expect("file");
    // ::std::io::Write::write(&mut file, &bytes).expect("write");

    Ok(bytes)
}

fn convert_faerie_elf_to_loadable_file(bytes: &mut Vec<u8>, code_ptr: *const u8) {
    use std::ffi::CStr;
    use std::os::raw::c_char;

    assert!(
        bytes[0x4] == 2 && bytes[0x5] == 1,
        "bits and endianess in .ELF"
    );
    let e_phoff = unsafe { *(bytes.as_ptr().offset(0x20) as *const u64) };
    let e_phnum = unsafe { *(bytes.as_ptr().offset(0x38) as *const u16) };
    assert!(
        e_phoff == 0 && e_phnum == 0,
        "program header table is empty"
    );
    let e_phentsize = unsafe { *(bytes.as_ptr().offset(0x36) as *const u16) };
    assert_eq!(e_phentsize, 0x38, "size of ph");
    let e_shentsize = unsafe { *(bytes.as_ptr().offset(0x3A) as *const u16) };
    assert_eq!(e_shentsize, 0x40, "size of sh");

    let e_shoff = unsafe { *(bytes.as_ptr().offset(0x28) as *const u64) };
    let e_shnum = unsafe { *(bytes.as_ptr().offset(0x3C) as *const u16) };
    let mut shstrtab_off = 0;
    let mut segment = None;
    for i in 0..e_shnum {
        let off = e_shoff as isize + i as isize * e_shentsize as isize;
        let sh_type = unsafe { *(bytes.as_ptr().offset(off + 0x4) as *const u32) };
        if sh_type == /* SHT_SYMTAB */ 3 {
            shstrtab_off = unsafe { *(bytes.as_ptr().offset(off + 0x18) as *const u64) };
        }
        if sh_type != /* SHT_PROGBITS */ 1 {
            continue;
        }
        // It is a SHT_PROGBITS, but we need to check sh_name to ensure it is our function
        let sh_name = unsafe {
            let sh_name_off = *(bytes.as_ptr().offset(off) as *const u32);
            CStr::from_ptr(
                bytes
                    .as_ptr()
                    .offset((shstrtab_off + sh_name_off as u64) as isize)
                    as *const c_char,
            )
            .to_str()
            .expect("name")
        };
        if sh_name != ".text.all" {
            continue;
        }

        assert!(segment.is_none());
        // Functions was added at emit_debugsections_image as .text.all.
        // Patch vaddr, and save file location and its size.
        unsafe {
            *(bytes.as_ptr().offset(off + 0x10) as *mut u64) = code_ptr as u64;
        };
        let sh_offset = unsafe { *(bytes.as_ptr().offset(off + 0x18) as *const u64) };
        let sh_size = unsafe { *(bytes.as_ptr().offset(off + 0x20) as *const u64) };
        segment = Some((sh_offset, code_ptr, sh_size));
        // Fix name too: cut it to just ".text"
        unsafe {
            let sh_name_off = *(bytes.as_ptr().offset(off) as *const u32);
            bytes[(shstrtab_off + sh_name_off as u64) as usize + ".text".len()] = 0;
        }
    }

    // LLDB wants segment with virtual address set, placing them at the end of ELF.
    let ph_off = bytes.len();
    if let Some((sh_offset, v_offset, sh_size)) = segment {
        let segment = vec![0; 0x38];
        unsafe {
            *(segment.as_ptr() as *mut u32) = /* PT_LOAD */ 0x1;
            *(segment.as_ptr().offset(0x8) as *mut u64) = sh_offset;
            *(segment.as_ptr().offset(0x10) as *mut u64) = v_offset as u64;
            *(segment.as_ptr().offset(0x18) as *mut u64) = v_offset as u64;
            *(segment.as_ptr().offset(0x20) as *mut u64) = sh_size;
            *(segment.as_ptr().offset(0x28) as *mut u64) = sh_size;
        }
        bytes.extend_from_slice(&segment);
    } else {
        unreachable!();
    }

    // It is somewhat loadable ELF file at this moment.
    // Update e_flags, e_phoff and e_phnum.
    unsafe {
        *(bytes.as_ptr().offset(0x10) as *mut u16) = /* ET_DYN */ 3;
        *(bytes.as_ptr().offset(0x20) as *mut u64) = ph_off as u64;
        *(bytes.as_ptr().offset(0x38) as *mut u16) = 1 as u16;
    }
}