samply_api/asm/
mod.rs

1use std::str::FromStr;
2
3use samply_symbols::debugid::DebugId;
4use samply_symbols::{
5    object, CodeByteReadingError, CodeId, FileAndPathHelper, FileAndPathHelperError, LibraryInfo,
6    LookupAddress, SymbolManager,
7};
8use serde_json::json;
9use yaxpeax_arch::{Arch, DecodeError, LengthedInstruction, Reader, U8Reader};
10use yaxpeax_x86::amd64::{Opcode, Operand};
11
12use self::response_json::Response;
13use crate::asm::response_json::DecodedInstruction;
14
15mod request_json;
16mod response_json;
17
18#[derive(thiserror::Error, Debug)]
19enum AsmError {
20    #[error("Couldn't parse request: {0}")]
21    ParseRequestErrorSerde(#[from] serde_json::error::Error),
22
23    #[error("An error occurred when loading the binary: {0}")]
24    LoadBinaryError(#[from] samply_symbols::Error),
25
26    #[error("object parse error: {0}")]
27    ObjectParseError(#[from] object::Error),
28
29    #[error("The requested address was not found in any section in the binary.")]
30    AddressNotFound,
31
32    #[error("Could not read the requested address range from the section (might be out of bounds or the section might not have any bytes in the file)")]
33    ByteRangeNotInSection,
34
35    #[error("Unrecognized architecture {0:?}")]
36    UnrecognizedArch(String),
37
38    #[error("Could not read the requested address range from the file: {0}")]
39    FileIO(#[from] FileAndPathHelperError),
40}
41
42pub struct AsmApi<'a, H: FileAndPathHelper> {
43    symbol_manager: &'a SymbolManager<H>,
44}
45
46impl<'a, H: FileAndPathHelper> AsmApi<'a, H> {
47    /// Create an [`AsmApi`] instance which uses the provided [`SymbolManager`].
48    pub fn new(symbol_manager: &'a SymbolManager<H>) -> Self {
49        Self { symbol_manager }
50    }
51
52    pub async fn query_api_json(&self, request_json: &str) -> String {
53        match self.query_api_fallible_json(request_json).await {
54            Ok(response_json) => response_json,
55            Err(err) => json!({ "error": err.to_string() }).to_string(),
56        }
57    }
58
59    async fn query_api_fallible_json(&self, request_json: &str) -> Result<String, AsmError> {
60        let request: request_json::Request = serde_json::from_str(request_json)?;
61        let response = self.query_api(&request).await?;
62        Ok(serde_json::to_string(&response)?)
63    }
64
65    async fn query_api(
66        &self,
67        request: &request_json::Request,
68    ) -> Result<response_json::Response, AsmError> {
69        let request_json::Request {
70            debug_id,
71            debug_name,
72            name,
73            code_id,
74            start_address,
75            size,
76            continue_until_function_end,
77            ..
78        } = request;
79
80        let debug_id = debug_id
81            .as_deref()
82            .and_then(|debug_id| DebugId::from_breakpad(debug_id).ok());
83        let code_id = code_id
84            .as_deref()
85            .and_then(|code_id| CodeId::from_str(code_id).ok());
86
87        let library_info = LibraryInfo {
88            debug_name: debug_name.clone(),
89            debug_id,
90            name: name.clone(),
91            code_id,
92            ..Default::default()
93        };
94
95        let binary_image = self
96            .symbol_manager
97            .load_binary(&library_info)
98            .await
99            .map_err(AsmError::LoadBinaryError)?;
100
101        let mut disassembly_len = *size;
102
103        if *continue_until_function_end {
104            if let Some(function_end_address) = self
105                .get_function_end_address(&library_info, *start_address)
106                .await
107            {
108                if function_end_address >= *start_address
109                    && function_end_address - *start_address > *size
110                {
111                    disassembly_len = function_end_address - *start_address;
112                }
113            }
114        }
115
116        // Align the start address, for architectures with instruction alignment.
117        // For example, on ARM, you might be looking for the instructions of a
118        // function whose function symbol has address 0x2001. But this address is
119        // really two pieces of information: 0x2000 is the address of the function's
120        // first instruction (ARM instructions are two-byte aligned), and the 0x1 bit
121        // is the "thumb" bit, meaning that the instructions need to be decoded
122        // with the thumb decoder.
123        let architecture = binary_image.arch();
124        let rel_address = match architecture {
125            Some("arm64" | "arm64e") => start_address & !0b11,
126            Some("arm") => start_address & !0b1,
127            _ => *start_address,
128        };
129
130        // Pad out the number of bytes we read a little, to allow for reading one
131        // more instruction.
132        // We've been asked to decode the instructions whose instruction addresses
133        // are in the range rel_address .. (rel_address + disassembly_len).
134        // If the end of
135        // this range points into the middle of an instruction, we still want to
136        // decode the entire instruction, so we need all of its bytes.
137        // We have another check later to make sure we don't return instructions whose
138        // address is beyond the requested range.
139        const MAX_INSTR_LEN: u32 = 15; // TODO: Get the correct max length for this arch
140
141        // Now read the instruction bytes from the file.
142        let bytes = binary_image
143            .read_bytes_at_relative_address(rel_address, disassembly_len + MAX_INSTR_LEN)
144            .map_err(|e| match e {
145                CodeByteReadingError::AddressNotFound => AsmError::AddressNotFound,
146                CodeByteReadingError::ObjectParseError(e) => AsmError::ObjectParseError(e),
147                CodeByteReadingError::ByteRangeNotInSection => AsmError::ByteRangeNotInSection,
148                CodeByteReadingError::FileIO(e) => AsmError::FileIO(e),
149            })?;
150
151        decode_arch(bytes, architecture, rel_address, disassembly_len)
152    }
153
154    async fn get_function_end_address(
155        &self,
156        library_info: &LibraryInfo,
157        address_within_function: u32,
158    ) -> Option<u32> {
159        let symbol_map_res = self.symbol_manager.load_symbol_map(library_info).await;
160        let symbol = symbol_map_res
161            .ok()?
162            .lookup_sync(LookupAddress::Relative(address_within_function))?
163            .symbol;
164        symbol.address.checked_add(symbol.size?)
165    }
166}
167
168fn decode_arch(
169    bytes: &[u8],
170    arch: Option<&str>,
171    rel_address: u32,
172    decode_len: u32,
173) -> Result<Response, AsmError> {
174    Ok(match arch {
175        Some("x86") => decode::<yaxpeax_x86::protected_mode::Arch>(bytes, rel_address, decode_len),
176        Some("x86_64" | "x86_64h") => {
177            decode::<yaxpeax_x86::amd64::Arch>(bytes, rel_address, decode_len)
178        }
179        Some("arm64" | "arm64e") => {
180            decode::<yaxpeax_arm::armv8::a64::ARMv8>(bytes, rel_address, decode_len)
181        }
182        Some("arm") => decode::<yaxpeax_arm::armv7::ARMv7>(bytes, rel_address, decode_len),
183        _ => {
184            return Err(AsmError::UnrecognizedArch(
185                arch.map_or_else(|| "unknown".to_string(), |a| a.to_string()),
186            ))
187        }
188    })
189}
190
191trait InstructionDecoding: Arch {
192    const ARCH_NAME: &'static str;
193    const SYNTAX: &'static [&'static str];
194    const ADJUST_BY_AFTER_ERROR: usize;
195    fn make_decoder() -> Self::Decoder;
196    fn stringify_inst(rel_address: u32, offset: u32, inst: Self::Instruction)
197        -> DecodedInstruction;
198}
199
200impl InstructionDecoding for yaxpeax_x86::amd64::Arch {
201    const ARCH_NAME: &'static str = "x86_64";
202    const SYNTAX: &'static [&'static str] = &["Intel", "C style"];
203    const ADJUST_BY_AFTER_ERROR: usize = 1;
204
205    fn make_decoder() -> Self::Decoder {
206        yaxpeax_x86::amd64::InstDecoder::default()
207    }
208
209    fn stringify_inst(
210        rel_address: u32,
211        offset: u32,
212        inst: Self::Instruction,
213    ) -> DecodedInstruction {
214        let (mut intel_insn, mut c_insn) = (
215            inst.display_with(yaxpeax_x86::amd64::DisplayStyle::Intel)
216                .to_string(),
217            inst.display_with(yaxpeax_x86::amd64::DisplayStyle::C)
218                .to_string(),
219        );
220
221        fn is_relative_branch(opcode: Opcode) -> bool {
222            matches!(
223                opcode,
224                Opcode::JMP
225                    | Opcode::JRCXZ
226                    | Opcode::LOOP
227                    | Opcode::LOOPZ
228                    | Opcode::LOOPNZ
229                    | Opcode::JO
230                    | Opcode::JNO
231                    | Opcode::JB
232                    | Opcode::JNB
233                    | Opcode::JZ
234                    | Opcode::JNZ
235                    | Opcode::JNA
236                    | Opcode::JA
237                    | Opcode::JS
238                    | Opcode::JNS
239                    | Opcode::JP
240                    | Opcode::JNP
241                    | Opcode::JL
242                    | Opcode::JGE
243                    | Opcode::JLE
244                    | Opcode::JG
245                    | Opcode::CALL
246            )
247        }
248
249        if is_relative_branch(inst.opcode()) {
250            match inst.operand(0) {
251                Operand::ImmediateI8 { imm } => {
252                    let rel = imm;
253                    let dest = rel_address as i64
254                        + offset as i64
255                        + inst.len().to_const() as i64
256                        + rel as i64;
257                    intel_insn = format!("{} 0x{:x}", inst.opcode(), dest);
258                    c_insn.clone_from(&intel_insn);
259                }
260                Operand::ImmediateI32 { imm } => {
261                    let rel = imm;
262                    let dest = rel_address as i64
263                        + offset as i64
264                        + inst.len().to_const() as i64
265                        + rel as i64;
266                    intel_insn = format!("{} 0x{:x}", inst.opcode(), dest);
267                    c_insn.clone_from(&intel_insn);
268                }
269                _ => {}
270            };
271        }
272
273        DecodedInstruction {
274            offset,
275            decoded_string_per_syntax: vec![intel_insn, c_insn],
276        }
277    }
278}
279
280impl InstructionDecoding for yaxpeax_x86::protected_mode::Arch {
281    const ARCH_NAME: &'static str = "i686";
282    const SYNTAX: &'static [&'static str] = &["Intel"];
283    const ADJUST_BY_AFTER_ERROR: usize = 1;
284
285    fn make_decoder() -> Self::Decoder {
286        yaxpeax_x86::protected_mode::InstDecoder::default()
287    }
288
289    fn stringify_inst(
290        _rel_address: u32,
291        offset: u32,
292        inst: Self::Instruction,
293    ) -> DecodedInstruction {
294        DecodedInstruction {
295            offset,
296            decoded_string_per_syntax: vec![inst.to_string()],
297        }
298    }
299}
300
301impl InstructionDecoding for yaxpeax_arm::armv8::a64::ARMv8 {
302    const ARCH_NAME: &'static str = "aarch64";
303    const SYNTAX: &'static [&'static str] = &["ARM"];
304    const ADJUST_BY_AFTER_ERROR: usize = 4;
305
306    fn make_decoder() -> Self::Decoder {
307        yaxpeax_arm::armv8::a64::InstDecoder::default()
308    }
309
310    fn stringify_inst(
311        _rel_address: u32,
312        offset: u32,
313        inst: Self::Instruction,
314    ) -> DecodedInstruction {
315        DecodedInstruction {
316            offset,
317            decoded_string_per_syntax: vec![inst.to_string()],
318        }
319    }
320}
321
322impl InstructionDecoding for yaxpeax_arm::armv7::ARMv7 {
323    const ARCH_NAME: &'static str = "arm";
324    const SYNTAX: &'static [&'static str] = &["ARM"];
325    const ADJUST_BY_AFTER_ERROR: usize = 2;
326
327    fn make_decoder() -> Self::Decoder {
328        // TODO: Detect whether the instructions in the requested address range
329        // use thumb or non-thumb mode.
330        // I'm not quite sure how to do this. The same object file can contain both
331        // types of code in different functions. We basically have two options:
332        //  1. Have the API caller tell us whether to use thumb, or
333        //  2. Detect the mode based on the content in the file.
334        // For 2., we could look up the closest symbol to the start address and
335        // check whether its symbol address has the thumb bit set. But the function
336        // may not have a symbol in the binary that we have access to here.
337        //
338        // For now we just always assume thumb.
339        yaxpeax_arm::armv7::InstDecoder::default_thumb()
340    }
341
342    fn stringify_inst(
343        _rel_address: u32,
344        offset: u32,
345        inst: Self::Instruction,
346    ) -> DecodedInstruction {
347        DecodedInstruction {
348            offset,
349            decoded_string_per_syntax: vec![inst.to_string()],
350        }
351    }
352}
353
354fn decode<'a, A: InstructionDecoding>(
355    bytes: &'a [u8],
356    rel_address: u32,
357    decode_len: u32,
358) -> Response
359where
360    u64: From<A::Address>,
361    U8Reader<'a>: yaxpeax_arch::Reader<A::Address, A::Word>,
362{
363    use yaxpeax_arch::Decoder;
364    let mut reader = yaxpeax_arch::U8Reader::new(bytes);
365    let decoder = A::make_decoder();
366    let mut instructions = Vec::new();
367    let mut offset = 0;
368    loop {
369        if offset >= decode_len {
370            break;
371        }
372        let before = u64::from(reader.total_offset()) as u32;
373        match decoder.decode(&mut reader) {
374            Ok(inst) => {
375                instructions.push(A::stringify_inst(rel_address, offset, inst));
376                let after = u64::from(reader.total_offset()) as u32;
377                offset += after - before;
378            }
379            Err(e) => {
380                if e.data_exhausted() {
381                    break;
382                }
383
384                let remaining_bytes = &bytes[offset as usize..];
385                let s = remaining_bytes
386                    .iter()
387                    .take(A::ADJUST_BY_AFTER_ERROR)
388                    .map(|b| format!("{b:#02x}"))
389                    .collect::<Vec<_>>()
390                    .join(", ");
391                let s2 = remaining_bytes
392                    .iter()
393                    .take(A::ADJUST_BY_AFTER_ERROR)
394                    .map(|b| format!("{b:02X}"))
395                    .collect::<Vec<_>>()
396                    .join(" ");
397
398                instructions.push(DecodedInstruction {
399                    offset,
400                    decoded_string_per_syntax: A::SYNTAX
401                        .iter()
402                        .map(|_| {
403                            format!(
404                                ".byte {s:width$} # Invalid instruction {s2}: {e}",
405                                width = A::ADJUST_BY_AFTER_ERROR * 6
406                            )
407                        })
408                        .collect(),
409                });
410
411                offset += A::ADJUST_BY_AFTER_ERROR as u32;
412                let Some(reader_bytes) = bytes.get(offset as usize..) else {
413                    break;
414                };
415                reader = U8Reader::new(reader_bytes);
416            }
417        }
418    }
419    let final_offset = u64::from(yaxpeax_arch::Reader::<A::Address, A::Word>::total_offset(
420        &mut reader,
421    )) as u32;
422
423    Response {
424        start_address: rel_address,
425        size: final_offset,
426        arch: A::ARCH_NAME.to_string(),
427        syntax: A::SYNTAX.iter().map(ToString::to_string).collect(),
428        instructions,
429    }
430}