ds_decomp/analysis/
functions.rs

1use std::{
2    backtrace::Backtrace,
3    collections::{BTreeMap, BTreeSet},
4    fmt::{Display, Formatter},
5};
6
7use snafu::Snafu;
8use unarm::{
9    args::{Argument, Reg, Register},
10    arm, thumb, ArmVersion, Endian, Ins, ParseFlags, ParseMode, ParsedIns, Parser,
11};
12
13use crate::{
14    config::symbol::{SymbolMap, SymbolMapError},
15    util::bytes::FromSlice,
16};
17
18use super::{
19    function_branch::FunctionBranchState,
20    function_start::is_valid_function_start,
21    illegal_code::IllegalCodeState,
22    inline_table::{InlineTable, InlineTableState},
23    jump_table::{JumpTable, JumpTableState},
24    secure_area::SecureAreaState,
25};
26
27// All keys in the types below are instruction addresses
28pub type Labels = BTreeSet<u32>;
29pub type PoolConstants = BTreeSet<u32>;
30pub type JumpTables = BTreeMap<u32, JumpTable>;
31pub type InlineTables = BTreeMap<u32, InlineTable>;
32pub type FunctionCalls = BTreeMap<u32, CalledFunction>;
33pub type DataLoads = BTreeMap<u32, u32>;
34
35#[derive(Debug, Clone)]
36pub struct Function {
37    name: String,
38    start_address: u32,
39    end_address: u32,
40    first_instruction_address: u32,
41    thumb: bool,
42    labels: Labels,
43    pool_constants: PoolConstants,
44    jump_tables: JumpTables,
45    inline_tables: InlineTables,
46    function_calls: FunctionCalls,
47}
48
49#[derive(Debug, Snafu)]
50pub enum FunctionAnalysisError {
51    #[snafu(transparent)]
52    IntoFunction { source: IntoFunctionError },
53    #[snafu(transparent)]
54    SymbolMap { source: SymbolMapError },
55}
56
57impl Function {
58    pub fn size(&self) -> u32 {
59        self.end_address - self.start_address
60    }
61
62    fn is_thumb_function(address: u32, code: &[u8]) -> bool {
63        if (address & 3) != 0 {
64            // Not 4-aligned, must be Thumb
65            true
66        } else if code.len() < 4 {
67            // Can't contain a full ARM instruction
68            true
69        } else if code[3] & 0xf0 == 0xe0 {
70            // First instruction has the AL condition code, must be ARM
71            false
72        } else {
73            // Thumb otherwise
74            true
75        }
76    }
77
78    #[allow(clippy::match_like_matches_macro)]
79    fn is_entry_instruction(ins: Ins, parsed_ins: &ParsedIns) -> bool {
80        if ins.is_conditional() {
81            return false;
82        }
83
84        let args = &parsed_ins.args;
85        match (parsed_ins.mnemonic, args[0], args[1], args[2]) {
86            (
87                "stmdb",
88                Argument::Reg(Reg { reg: Register::Sp, writeback: true, deref: false }),
89                Argument::RegList(regs),
90                Argument::None,
91            )
92            | ("push", Argument::RegList(regs), Argument::None, Argument::None)
93                if regs.contains(Register::Lr) =>
94            {
95                true
96            }
97            _ => false,
98        }
99    }
100
101    fn is_branch(ins: Ins, parsed_ins: &ParsedIns, address: u32) -> Option<u32> {
102        if ins.mnemonic() != "b" {
103            return None;
104        }
105        let dest = parsed_ins.branch_destination().unwrap();
106        Some((address as i32 + dest).try_into().unwrap())
107    }
108
109    fn is_pool_load(ins: Ins, parsed_ins: &ParsedIns, address: u32, thumb: bool) -> Option<u32> {
110        if ins.mnemonic() != "ldr" {
111            return None;
112        }
113        match (parsed_ins.args[0], parsed_ins.args[1], parsed_ins.args[2]) {
114            (Argument::Reg(dest), Argument::Reg(base), Argument::OffsetImm(offset)) => {
115                if dest.reg == Register::Pc {
116                    None
117                } else if !base.deref || base.reg != Register::Pc {
118                    None
119                } else if offset.post_indexed {
120                    None
121                } else {
122                    // ldr *, [pc + *]
123                    let load_address = (address as i32 + offset.value) as u32 & !3;
124                    let load_address = load_address + if thumb { 4 } else { 8 };
125                    Some(load_address)
126                }
127            }
128            _ => None,
129        }
130    }
131
132    fn is_function_call(ins: Ins, parsed_ins: &ParsedIns, address: u32, thumb: bool) -> Option<CalledFunction> {
133        let args = &parsed_ins.args;
134        match (ins.mnemonic(), args[0], args[1]) {
135            ("bl", Argument::BranchDest(offset), Argument::None) => {
136                let destination = (address as i32 + offset) as u32;
137                Some(CalledFunction { ins, address: destination, thumb })
138            }
139            ("blx", Argument::BranchDest(offset), Argument::None) => {
140                let destination = (address as i32 + offset) as u32;
141                let destination = if thumb { destination & !3 } else { destination };
142                Some(CalledFunction { ins, address: destination, thumb: !thumb })
143            }
144            _ => None,
145        }
146    }
147
148    fn function_parser_loop(
149        mut parser: Parser<'_>,
150        options: FunctionParseOptions,
151    ) -> Result<ParseFunctionResult, FunctionAnalysisError> {
152        let thumb = parser.mode == ParseMode::Thumb;
153        let mut context = ParseFunctionContext::new(thumb, options);
154
155        let Some((address, ins, parsed_ins)) = parser.next() else { return Ok(ParseFunctionResult::NoEpilogue) };
156        if !is_valid_function_start(address, ins, &parsed_ins) {
157            return Ok(ParseFunctionResult::InvalidStart { address, ins, parsed_ins });
158        }
159
160        let state = context.handle_ins(&mut parser, address, ins, parsed_ins);
161        let result = if state.ended() {
162            return Ok(context.into_function(state)?);
163        } else {
164            loop {
165                let Some((address, ins, parsed_ins)) = parser.next() else {
166                    break context.into_function(ParseFunctionState::Done);
167                };
168                let state = context.handle_ins(&mut parser, address, ins, parsed_ins);
169                if state.ended() {
170                    break context.into_function(state);
171                }
172            }
173        };
174
175        let result = result?;
176        let ParseFunctionResult::Found(mut function) = result else {
177            return Ok(result);
178        };
179
180        if let Some(first_pool_address) = function.pool_constants.first() {
181            if *first_pool_address < function.start_address {
182                log::info!(
183                    "Function at {:#010x} was adjusted to include pre-code constant pool at {:#010x}",
184                    function.start_address,
185                    first_pool_address
186                );
187
188                function.first_instruction_address = function.start_address;
189                function.start_address = *first_pool_address;
190            }
191        }
192
193        Ok(ParseFunctionResult::Found(function))
194    }
195
196    pub fn parse_function(options: FunctionParseOptions) -> Result<ParseFunctionResult, FunctionAnalysisError> {
197        let FunctionParseOptions { start_address, base_address, module_code, parse_options, .. } = &options;
198
199        let thumb = parse_options.thumb.unwrap_or(Function::is_thumb_function(*start_address, module_code));
200        let parse_mode = if thumb { ParseMode::Thumb } else { ParseMode::Arm };
201        let start = (start_address - base_address) as usize;
202        let function_code = &module_code[start..];
203        let parser = Parser::new(
204            parse_mode,
205            *start_address,
206            Endian::Little,
207            ParseFlags { version: ArmVersion::V5Te, ual: false },
208            function_code,
209        );
210
211        Self::function_parser_loop(parser, options)
212    }
213
214    pub fn find_functions(options: FindFunctionsOptions) -> Result<BTreeMap<u32, Function>, FunctionAnalysisError> {
215        let FindFunctionsOptions {
216            default_name_prefix,
217            base_address,
218            module_code,
219            symbol_map,
220            module_start_address,
221            module_end_address,
222            search_options,
223        } = options;
224
225        let mut functions = BTreeMap::new();
226
227        let start_address = search_options.start_address.unwrap_or(base_address);
228        assert!((start_address & 1) == 0);
229        let start_offset = start_address - base_address;
230        let end_address = search_options.end_address.unwrap_or(base_address + module_code.len() as u32);
231        let end_offset = end_address - base_address;
232        let module_code = &module_code[..end_offset as usize];
233        let mut function_code = &module_code[start_offset as usize..end_offset as usize];
234
235        log::debug!("Searching for functions from {:#010x} to {:#010x}", start_address, end_address);
236
237        // Upper bound for function search
238        let last_function_address = search_options.last_function_address.unwrap_or(end_address);
239        let mut upper_bounds = BTreeSet::new();
240
241        // Used to limit how far to search for valid function starts, see `max_function_start_search_distance`
242        let mut prev_valid_address = start_address;
243        let mut address = start_address;
244
245        while !function_code.is_empty() && address <= *upper_bounds.first().unwrap_or(&last_function_address) {
246            let thumb = Function::is_thumb_function(address, function_code);
247
248            let parse_mode = if thumb { ParseMode::Thumb } else { ParseMode::Arm };
249            let parser = Parser::new(
250                parse_mode,
251                address,
252                Endian::Little,
253                ParseFlags { version: ArmVersion::V5Te, ual: false },
254                function_code,
255            );
256
257            let (name, new) = if let Some((_, symbol)) = symbol_map.by_address(address)? {
258                (symbol.name.clone(), false)
259            } else {
260                (format!("{}{:08x}", default_name_prefix, address), true)
261            };
262
263            let function_result = Function::function_parser_loop(
264                parser,
265                FunctionParseOptions {
266                    name,
267                    start_address: address,
268                    base_address,
269                    module_code,
270                    known_end_address: None,
271                    module_start_address,
272                    module_end_address,
273                    existing_functions: search_options.existing_functions,
274                    check_defs_uses: search_options.check_defs_uses,
275                    parse_options: Default::default(),
276                },
277            )?;
278            let function = match function_result {
279                ParseFunctionResult::Found(function) => function,
280                ParseFunctionResult::IllegalIns { address: illegal_address, ins, .. } => {
281                    let search_limit = prev_valid_address.saturating_add(search_options.max_function_start_search_distance);
282                    let limit_reached = address >= search_limit;
283
284                    if !limit_reached {
285                        // It's possible that we've attempted to analyze pool constants as code, which can happen if the
286                        // function has a constant pool ahead of its code.
287                        let mut next_address = (address + 1).next_multiple_of(4);
288                        if let Some(function_addresses) = search_options.function_addresses.as_ref() {
289                            if let Some(&next_function) = function_addresses.range(address + 1..).next() {
290                                next_address = next_function;
291                            }
292                        }
293                        address = next_address;
294                        function_code = &module_code[(address - base_address) as usize..];
295                        continue;
296                    } else {
297                        if thumb {
298                            log::debug!(
299                                "Terminating function analysis due to illegal instruction at {:#010x}: {:04x}",
300                                illegal_address,
301                                ins.code()
302                            );
303                        } else {
304                            log::debug!(
305                                "Terminating function analysis due to illegal instruction at {:#010x}: {:08x}",
306                                illegal_address,
307                                ins.code()
308                            );
309                        }
310                        break;
311                    }
312                }
313                ParseFunctionResult::NoEpilogue => {
314                    log::debug!(
315                        "Terminating function analysis due to no epilogue in function starting from {:#010x}",
316                        address
317                    );
318                    break;
319                }
320                ParseFunctionResult::InvalidStart { address: start_address, ins, parsed_ins } => {
321                    let search_limit = prev_valid_address.saturating_add(search_options.max_function_start_search_distance);
322                    let limit_reached = address >= search_limit;
323
324                    if !limit_reached {
325                        let ins_size = parse_mode.instruction_size(0);
326                        address += ins_size as u32;
327                        function_code = &function_code[ins_size..];
328                        continue;
329                    } else {
330                        if thumb {
331                            log::debug!(
332                                "Terminating function analysis due to invalid function start at {:#010x}: {:04x} {}",
333                                start_address,
334                                ins.code(),
335                                parsed_ins.display(Default::default())
336                            );
337                        } else {
338                            log::debug!(
339                                "Terminating function analysis due to invalid function start at {:#010x}: {:08x} {}",
340                                start_address,
341                                ins.code(),
342                                parsed_ins.display(Default::default())
343                            );
344                        }
345                        break;
346                    }
347                }
348            };
349
350            // A function was found
351            if new {
352                symbol_map.add_function(&function);
353            }
354            function.add_local_symbols_to_map(symbol_map)?;
355
356            address = function.end_address.next_multiple_of(4); // align by 4 in case of Thumb function ending on 2-byte boundary
357            prev_valid_address = function.end_address;
358            function_code = &module_code[(address - base_address) as usize..];
359
360            // Invalidate upper bounds if they are inside the function
361            let invalid_upper_bounds: Vec<u32> = upper_bounds.range(..=function.end_address).copied().collect();
362            for invalid_upper_bound in invalid_upper_bounds {
363                upper_bounds.remove(&invalid_upper_bound);
364                log::debug!(
365                    "Invalidating upper bound {:#010x} inside function {:#010x}",
366                    invalid_upper_bound,
367                    function.start_address
368                );
369            }
370
371            // Look for pointers to data in this module, to use as an upper bound for finding functions
372            if search_options.use_data_as_upper_bound {
373                for pool_constant in function.iter_pool_constants(module_code, base_address) {
374                    let pointer_value = pool_constant.value & !1;
375                    if upper_bounds.contains(&pointer_value) {
376                        continue;
377                    }
378                    if pointer_value < address {
379                        continue;
380                    }
381
382                    let offset = (pointer_value - base_address) as usize;
383                    if offset >= module_code.len() {
384                        continue;
385                    }
386
387                    let thumb = Function::is_thumb_function(pointer_value, &module_code[offset..]);
388                    let mut parser = Parser::new(
389                        if thumb { ParseMode::Thumb } else { ParseMode::Arm },
390                        pointer_value,
391                        Endian::Little,
392                        ParseFlags { ual: false, version: ArmVersion::V5Te },
393                        &module_code[offset..],
394                    );
395                    let (address, ins, parsed_ins) = parser.next().unwrap();
396                    if is_valid_function_start(address, ins, &parsed_ins) {
397                        continue;
398                    }
399
400                    // The pool constant points to data, limit the upper bound
401                    upper_bounds.insert(pointer_value);
402                    log::debug!(
403                        "Upper bound found: address to data at {:#010x} from pool constant at {:#010x} from function {}",
404                        pool_constant.value,
405                        pool_constant.address,
406                        function.name
407                    );
408                }
409            }
410
411            functions.insert(function.first_instruction_address, function);
412        }
413        Ok(functions)
414    }
415
416    pub fn add_local_symbols_to_map(&self, symbol_map: &mut SymbolMap) -> Result<(), SymbolMapError> {
417        for address in self.labels.iter() {
418            symbol_map.add_label(*address, self.thumb)?;
419        }
420        for address in self.pool_constants.iter() {
421            symbol_map.add_pool_constant(*address)?;
422        }
423        for jump_table in self.jump_tables() {
424            symbol_map.add_jump_table(jump_table)?;
425        }
426        for inline_table in self.inline_tables().values() {
427            symbol_map.add_data(None, inline_table.address, (*inline_table).into())?;
428        }
429        Ok(())
430    }
431
432    pub fn find_secure_area_functions(
433        module_code: &[u8],
434        base_addr: u32,
435        symbol_map: &mut SymbolMap,
436    ) -> BTreeMap<u32, Function> {
437        let mut functions = BTreeMap::new();
438
439        let parse_flags = ParseFlags { ual: false, version: ArmVersion::V5Te };
440
441        let mut address = base_addr;
442        let mut state = SecureAreaState::default();
443        for ins_code in module_code.chunks_exact(2) {
444            let ins_code = u16::from_le_slice(ins_code);
445            let ins = thumb::Ins::new(ins_code as u32, &parse_flags);
446            let parsed_ins = ins.parse(&parse_flags);
447
448            state = state.handle(address, &parsed_ins);
449            if let Some(function) = state.get_function() {
450                let function = Function {
451                    name: function.name().to_string(),
452                    start_address: function.start(),
453                    end_address: function.end(),
454                    first_instruction_address: function.start(),
455                    thumb: true,
456                    labels: Labels::new(),
457                    pool_constants: PoolConstants::new(),
458                    jump_tables: JumpTables::new(),
459                    inline_tables: InlineTables::new(),
460                    function_calls: FunctionCalls::new(),
461                };
462                symbol_map.add_function(&function);
463                functions.insert(function.first_instruction_address, function);
464            }
465
466            address += 2;
467        }
468
469        functions
470    }
471
472    pub fn parser<'a>(&'a self, module_code: &'a [u8], base_address: u32) -> Parser<'a> {
473        Parser::new(
474            if self.thumb { ParseMode::Thumb } else { ParseMode::Arm },
475            self.start_address,
476            Endian::Little,
477            ParseFlags { ual: false, version: ArmVersion::V5Te },
478            self.code(module_code, base_address),
479        )
480    }
481
482    pub fn code<'a>(&self, module_code: &'a [u8], base_address: u32) -> &'a [u8] {
483        let start = (self.start_address - base_address) as usize;
484        let end = (self.end_address - base_address) as usize;
485        &module_code[start..end]
486    }
487
488    pub fn name(&self) -> &str {
489        &self.name
490    }
491
492    pub fn start_address(&self) -> u32 {
493        self.start_address
494    }
495
496    pub fn end_address(&self) -> u32 {
497        self.end_address
498    }
499
500    pub fn first_instruction_address(&self) -> u32 {
501        self.first_instruction_address
502    }
503
504    pub fn is_thumb(&self) -> bool {
505        self.thumb
506    }
507
508    pub fn labels(&self) -> impl Iterator<Item = &u32> {
509        self.labels.iter()
510    }
511
512    pub fn jump_tables(&self) -> impl Iterator<Item = &JumpTable> {
513        self.jump_tables.values()
514    }
515
516    pub fn inline_tables(&self) -> &InlineTables {
517        &self.inline_tables
518    }
519
520    pub fn get_inline_table_at(&self, address: u32) -> Option<&InlineTable> {
521        Self::inline_table_at(&self.inline_tables, address)
522    }
523
524    fn inline_table_at(inline_tables: &InlineTables, address: u32) -> Option<&InlineTable> {
525        inline_tables.values().find(|table| address >= table.address && address < table.address + table.size)
526    }
527
528    pub fn pool_constants(&self) -> &PoolConstants {
529        &self.pool_constants
530    }
531
532    pub fn iter_pool_constants<'a>(
533        &'a self,
534        module_code: &'a [u8],
535        base_address: u32,
536    ) -> impl Iterator<Item = PoolConstant> + 'a {
537        self.pool_constants.iter().map(move |&address| {
538            let start = (address - base_address) as usize;
539            let bytes = &module_code[start..];
540            PoolConstant { address, value: u32::from_le_slice(bytes) }
541        })
542    }
543
544    pub fn function_calls(&self) -> &FunctionCalls {
545        &self.function_calls
546    }
547}
548
549#[derive(Default)]
550pub struct FunctionParseOptions<'a> {
551    pub name: String,
552    pub start_address: u32,
553    pub base_address: u32,
554    pub module_code: &'a [u8],
555    pub known_end_address: Option<u32>,
556    pub module_start_address: u32,
557    pub module_end_address: u32,
558    pub existing_functions: Option<&'a BTreeMap<u32, Function>>,
559
560    /// Whether to check that all registers used in the instruction are defined
561    pub check_defs_uses: bool,
562
563    pub parse_options: ParseFunctionOptions,
564}
565
566pub struct FindFunctionsOptions<'a> {
567    pub default_name_prefix: &'a str,
568    pub base_address: u32,
569    pub module_code: &'a [u8],
570    pub symbol_map: &'a mut SymbolMap,
571    pub module_start_address: u32,
572    pub module_end_address: u32,
573
574    pub search_options: FunctionSearchOptions<'a>,
575}
576
577struct ParseFunctionContext<'a> {
578    name: String,
579    start_address: u32,
580    thumb: bool,
581    end_address: Option<u32>,
582    known_end_address: Option<u32>,
583    labels: Labels,
584    pool_constants: PoolConstants,
585    jump_tables: JumpTables,
586    inline_tables: InlineTables,
587    function_calls: FunctionCalls,
588
589    module_start_address: u32,
590    module_end_address: u32,
591    existing_functions: Option<&'a BTreeMap<u32, Function>>,
592
593    /// Address of last conditional instruction, so we can detect the final return instruction
594    last_conditional_destination: Option<u32>,
595    /// Address of last pool constant, to get the function's true end address
596    last_pool_address: Option<u32>,
597    /// State machine for detecting jump tables and adding them as symbols
598    jump_table_state: JumpTableState,
599    /// State machine for detecting branches (B, not BL) to other functions
600    function_branch_state: FunctionBranchState,
601    /// State machine for detecting inline data tables within the function
602    inline_table_state: InlineTableState,
603    /// State machine for detecting illegal code sequences
604    illegal_code_state: IllegalCodeState,
605
606    /// Whether to check that all registers used in the instruction are defined
607    check_defs_uses: bool,
608    defined_registers: BTreeSet<Register>,
609
610    prev_ins: Option<Ins>,
611    prev_parsed_ins: Option<ParsedIns>,
612    prev_address: Option<u32>,
613}
614
615#[derive(Debug, Snafu)]
616pub enum IntoFunctionError {
617    #[snafu(display("Cannot turn parse context into function before parsing is done"))]
618    NotDone { backtrace: Backtrace },
619}
620
621impl<'a> ParseFunctionContext<'a> {
622    pub fn new(thumb: bool, options: FunctionParseOptions<'a>) -> Self {
623        let FunctionParseOptions {
624            name,
625            start_address,
626            known_end_address,
627            module_start_address,
628            module_end_address,
629            existing_functions,
630            check_defs_uses,
631            ..
632        } = options;
633
634        let mut defined_registers = BTreeSet::new();
635        // Could be arguments
636        defined_registers.insert(Register::R0);
637        defined_registers.insert(Register::R1);
638        defined_registers.insert(Register::R2);
639        defined_registers.insert(Register::R3);
640        // Always defined
641        defined_registers.insert(Register::Sp);
642        defined_registers.insert(Register::Lr);
643        defined_registers.insert(Register::Pc);
644        // Could be used as a scratch register
645        defined_registers.insert(Register::R12);
646
647        Self {
648            name,
649            start_address,
650            thumb,
651            end_address: None,
652            known_end_address,
653            labels: Labels::new(),
654            pool_constants: PoolConstants::new(),
655            jump_tables: JumpTables::new(),
656            inline_tables: InlineTables::new(),
657            function_calls: FunctionCalls::new(),
658
659            module_start_address,
660            module_end_address,
661            existing_functions,
662
663            last_conditional_destination: None,
664            last_pool_address: None,
665            jump_table_state: if thumb {
666                JumpTableState::Thumb(Default::default())
667            } else {
668                JumpTableState::Arm(Default::default())
669            },
670            function_branch_state: Default::default(),
671            inline_table_state: Default::default(),
672            illegal_code_state: Default::default(),
673
674            check_defs_uses,
675            defined_registers,
676
677            prev_ins: None,
678            prev_parsed_ins: None,
679            prev_address: None,
680        }
681    }
682
683    fn handle_ins_inner(&mut self, parser: &mut Parser, address: u32, ins: Ins, parsed_ins: &ParsedIns) -> ParseFunctionState {
684        if self.pool_constants.contains(&address) {
685            parser.seek_forward(address + 4);
686            return ParseFunctionState::Continue;
687        }
688        if let Some(inline_table) = Function::inline_table_at(&self.inline_tables, address) {
689            parser.seek_forward(inline_table.address + inline_table.size);
690            return ParseFunctionState::Continue;
691        }
692
693        self.jump_table_state = self.jump_table_state.handle(address, ins, parsed_ins, &mut self.jump_tables);
694        self.last_conditional_destination = self.last_conditional_destination.max(self.jump_table_state.table_end_address());
695        if let Some(label) = self.jump_table_state.get_label(address, ins) {
696            self.labels.insert(label);
697            self.last_conditional_destination = self.last_conditional_destination.max(Some(label));
698        }
699
700        if self.jump_table_state.is_numerical_jump_offset() {
701            // Not an instruction, continue
702            return ParseFunctionState::Continue;
703        }
704
705        let ins_size = if let Ins::Thumb(thumb_ins) = ins {
706            if thumb_ins.op != thumb::Opcode::Bl && thumb_ins.op != thumb::Opcode::BlxI {
707                // Typical Thumb instruction
708                2
709            } else if matches!(parsed_ins.args[0], Argument::BranchDest(_)) {
710                // Combined BL/BLX instruction
711                4
712            } else {
713                // Not combined
714                return ParseFunctionState::IllegalIns { address, ins, parsed_ins: parsed_ins.clone() };
715            }
716        } else {
717            // ARM instruction
718            4
719        };
720
721        self.illegal_code_state = self.illegal_code_state.handle(ins, parsed_ins);
722        if self.illegal_code_state.is_illegal() {
723            return ParseFunctionState::IllegalIns { address, ins, parsed_ins: parsed_ins.clone() };
724        }
725
726        let in_conditional_block = Some(address) < self.last_conditional_destination;
727        let is_return =
728            self.is_return(ins, parsed_ins, address, self.start_address, self.module_start_address, self.module_end_address);
729        if !in_conditional_block && is_return {
730            let end_address = address + ins_size;
731            if let Some(destination) = Function::is_branch(ins, parsed_ins, address) {
732                let outside_function = destination < self.start_address || destination >= end_address;
733                if outside_function {
734                    // Tail call
735                    self.function_calls.insert(address, CalledFunction { ins, address: destination, thumb: self.thumb });
736                }
737            }
738
739            // We're not inside a conditional code block, so this is the final return instruction
740            self.end_address = Some(address + ins_size);
741            return ParseFunctionState::Done;
742        }
743
744        if address > self.start_address && Function::is_entry_instruction(ins, parsed_ins) {
745            'check_tail_call: {
746                let Some(prev_ins) = self.prev_ins else {
747                    break 'check_tail_call;
748                };
749                let Some(prev_parsed_ins) = self.prev_parsed_ins.as_ref() else {
750                    break 'check_tail_call;
751                };
752                let Some(prev_address) = self.prev_address else {
753                    break 'check_tail_call;
754                };
755                if Function::is_branch(prev_ins, prev_parsed_ins, prev_address).is_some() {
756                    let is_conditional = in_conditional_block || prev_ins.is_conditional();
757                    if is_conditional {
758                        // Tail call
759                        self.end_address = Some(address);
760                        return ParseFunctionState::Done;
761                    }
762                }
763            };
764        }
765
766        self.function_branch_state = self.function_branch_state.handle(ins, parsed_ins);
767        if let Some(destination) = Function::is_branch(ins, parsed_ins, address) {
768            let in_current_module = destination >= self.module_start_address && destination < self.module_end_address;
769            if !in_current_module {
770                // Tail call
771                self.function_calls.insert(address, CalledFunction { ins, address: destination, thumb: self.thumb });
772            } else if self.function_branch_state.is_function_branch()
773                || self.existing_functions.map(|functions| functions.contains_key(&destination)).unwrap_or(false)
774            {
775                if !ins.is_conditional() && !in_conditional_block {
776                    // This is an unconditional backwards function branch, which means this function has ended
777                    self.end_address = Some(address + ins_size);
778                    return ParseFunctionState::Done;
779                } else {
780                    // TODO: Always run this (move it outside of else block). SectionExt::relocatable_code must take condition
781                    // code into account so the game matches after linking
782                    self.function_calls.insert(address, CalledFunction { ins, address: destination, thumb: self.thumb });
783                }
784            } else {
785                // Normal branch instruction, insert a label
786                if let Some(state) = self.handle_label(destination, address, parser, ins_size) {
787                    return state;
788                }
789            }
790        }
791
792        if let Some(pool_address) = Function::is_pool_load(ins, parsed_ins, address, self.thumb) {
793            self.pool_constants.insert(pool_address);
794            self.last_pool_address = self.last_pool_address.max(Some(pool_address));
795        }
796
797        self.inline_table_state = self.inline_table_state.handle(self.thumb, address, parsed_ins);
798        if let Some(table) = self.inline_table_state.get_table() {
799            log::debug!("Inline table found at {:#x}, size {:#x}", table.address, table.size);
800            self.inline_tables.insert(table.address, table);
801        }
802
803        if let Some(called_function) = Function::is_function_call(ins, parsed_ins, address, self.thumb) {
804            self.function_calls.insert(address, called_function);
805        }
806
807        if self.check_defs_uses && !Self::is_nop(ins, parsed_ins) {
808            if Self::is_push(ins) {
809                // Add all caller-saved registers to the defined set
810                ins.register_list().iter().for_each(|reg| {
811                    self.defined_registers.insert(reg);
812                });
813            }
814
815            // Verify that all registers used in the instruction are defined
816            let defs_uses = match ins {
817                Ins::Arm(ins) => Some((ins.defs(&Default::default()), ins.uses(&Default::default()))),
818                Ins::Thumb(ins) => Some((ins.defs(&Default::default()), ins.uses(&Default::default()))),
819                Ins::Data => None,
820            };
821            if let Some((defs, uses)) = defs_uses {
822                for usage in uses {
823                    let legal = match usage {
824                        Argument::Reg(reg) => {
825                            if let Ins::Arm(ins) = ins {
826                                if ins.op == arm::Opcode::Str && ins.field_rn_deref().reg == Register::Sp {
827                                    // There are instance of `str Rd, [sp, #imm]` where Rd is not defined.
828                                    // Potential UB bug in mwccarm.
829                                    self.defined_registers.insert(reg.reg);
830                                    continue;
831                                }
832                            }
833
834                            self.defined_registers.contains(&reg.reg)
835                        }
836                        Argument::RegList(reg_list) => reg_list.iter().all(|reg| self.defined_registers.contains(&reg)),
837                        Argument::ShiftReg(shift_reg) => self.defined_registers.contains(&shift_reg.reg),
838                        Argument::OffsetReg(offset_reg) => self.defined_registers.contains(&offset_reg.reg),
839                        _ => continue,
840                    };
841                    if !legal {
842                        return ParseFunctionState::IllegalIns { address, ins, parsed_ins: parsed_ins.clone() };
843                    }
844                }
845                if !is_return {
846                    for def in defs {
847                        match def {
848                            Argument::Reg(reg) => {
849                                self.defined_registers.insert(reg.reg);
850                            }
851                            Argument::RegList(reg_list) => {
852                                for reg in reg_list.iter() {
853                                    self.defined_registers.insert(reg);
854                                }
855                            }
856                            Argument::ShiftReg(shift_reg) => {
857                                self.defined_registers.insert(shift_reg.reg);
858                            }
859                            Argument::OffsetReg(offset_reg) => {
860                                self.defined_registers.insert(offset_reg.reg);
861                            }
862                            _ => continue,
863                        };
864                    }
865                }
866            }
867        }
868
869        ParseFunctionState::Continue
870    }
871
872    pub fn handle_ins(&mut self, parser: &mut Parser, address: u32, ins: Ins, parsed_ins: ParsedIns) -> ParseFunctionState {
873        let state = self.handle_ins_inner(parser, address, ins, &parsed_ins);
874        self.prev_ins = Some(ins);
875        self.prev_parsed_ins = Some(parsed_ins);
876        self.prev_address = Some(address);
877        state
878    }
879
880    fn handle_label(
881        &mut self,
882        destination: u32,
883        address: u32,
884        parser: &mut Parser,
885        ins_size: u32,
886    ) -> Option<ParseFunctionState> {
887        self.labels.insert(destination);
888        self.last_conditional_destination = self.last_conditional_destination.max(Some(destination));
889
890        let next_address = address + ins_size;
891        if self.pool_constants.contains(&next_address) {
892            let branch_backwards = destination <= address;
893
894            // Load instructions in ARM mode can have an offset of up to ±4kB. Therefore, some functions must
895            // emit pool constants in the middle so they can all be accessed by PC-relative loads. There will
896            // also be branch instruction right before, so that the pool constants don't get executed.
897
898            // Sometimes, the pre-pool branch is conveniently placed at an actual branch in the code, and
899            // leads even further than the end of the pool constants. In that case we should already have found
900            // a label at a lower address.
901            if let Some(after_pools) = self.labels.range(address + 1..).next().copied() {
902                if after_pools > address + 0x1000 {
903                    log::warn!("Massive gap from constant pool at {:#x} to next label at {:#x}", next_address, after_pools);
904                }
905                parser.seek_forward(after_pools);
906            } else if !branch_backwards {
907                // Backwards branch with no further branch labels. This type of function contains some kind of infinite loop,
908                // hence the lack of return instruction as the final instruction.
909                self.end_address = Some(next_address);
910                return Some(ParseFunctionState::Done);
911            } else {
912                let after_pools = (next_address..).step_by(4).find(|addr| !self.pool_constants.contains(addr)).unwrap();
913                log::warn!(
914                    "No label past constant pool at {:#x}, jumping to first address not occupied by a pool constant ({:#x})",
915                    next_address,
916                    after_pools
917                );
918                parser.seek_forward(after_pools);
919            }
920        }
921
922        None
923    }
924
925    fn into_function(self, state: ParseFunctionState) -> Result<ParseFunctionResult, IntoFunctionError> {
926        match state {
927            ParseFunctionState::Continue => {
928                return NotDoneSnafu.fail();
929            }
930            ParseFunctionState::IllegalIns { address, ins, parsed_ins } => {
931                return Ok(ParseFunctionResult::IllegalIns { address, ins, parsed_ins })
932            }
933            ParseFunctionState::Done => {}
934        };
935        let Some(end_address) = self.end_address else {
936            return Ok(ParseFunctionResult::NoEpilogue);
937        };
938
939        let end_address =
940            self.known_end_address.unwrap_or(end_address.max(self.last_pool_address.map(|a| a + 4).unwrap_or(0)));
941        if end_address > self.module_end_address {
942            return Ok(ParseFunctionResult::NoEpilogue);
943        }
944
945        Ok(ParseFunctionResult::Found(Function {
946            name: self.name,
947            start_address: self.start_address,
948            end_address,
949            first_instruction_address: self.start_address,
950            thumb: self.thumb,
951            labels: self.labels,
952            pool_constants: self.pool_constants,
953            jump_tables: self.jump_tables,
954            inline_tables: self.inline_tables,
955            function_calls: self.function_calls,
956        }))
957    }
958
959    fn is_return(
960        &self,
961        ins: Ins,
962        parsed_ins: &ParsedIns,
963        address: u32,
964        function_start: u32,
965        module_start_address: u32,
966        module_end_address: u32,
967    ) -> bool {
968        if ins.is_conditional() {
969            return false;
970        }
971
972        let args = &parsed_ins.args;
973        match (parsed_ins.mnemonic, args[0], args[1]) {
974            // bx *
975            ("bx", _, _) => true,
976            // mov pc, *
977            ("mov", Argument::Reg(Reg { reg: Register::Pc, .. }), _) => true,
978            // ldmia *, {..., pc}
979            ("ldmia", _, Argument::RegList(reg_list)) if reg_list.contains(Register::Pc) => true,
980            // pop {..., pc}
981            ("pop", Argument::RegList(reg_list), _) if reg_list.contains(Register::Pc) => true,
982            // backwards branch
983            ("b", Argument::BranchDest(offset), _) if offset < 0 => {
984                // Branch must be within current function (infinite loop) or outside current module (tail call)
985                Function::is_branch(ins, parsed_ins, address)
986                    .map(|destination| {
987                        destination >= function_start
988                            || destination < module_start_address
989                            || destination >= module_end_address
990                    })
991                    .unwrap_or(false)
992            }
993            // subs pc, lr, *
994            ("subs", Argument::Reg(Reg { reg: Register::Pc, .. }), Argument::Reg(Reg { reg: Register::Lr, .. })) => true,
995            // ldr pc, *
996            ("ldr", Argument::Reg(Reg { reg: Register::Pc, .. }), _) => true,
997            _ => false,
998        }
999    }
1000
1001    fn is_nop(ins: Ins, parsed_ins: &ParsedIns) -> bool {
1002        match (ins.mnemonic(), parsed_ins.args[0], parsed_ins.args[1], parsed_ins.args[2]) {
1003            ("nop", _, _, _) => true,
1004            ("mov", Argument::Reg(Reg { reg: dest, .. }), Argument::Reg(Reg { reg: src, .. }), Argument::None) => dest == src,
1005            _ => false,
1006        }
1007    }
1008
1009    fn is_push(ins: Ins) -> bool {
1010        match ins {
1011            Ins::Arm(arm_ins) => {
1012                if arm_ins.op == arm::Opcode::StmW && arm_ins.field_rn_wb().reg == Register::Sp {
1013                    true
1014                } else {
1015                    matches!(arm_ins.op, arm::Opcode::PushM | arm::Opcode::PushR)
1016                }
1017            }
1018            Ins::Thumb(thumb_ins) => thumb_ins.op == thumb::Opcode::Push,
1019            _ => false,
1020        }
1021    }
1022}
1023
1024#[derive(Default)]
1025pub struct ParseFunctionOptions {
1026    /// Whether the function is in Thumb or ARM mode, or None if it should be detected automatically.
1027    pub thumb: Option<bool>,
1028}
1029
1030enum ParseFunctionState {
1031    Continue,
1032    IllegalIns { address: u32, ins: Ins, parsed_ins: ParsedIns },
1033    Done,
1034}
1035
1036impl ParseFunctionState {
1037    pub fn ended(&self) -> bool {
1038        match self {
1039            Self::Continue => false,
1040            Self::IllegalIns { .. } | Self::Done => true,
1041        }
1042    }
1043}
1044
1045#[derive(Debug)]
1046pub enum ParseFunctionResult {
1047    Found(Function),
1048    IllegalIns { address: u32, ins: Ins, parsed_ins: ParsedIns },
1049    NoEpilogue,
1050    InvalidStart { address: u32, ins: Ins, parsed_ins: ParsedIns },
1051}
1052
1053impl Display for ParseFunctionResult {
1054    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1055        match self {
1056            Self::Found(function) => write!(f, "Found function: {}", function.name()),
1057            Self::IllegalIns { address, parsed_ins, .. } => {
1058                write!(f, "Illegal instruction at {:#010x}: {}", address, parsed_ins.display(Default::default()))
1059            }
1060            Self::NoEpilogue => write!(f, "No epilogue found"),
1061            Self::InvalidStart { address, parsed_ins, .. } => {
1062                write!(f, "Invalid function start at {:#010x}: {}", address, parsed_ins.display(Default::default()))
1063            }
1064        }
1065    }
1066}
1067
1068#[derive(Default)]
1069pub struct FunctionSearchOptions<'a> {
1070    /// Address to start searching from. Defaults to the base address.
1071    pub start_address: Option<u32>,
1072    /// Last address that a function can start from. Defaults to [`Self::end_address`].
1073    pub last_function_address: Option<u32>,
1074    /// Address to end the search. Defaults to the base address plus code size.
1075    pub end_address: Option<u32>,
1076    /// If zero, end the search when an illegal starting instruction is found. Otherwise, continue searching for a valid
1077    /// function start for up to this many bytes. Set to [`u32::MAX`] to search until the end of the module.
1078    pub max_function_start_search_distance: u32,
1079    /// If true, pointers to data will be used to limit the upper bound address.
1080    pub use_data_as_upper_bound: bool,
1081    /// Guarantees that all these addresses will be analyzed, even if the function analysis would terminate before they are
1082    /// reached. Used for .init functions.
1083    /// Note: This will override `keep_searching_for_valid_function_start`, they are not intended to be used together.
1084    pub function_addresses: Option<BTreeSet<u32>>,
1085    /// If a branch instruction branches into one of these functions, it will be treated as a function branch instead of
1086    /// inserting a label at the branch destination.
1087    /// If the function branch is unconditional, it will also be treated as a tail call and terminate the analysis of the
1088    /// current function.
1089    pub existing_functions: Option<&'a BTreeMap<u32, Function>>,
1090    /// Whether to treat instructions using undefined registers as illegal.
1091    pub check_defs_uses: bool,
1092}
1093
1094#[derive(Clone, Copy, Debug)]
1095pub struct CalledFunction {
1096    pub ins: Ins,
1097    pub address: u32,
1098    pub thumb: bool,
1099}
1100
1101pub struct PoolConstant {
1102    pub address: u32,
1103    pub value: u32,
1104}