ds_decomp/analysis/
functions.rs

1use std::{
2    backtrace::Backtrace,
3    collections::{BTreeMap, BTreeSet},
4    fmt::{Display, Formatter},
5};
6
7use snafu::Snafu;
8use unarm::{
9    ArmVersion, Endian, Ins, ParseFlags, ParseMode, ParsedIns, Parser,
10    args::{Argument, Reg, Register},
11    arm, thumb,
12};
13
14use crate::{
15    config::symbol::{SymbolMap, SymbolMapError},
16    util::bytes::FromSlice,
17};
18
19use super::{
20    function_branch::FunctionBranchState,
21    function_start::is_valid_function_start,
22    illegal_code::IllegalCodeState,
23    inline_table::{InlineTable, InlineTableState},
24    jump_table::{JumpTable, JumpTableState},
25    secure_area::SecureAreaState,
26};
27
28// All keys in the types below are instruction addresses
29pub type Labels = BTreeSet<u32>;
30pub type PoolConstants = BTreeSet<u32>;
31pub type JumpTables = BTreeMap<u32, JumpTable>;
32pub type InlineTables = BTreeMap<u32, InlineTable>;
33pub type FunctionCalls = BTreeMap<u32, CalledFunction>;
34pub type DataLoads = BTreeMap<u32, u32>;
35
36#[derive(Debug, Clone)]
37pub struct Function {
38    name: String,
39    start_address: u32,
40    end_address: u32,
41    first_instruction_address: u32,
42    thumb: bool,
43    labels: Labels,
44    pool_constants: PoolConstants,
45    jump_tables: JumpTables,
46    inline_tables: InlineTables,
47    function_calls: FunctionCalls,
48}
49
50#[derive(Debug, Snafu)]
51pub enum FunctionAnalysisError {
52    #[snafu(transparent)]
53    IntoFunction { source: IntoFunctionError },
54    #[snafu(transparent)]
55    SymbolMap { source: SymbolMapError },
56}
57
58impl Function {
59    pub fn size(&self) -> u32 {
60        self.end_address - self.first_instruction_address
61    }
62
63    fn is_thumb_function(address: u32, code: &[u8]) -> bool {
64        if (address & 3) != 0 {
65            // Not 4-aligned, must be Thumb
66            true
67        } else if code.len() < 4 {
68            // Can't contain a full ARM instruction
69            true
70        } else if code[3] & 0xf0 == 0xe0 {
71            // First instruction has the AL condition code, must be ARM
72            false
73        } else {
74            // Thumb otherwise
75            true
76        }
77    }
78
79    #[allow(clippy::match_like_matches_macro)]
80    fn is_entry_instruction(ins: Ins, parsed_ins: &ParsedIns) -> bool {
81        if ins.is_conditional() {
82            return false;
83        }
84
85        let args = &parsed_ins.args;
86        match (parsed_ins.mnemonic, args[0], args[1], args[2]) {
87            (
88                "stmdb",
89                Argument::Reg(Reg { reg: Register::Sp, writeback: true, deref: false }),
90                Argument::RegList(regs),
91                Argument::None,
92            )
93            | ("push", Argument::RegList(regs), Argument::None, Argument::None)
94                if regs.contains(Register::Lr) =>
95            {
96                true
97            }
98            _ => false,
99        }
100    }
101
102    fn is_branch(ins: Ins, parsed_ins: &ParsedIns, address: u32) -> Option<u32> {
103        if ins.mnemonic() != "b" {
104            return None;
105        }
106        let dest = parsed_ins.branch_destination().unwrap();
107        Some((address as i32 + dest).try_into().unwrap())
108    }
109
110    fn is_pool_load(ins: Ins, parsed_ins: &ParsedIns, address: u32, thumb: bool) -> Option<u32> {
111        if ins.mnemonic() != "ldr" {
112            return None;
113        }
114        match (parsed_ins.args[0], parsed_ins.args[1], parsed_ins.args[2]) {
115            (Argument::Reg(dest), Argument::Reg(base), Argument::OffsetImm(offset)) => {
116                if dest.reg == Register::Pc {
117                    None
118                } else if !base.deref || base.reg != Register::Pc {
119                    None
120                } else if offset.post_indexed {
121                    None
122                } else {
123                    // ldr *, [pc + *]
124                    let load_address = (address as i32 + offset.value) as u32 & !3;
125                    let load_address = load_address + if thumb { 4 } else { 8 };
126                    Some(load_address)
127                }
128            }
129            _ => None,
130        }
131    }
132
133    fn is_function_call(ins: Ins, parsed_ins: &ParsedIns, address: u32, thumb: bool) -> Option<CalledFunction> {
134        let args = &parsed_ins.args;
135        match (ins.mnemonic(), args[0], args[1]) {
136            ("bl", Argument::BranchDest(offset), Argument::None) => {
137                let destination = (address as i32 + offset) as u32;
138                Some(CalledFunction { ins, address: destination, thumb })
139            }
140            ("blx", Argument::BranchDest(offset), Argument::None) => {
141                let destination = (address as i32 + offset) as u32;
142                let destination = if thumb { destination & !3 } else { destination };
143                Some(CalledFunction { ins, address: destination, thumb: !thumb })
144            }
145            _ => None,
146        }
147    }
148
149    fn function_parser_loop(
150        mut parser: Parser<'_>,
151        options: FunctionParseOptions,
152    ) -> Result<ParseFunctionResult, FunctionAnalysisError> {
153        let thumb = parser.mode == ParseMode::Thumb;
154        let mut context = ParseFunctionContext::new(thumb, options);
155
156        let Some((address, ins, parsed_ins)) = parser.next() else { return Ok(ParseFunctionResult::NoEpilogue) };
157        if !is_valid_function_start(address, ins, &parsed_ins) {
158            return Ok(ParseFunctionResult::InvalidStart { address, ins, parsed_ins });
159        }
160
161        let state = context.handle_ins(&mut parser, address, ins, parsed_ins);
162        let result = if state.ended() {
163            return Ok(context.into_function(state)?);
164        } else {
165            loop {
166                let Some((address, ins, parsed_ins)) = parser.next() else {
167                    break context.into_function(ParseFunctionState::Done);
168                };
169                let state = context.handle_ins(&mut parser, address, ins, parsed_ins);
170                if state.ended() {
171                    break context.into_function(state);
172                }
173            }
174        };
175
176        let result = result?;
177        let ParseFunctionResult::Found(mut function) = result else {
178            return Ok(result);
179        };
180
181        if let Some(first_pool_address) = function.pool_constants.first() {
182            if *first_pool_address < function.start_address {
183                log::info!(
184                    "Function at {:#010x} was adjusted to include pre-code constant pool at {:#010x}",
185                    function.start_address,
186                    first_pool_address
187                );
188
189                function.first_instruction_address = function.start_address;
190                function.start_address = *first_pool_address;
191            }
192        }
193
194        Ok(ParseFunctionResult::Found(function))
195    }
196
197    pub fn parse_function(options: FunctionParseOptions) -> Result<ParseFunctionResult, FunctionAnalysisError> {
198        let FunctionParseOptions { start_address, base_address, module_code, parse_options, .. } = &options;
199
200        let thumb = parse_options.thumb.unwrap_or(Function::is_thumb_function(*start_address, module_code));
201        let parse_mode = if thumb { ParseMode::Thumb } else { ParseMode::Arm };
202        let start = (start_address - base_address) as usize;
203        let function_code = &module_code[start..];
204        let parser = Parser::new(
205            parse_mode,
206            *start_address,
207            Endian::Little,
208            ParseFlags { version: ArmVersion::V5Te, ual: false },
209            function_code,
210        );
211
212        Self::function_parser_loop(parser, options)
213    }
214
215    pub fn find_functions(options: FindFunctionsOptions) -> Result<BTreeMap<u32, Function>, FunctionAnalysisError> {
216        let FindFunctionsOptions {
217            default_name_prefix,
218            base_address,
219            module_code,
220            symbol_map,
221            module_start_address,
222            module_end_address,
223            search_options,
224        } = options;
225
226        let mut functions = BTreeMap::new();
227
228        let start_address = search_options.start_address.unwrap_or(base_address);
229        assert!((start_address & 1) == 0);
230        let start_offset = start_address - base_address;
231        let end_address = search_options.end_address.unwrap_or(base_address + module_code.len() as u32);
232        let end_offset = end_address - base_address;
233        let module_code = &module_code[..end_offset as usize];
234        let mut function_code = &module_code[start_offset as usize..end_offset as usize];
235
236        log::debug!("Searching for functions from {:#010x} to {:#010x}", start_address, end_address);
237
238        // Upper bound for function search
239        let last_function_address = search_options.last_function_address.unwrap_or(end_address);
240        let mut upper_bounds = BTreeSet::new();
241
242        // Used to limit how far to search for valid function starts, see `max_function_start_search_distance`
243        let mut prev_valid_address = start_address;
244        let mut address = start_address;
245
246        while !function_code.is_empty() && address <= *upper_bounds.first().unwrap_or(&last_function_address) {
247            let thumb = Function::is_thumb_function(address, function_code);
248
249            let parse_mode = if thumb { ParseMode::Thumb } else { ParseMode::Arm };
250            let parser = Parser::new(
251                parse_mode,
252                address,
253                Endian::Little,
254                ParseFlags { version: ArmVersion::V5Te, ual: false },
255                function_code,
256            );
257
258            let (name, new) = if let Some((_, symbol)) = symbol_map.by_address(address)? {
259                (symbol.name.clone(), false)
260            } else {
261                (format!("{default_name_prefix}{address:08x}"), true)
262            };
263
264            let function_result = Function::function_parser_loop(
265                parser,
266                FunctionParseOptions {
267                    name,
268                    start_address: address,
269                    base_address,
270                    module_code,
271                    known_end_address: None,
272                    module_start_address,
273                    module_end_address,
274                    existing_functions: search_options.existing_functions,
275                    check_defs_uses: search_options.check_defs_uses,
276                    parse_options: Default::default(),
277                },
278            )?;
279            let function = match function_result {
280                ParseFunctionResult::Found(function) => function,
281                ParseFunctionResult::IllegalIns { address: illegal_address, ins, .. } => {
282                    let search_limit = prev_valid_address.saturating_add(search_options.max_function_start_search_distance);
283                    let limit_reached = address >= search_limit;
284
285                    if !limit_reached {
286                        // It's possible that we've attempted to analyze pool constants as code, which can happen if the
287                        // function has a constant pool ahead of its code.
288                        let mut next_address = (address + 1).next_multiple_of(4);
289                        if let Some(function_addresses) = search_options.function_addresses.as_ref() {
290                            if let Some(&next_function) = function_addresses.range(address + 1..).next() {
291                                next_address = next_function;
292                            }
293                        }
294                        address = next_address;
295                        function_code = &module_code[(address - base_address) as usize..];
296                        continue;
297                    } else {
298                        if thumb {
299                            log::debug!(
300                                "Terminating function analysis due to illegal instruction at {:#010x}: {:04x}",
301                                illegal_address,
302                                ins.code()
303                            );
304                        } else {
305                            log::debug!(
306                                "Terminating function analysis due to illegal instruction at {:#010x}: {:08x}",
307                                illegal_address,
308                                ins.code()
309                            );
310                        }
311                        break;
312                    }
313                }
314                ParseFunctionResult::NoEpilogue => {
315                    log::debug!(
316                        "Terminating function analysis due to no epilogue in function starting from {:#010x}",
317                        address
318                    );
319                    break;
320                }
321                ParseFunctionResult::InvalidStart { address: start_address, ins, parsed_ins } => {
322                    let search_limit = prev_valid_address.saturating_add(search_options.max_function_start_search_distance);
323                    let limit_reached = address >= search_limit;
324
325                    if !limit_reached {
326                        let ins_size = parse_mode.instruction_size(0);
327                        address += ins_size as u32;
328                        function_code = &function_code[ins_size..];
329                        continue;
330                    } else {
331                        if thumb {
332                            log::debug!(
333                                "Terminating function analysis due to invalid function start at {:#010x}: {:04x} {}",
334                                start_address,
335                                ins.code(),
336                                parsed_ins.display(Default::default())
337                            );
338                        } else {
339                            log::debug!(
340                                "Terminating function analysis due to invalid function start at {:#010x}: {:08x} {}",
341                                start_address,
342                                ins.code(),
343                                parsed_ins.display(Default::default())
344                            );
345                        }
346                        break;
347                    }
348                }
349            };
350
351            // A function was found
352            if new {
353                symbol_map.add_function(&function);
354            }
355            function.add_local_symbols_to_map(symbol_map)?;
356
357            address = function.end_address.next_multiple_of(4); // align by 4 in case of Thumb function ending on 2-byte boundary
358            prev_valid_address = function.end_address;
359            function_code = &module_code[(address - base_address) as usize..];
360
361            // Invalidate upper bounds if they are inside the function
362            let invalid_upper_bounds: Vec<u32> = upper_bounds.range(..=function.end_address).copied().collect();
363            for invalid_upper_bound in invalid_upper_bounds {
364                upper_bounds.remove(&invalid_upper_bound);
365                log::debug!(
366                    "Invalidating upper bound {:#010x} inside function {:#010x}",
367                    invalid_upper_bound,
368                    function.start_address
369                );
370            }
371
372            // Look for pointers to data in this module, to use as an upper bound for finding functions
373            if search_options.use_data_as_upper_bound {
374                for pool_constant in function.iter_pool_constants(module_code, base_address) {
375                    let pointer_value = pool_constant.value & !1;
376                    if upper_bounds.contains(&pointer_value) {
377                        continue;
378                    }
379                    if pointer_value < address {
380                        continue;
381                    }
382
383                    let offset = (pointer_value - base_address) as usize;
384                    if offset >= module_code.len() {
385                        continue;
386                    }
387
388                    let thumb = Function::is_thumb_function(pointer_value, &module_code[offset..]);
389                    let mut parser = Parser::new(
390                        if thumb { ParseMode::Thumb } else { ParseMode::Arm },
391                        pointer_value,
392                        Endian::Little,
393                        ParseFlags { ual: false, version: ArmVersion::V5Te },
394                        &module_code[offset..],
395                    );
396                    let (address, ins, parsed_ins) = parser.next().unwrap();
397                    if is_valid_function_start(address, ins, &parsed_ins) {
398                        continue;
399                    }
400
401                    // The pool constant points to data, limit the upper bound
402                    upper_bounds.insert(pointer_value);
403                    log::debug!(
404                        "Upper bound found: address to data at {:#010x} from pool constant at {:#010x} from function {}",
405                        pool_constant.value,
406                        pool_constant.address,
407                        function.name
408                    );
409                }
410            }
411
412            functions.insert(function.first_instruction_address, function);
413        }
414        Ok(functions)
415    }
416
417    pub fn add_local_symbols_to_map(&self, symbol_map: &mut SymbolMap) -> Result<(), SymbolMapError> {
418        for address in self.labels.iter() {
419            symbol_map.add_label(*address, self.thumb)?;
420        }
421        for address in self.pool_constants.iter() {
422            symbol_map.add_pool_constant(*address)?;
423        }
424        for jump_table in self.jump_tables() {
425            symbol_map.add_jump_table(jump_table)?;
426        }
427        for inline_table in self.inline_tables().values() {
428            symbol_map.add_skip_data(None, inline_table.address, (*inline_table).into())?;
429        }
430        Ok(())
431    }
432
433    pub fn find_secure_area_functions(
434        module_code: &[u8],
435        base_addr: u32,
436        symbol_map: &mut SymbolMap,
437    ) -> BTreeMap<u32, Function> {
438        let mut functions = BTreeMap::new();
439
440        let parse_flags = ParseFlags { ual: false, version: ArmVersion::V5Te };
441
442        let mut address = base_addr;
443        let mut state = SecureAreaState::default();
444        for ins_code in module_code.chunks_exact(2) {
445            let ins_code = u16::from_le_slice(ins_code);
446            let ins = thumb::Ins::new(ins_code as u32, &parse_flags);
447            let parsed_ins = ins.parse(&parse_flags);
448
449            state = state.handle(address, &parsed_ins);
450            if let Some(function) = state.get_function() {
451                let function = Function {
452                    name: function.name().to_string(),
453                    start_address: function.start(),
454                    end_address: function.end(),
455                    first_instruction_address: function.start(),
456                    thumb: true,
457                    labels: Labels::new(),
458                    pool_constants: PoolConstants::new(),
459                    jump_tables: JumpTables::new(),
460                    inline_tables: InlineTables::new(),
461                    function_calls: FunctionCalls::new(),
462                };
463                symbol_map.add_function(&function);
464                functions.insert(function.first_instruction_address, function);
465            }
466
467            address += 2;
468        }
469
470        functions
471    }
472
473    pub fn parser<'a>(&'a self, module_code: &'a [u8], base_address: u32) -> Parser<'a> {
474        Parser::new(
475            if self.thumb { ParseMode::Thumb } else { ParseMode::Arm },
476            self.start_address,
477            Endian::Little,
478            ParseFlags { ual: false, version: ArmVersion::V5Te },
479            self.code(module_code, base_address),
480        )
481    }
482
483    pub fn code<'a>(&self, module_code: &'a [u8], base_address: u32) -> &'a [u8] {
484        let start = (self.start_address - base_address) as usize;
485        let end = (self.end_address - base_address) as usize;
486        &module_code[start..end]
487    }
488
489    pub fn name(&self) -> &str {
490        &self.name
491    }
492
493    pub fn start_address(&self) -> u32 {
494        self.start_address
495    }
496
497    pub fn end_address(&self) -> u32 {
498        self.end_address
499    }
500
501    pub fn first_instruction_address(&self) -> u32 {
502        self.first_instruction_address
503    }
504
505    pub fn is_thumb(&self) -> bool {
506        self.thumb
507    }
508
509    pub fn labels(&self) -> impl Iterator<Item = &u32> {
510        self.labels.iter()
511    }
512
513    pub fn jump_tables(&self) -> impl Iterator<Item = &JumpTable> {
514        self.jump_tables.values()
515    }
516
517    pub fn inline_tables(&self) -> &InlineTables {
518        &self.inline_tables
519    }
520
521    pub fn get_inline_table_at(&self, address: u32) -> Option<&InlineTable> {
522        Self::inline_table_at(&self.inline_tables, address)
523    }
524
525    fn inline_table_at(inline_tables: &InlineTables, address: u32) -> Option<&InlineTable> {
526        inline_tables.values().find(|table| address >= table.address && address < table.address + table.size)
527    }
528
529    pub fn pool_constants(&self) -> &PoolConstants {
530        &self.pool_constants
531    }
532
533    pub fn iter_pool_constants<'a>(
534        &'a self,
535        module_code: &'a [u8],
536        base_address: u32,
537    ) -> impl Iterator<Item = PoolConstant> + 'a {
538        self.pool_constants.iter().map(move |&address| {
539            let start = (address - base_address) as usize;
540            let bytes = &module_code[start..];
541            PoolConstant { address, value: u32::from_le_slice(bytes) }
542        })
543    }
544
545    pub fn function_calls(&self) -> &FunctionCalls {
546        &self.function_calls
547    }
548}
549
550#[derive(Default)]
551pub struct FunctionParseOptions<'a> {
552    pub name: String,
553    pub start_address: u32,
554    pub base_address: u32,
555    pub module_code: &'a [u8],
556    pub known_end_address: Option<u32>,
557    pub module_start_address: u32,
558    pub module_end_address: u32,
559    pub existing_functions: Option<&'a BTreeMap<u32, Function>>,
560
561    /// Whether to check that all registers used in the instruction are defined
562    pub check_defs_uses: bool,
563
564    pub parse_options: ParseFunctionOptions,
565}
566
567pub struct FindFunctionsOptions<'a> {
568    pub default_name_prefix: &'a str,
569    pub base_address: u32,
570    pub module_code: &'a [u8],
571    pub symbol_map: &'a mut SymbolMap,
572    pub module_start_address: u32,
573    pub module_end_address: u32,
574
575    pub search_options: FunctionSearchOptions<'a>,
576}
577
578struct ParseFunctionContext<'a> {
579    name: String,
580    start_address: u32,
581    thumb: bool,
582    end_address: Option<u32>,
583    known_end_address: Option<u32>,
584    labels: Labels,
585    pool_constants: PoolConstants,
586    jump_tables: JumpTables,
587    inline_tables: InlineTables,
588    function_calls: FunctionCalls,
589
590    module_start_address: u32,
591    module_end_address: u32,
592    existing_functions: Option<&'a BTreeMap<u32, Function>>,
593
594    /// Address of last conditional instruction, so we can detect the final return instruction
595    last_conditional_destination: Option<u32>,
596    /// Address of last pool constant, to get the function's true end address
597    last_pool_address: Option<u32>,
598    /// State machine for detecting jump tables and adding them as symbols
599    jump_table_state: JumpTableState,
600    /// State machine for detecting branches (B, not BL) to other functions
601    function_branch_state: FunctionBranchState,
602    /// State machine for detecting inline data tables within the function
603    inline_table_state: InlineTableState,
604    /// State machine for detecting illegal code sequences
605    illegal_code_state: IllegalCodeState,
606
607    /// Whether to check that all registers used in the instruction are defined
608    check_defs_uses: bool,
609    defined_registers: BTreeSet<Register>,
610
611    prev_ins: Option<Ins>,
612    prev_parsed_ins: Option<ParsedIns>,
613    prev_address: Option<u32>,
614}
615
616#[derive(Debug, Snafu)]
617pub enum IntoFunctionError {
618    #[snafu(display("Cannot turn parse context into function before parsing is done"))]
619    NotDone { backtrace: Backtrace },
620}
621
622impl<'a> ParseFunctionContext<'a> {
623    pub fn new(thumb: bool, options: FunctionParseOptions<'a>) -> Self {
624        let FunctionParseOptions {
625            name,
626            start_address,
627            known_end_address,
628            module_start_address,
629            module_end_address,
630            existing_functions,
631            check_defs_uses,
632            ..
633        } = options;
634
635        let mut defined_registers = BTreeSet::new();
636        // Could be arguments
637        defined_registers.insert(Register::R0);
638        defined_registers.insert(Register::R1);
639        defined_registers.insert(Register::R2);
640        defined_registers.insert(Register::R3);
641        // Always defined
642        defined_registers.insert(Register::Sp);
643        defined_registers.insert(Register::Lr);
644        defined_registers.insert(Register::Pc);
645        // Could be used as a scratch register
646        defined_registers.insert(Register::R12);
647
648        Self {
649            name,
650            start_address,
651            thumb,
652            end_address: None,
653            known_end_address,
654            labels: Labels::new(),
655            pool_constants: PoolConstants::new(),
656            jump_tables: JumpTables::new(),
657            inline_tables: InlineTables::new(),
658            function_calls: FunctionCalls::new(),
659
660            module_start_address,
661            module_end_address,
662            existing_functions,
663
664            last_conditional_destination: None,
665            last_pool_address: None,
666            jump_table_state: if thumb {
667                JumpTableState::Thumb(Default::default())
668            } else {
669                JumpTableState::Arm(Default::default())
670            },
671            function_branch_state: Default::default(),
672            inline_table_state: Default::default(),
673            illegal_code_state: Default::default(),
674
675            check_defs_uses,
676            defined_registers,
677
678            prev_ins: None,
679            prev_parsed_ins: None,
680            prev_address: None,
681        }
682    }
683
684    fn handle_ins_inner(&mut self, parser: &mut Parser, address: u32, ins: Ins, parsed_ins: &ParsedIns) -> ParseFunctionState {
685        if self.pool_constants.contains(&address) {
686            parser.seek_forward(address + 4);
687            return ParseFunctionState::Continue;
688        }
689        if let Some(inline_table) = Function::inline_table_at(&self.inline_tables, address) {
690            parser.seek_forward(inline_table.address + inline_table.size);
691            return ParseFunctionState::Continue;
692        }
693
694        self.jump_table_state = self.jump_table_state.handle(address, ins, parsed_ins, &mut self.jump_tables);
695        self.last_conditional_destination = self.last_conditional_destination.max(self.jump_table_state.table_end_address());
696        if let Some(label) = self.jump_table_state.get_label(address, ins) {
697            self.labels.insert(label);
698            self.last_conditional_destination = self.last_conditional_destination.max(Some(label));
699        }
700
701        if self.jump_table_state.is_numerical_jump_offset() {
702            // Not an instruction, continue
703            return ParseFunctionState::Continue;
704        }
705
706        let ins_size = if let Ins::Thumb(thumb_ins) = ins {
707            if thumb_ins.op != thumb::Opcode::Bl && thumb_ins.op != thumb::Opcode::BlxI {
708                // Typical Thumb instruction
709                2
710            } else if matches!(parsed_ins.args[0], Argument::BranchDest(_)) {
711                // Combined BL/BLX instruction
712                4
713            } else {
714                // Not combined
715                return ParseFunctionState::IllegalIns { address, ins, parsed_ins: parsed_ins.clone() };
716            }
717        } else {
718            // ARM instruction
719            4
720        };
721
722        self.illegal_code_state = self.illegal_code_state.handle(ins, parsed_ins);
723        if self.illegal_code_state.is_illegal() {
724            return ParseFunctionState::IllegalIns { address, ins, parsed_ins: parsed_ins.clone() };
725        }
726
727        let in_conditional_block = Some(address) < self.last_conditional_destination;
728        let is_return =
729            self.is_return(ins, parsed_ins, address, self.start_address, self.module_start_address, self.module_end_address);
730        if !in_conditional_block && is_return {
731            let end_address = address + ins_size;
732            if let Some(destination) = Function::is_branch(ins, parsed_ins, address) {
733                let outside_function = destination < self.start_address || destination >= end_address;
734                if outside_function {
735                    // Tail call
736                    self.function_calls.insert(address, CalledFunction { ins, address: destination, thumb: self.thumb });
737                }
738            }
739
740            // We're not inside a conditional code block, so this is the final return instruction
741            self.end_address = Some(address + ins_size);
742            return ParseFunctionState::Done;
743        }
744
745        if address > self.start_address
746            && Function::is_entry_instruction(ins, parsed_ins)
747            && let Some(prev_ins) = self.prev_ins
748            && let Some(prev_parsed_ins) = self.prev_parsed_ins.as_ref()
749            && let Some(prev_address) = self.prev_address
750            && Function::is_branch(prev_ins, prev_parsed_ins, prev_address).is_some()
751        {
752            let is_conditional = in_conditional_block || prev_ins.is_conditional();
753            if is_conditional {
754                // Tail call
755                self.end_address = Some(address);
756                return ParseFunctionState::Done;
757            }
758        }
759
760        self.function_branch_state = self.function_branch_state.handle(ins, parsed_ins);
761        if let Some(destination) = Function::is_branch(ins, parsed_ins, address) {
762            let in_current_module = destination >= self.module_start_address && destination < self.module_end_address;
763            if !in_current_module {
764                // Tail call
765                self.function_calls.insert(address, CalledFunction { ins, address: destination, thumb: self.thumb });
766            } else if self.function_branch_state.is_function_branch()
767                || self.existing_functions.map(|functions| functions.contains_key(&destination)).unwrap_or(false)
768            {
769                if !ins.is_conditional() && !in_conditional_block {
770                    // This is an unconditional backwards function branch, which means this function has ended
771                    self.end_address = Some(address + ins_size);
772                    return ParseFunctionState::Done;
773                } else {
774                    // TODO: Always run this (move it outside of else block). SectionExt::relocatable_code must take condition
775                    // code into account so the game matches after linking
776                    self.function_calls.insert(address, CalledFunction { ins, address: destination, thumb: self.thumb });
777                }
778            } else {
779                // Normal branch instruction, insert a label
780                if let Some(state) = self.handle_label(destination, address, parser, ins_size) {
781                    return state;
782                }
783            }
784        }
785
786        if let Some(pool_address) = Function::is_pool_load(ins, parsed_ins, address, self.thumb) {
787            self.pool_constants.insert(pool_address);
788            self.last_pool_address = self.last_pool_address.max(Some(pool_address));
789        }
790
791        self.inline_table_state = self.inline_table_state.handle(self.thumb, address, parsed_ins);
792        if let Some(table) = self.inline_table_state.get_table() {
793            log::debug!("Inline table found at {:#x}, size {:#x}", table.address, table.size);
794            self.inline_tables.insert(table.address, table);
795        }
796
797        if let Some(called_function) = Function::is_function_call(ins, parsed_ins, address, self.thumb) {
798            self.function_calls.insert(address, called_function);
799        }
800
801        if self.check_defs_uses && !Self::is_nop(ins, parsed_ins) {
802            if Self::is_push(ins) {
803                // Add all caller-saved registers to the defined set
804                ins.register_list().iter().for_each(|reg| {
805                    self.defined_registers.insert(reg);
806                });
807            }
808
809            // Verify that all registers used in the instruction are defined
810            let defs_uses = match ins {
811                Ins::Arm(ins) => Some((ins.defs(&Default::default()), ins.uses(&Default::default()))),
812                Ins::Thumb(ins) => Some((ins.defs(&Default::default()), ins.uses(&Default::default()))),
813                Ins::Data => None,
814            };
815            if let Some((defs, uses)) = defs_uses {
816                for usage in uses {
817                    let legal = match usage {
818                        Argument::Reg(reg) => {
819                            if let Ins::Arm(ins) = ins {
820                                if ins.op == arm::Opcode::Str && ins.field_rn_deref().reg == Register::Sp {
821                                    // There are instance of `str Rd, [sp, #imm]` where Rd is not defined.
822                                    // Potential UB bug in mwccarm.
823                                    self.defined_registers.insert(reg.reg);
824                                    continue;
825                                }
826                            }
827
828                            self.defined_registers.contains(&reg.reg)
829                        }
830                        Argument::RegList(reg_list) => reg_list.iter().all(|reg| self.defined_registers.contains(&reg)),
831                        Argument::ShiftReg(shift_reg) => self.defined_registers.contains(&shift_reg.reg),
832                        Argument::OffsetReg(offset_reg) => self.defined_registers.contains(&offset_reg.reg),
833                        _ => continue,
834                    };
835                    if !legal {
836                        return ParseFunctionState::IllegalIns { address, ins, parsed_ins: parsed_ins.clone() };
837                    }
838                }
839                if !is_return {
840                    for def in defs {
841                        match def {
842                            Argument::Reg(reg) => {
843                                self.defined_registers.insert(reg.reg);
844                            }
845                            Argument::RegList(reg_list) => {
846                                for reg in reg_list.iter() {
847                                    self.defined_registers.insert(reg);
848                                }
849                            }
850                            Argument::ShiftReg(shift_reg) => {
851                                self.defined_registers.insert(shift_reg.reg);
852                            }
853                            Argument::OffsetReg(offset_reg) => {
854                                self.defined_registers.insert(offset_reg.reg);
855                            }
856                            _ => continue,
857                        };
858                    }
859                }
860            }
861        }
862
863        ParseFunctionState::Continue
864    }
865
866    pub fn handle_ins(&mut self, parser: &mut Parser, address: u32, ins: Ins, parsed_ins: ParsedIns) -> ParseFunctionState {
867        let state = self.handle_ins_inner(parser, address, ins, &parsed_ins);
868        self.prev_ins = Some(ins);
869        self.prev_parsed_ins = Some(parsed_ins);
870        self.prev_address = Some(address);
871        state
872    }
873
874    fn handle_label(
875        &mut self,
876        destination: u32,
877        address: u32,
878        parser: &mut Parser,
879        ins_size: u32,
880    ) -> Option<ParseFunctionState> {
881        self.labels.insert(destination);
882        self.last_conditional_destination = self.last_conditional_destination.max(Some(destination));
883
884        let next_address = address + ins_size;
885        if self.pool_constants.contains(&next_address) {
886            let branch_backwards = destination <= address;
887
888            // Load instructions in ARM mode can have an offset of up to ±4kB. Therefore, some functions must
889            // emit pool constants in the middle so they can all be accessed by PC-relative loads. There will
890            // also be branch instruction right before, so that the pool constants don't get executed.
891
892            // Sometimes, the pre-pool branch is conveniently placed at an actual branch in the code, and
893            // leads even further than the end of the pool constants. In that case we should already have found
894            // a label at a lower address.
895            if let Some(after_pools) = self.labels.range(address + 1..).next().copied() {
896                if after_pools > address + 0x1000 {
897                    log::warn!("Massive gap from constant pool at {:#x} to next label at {:#x}", next_address, after_pools);
898                }
899                parser.seek_forward(after_pools);
900            } else if !branch_backwards {
901                // Backwards branch with no further branch labels. This type of function contains some kind of infinite loop,
902                // hence the lack of return instruction as the final instruction.
903                self.end_address = Some(next_address);
904                return Some(ParseFunctionState::Done);
905            } else {
906                let after_pools = (next_address..).step_by(4).find(|addr| !self.pool_constants.contains(addr)).unwrap();
907                log::warn!(
908                    "No label past constant pool at {:#x}, jumping to first address not occupied by a pool constant ({:#x})",
909                    next_address,
910                    after_pools
911                );
912                parser.seek_forward(after_pools);
913            }
914        }
915
916        None
917    }
918
919    fn into_function(self, state: ParseFunctionState) -> Result<ParseFunctionResult, IntoFunctionError> {
920        match state {
921            ParseFunctionState::Continue => {
922                return NotDoneSnafu.fail();
923            }
924            ParseFunctionState::IllegalIns { address, ins, parsed_ins } => {
925                return Ok(ParseFunctionResult::IllegalIns { address, ins, parsed_ins });
926            }
927            ParseFunctionState::Done => {}
928        };
929        let Some(end_address) = self.end_address else {
930            return Ok(ParseFunctionResult::NoEpilogue);
931        };
932
933        let end_address =
934            self.known_end_address.unwrap_or(end_address.max(self.last_pool_address.map(|a| a + 4).unwrap_or(0)));
935        if end_address > self.module_end_address {
936            return Ok(ParseFunctionResult::NoEpilogue);
937        }
938
939        Ok(ParseFunctionResult::Found(Function {
940            name: self.name,
941            start_address: self.start_address,
942            end_address,
943            first_instruction_address: self.start_address,
944            thumb: self.thumb,
945            labels: self.labels,
946            pool_constants: self.pool_constants,
947            jump_tables: self.jump_tables,
948            inline_tables: self.inline_tables,
949            function_calls: self.function_calls,
950        }))
951    }
952
953    fn is_return(
954        &self,
955        ins: Ins,
956        parsed_ins: &ParsedIns,
957        address: u32,
958        function_start: u32,
959        module_start_address: u32,
960        module_end_address: u32,
961    ) -> bool {
962        if ins.is_conditional() {
963            return false;
964        }
965
966        let args = &parsed_ins.args;
967        match (parsed_ins.mnemonic, args[0], args[1]) {
968            // bx *
969            ("bx", _, _) => true,
970            // mov pc, *
971            ("mov", Argument::Reg(Reg { reg: Register::Pc, .. }), _) => true,
972            // ldmia *, {..., pc}
973            ("ldmia", _, Argument::RegList(reg_list)) if reg_list.contains(Register::Pc) => true,
974            // pop {..., pc}
975            ("pop", Argument::RegList(reg_list), _) if reg_list.contains(Register::Pc) => true,
976            // backwards branch
977            ("b", Argument::BranchDest(offset), _) if offset < 0 => {
978                // Branch must be within current function (infinite loop) or outside current module (tail call)
979                Function::is_branch(ins, parsed_ins, address)
980                    .map(|destination| {
981                        destination >= function_start
982                            || destination < module_start_address
983                            || destination >= module_end_address
984                    })
985                    .unwrap_or(false)
986            }
987            // subs pc, lr, *
988            ("subs", Argument::Reg(Reg { reg: Register::Pc, .. }), Argument::Reg(Reg { reg: Register::Lr, .. })) => true,
989            // ldr pc, *
990            ("ldr", Argument::Reg(Reg { reg: Register::Pc, .. }), _) => true,
991            _ => false,
992        }
993    }
994
995    fn is_nop(ins: Ins, parsed_ins: &ParsedIns) -> bool {
996        match (ins.mnemonic(), parsed_ins.args[0], parsed_ins.args[1], parsed_ins.args[2]) {
997            ("nop", _, _, _) => true,
998            ("mov", Argument::Reg(Reg { reg: dest, .. }), Argument::Reg(Reg { reg: src, .. }), Argument::None) => dest == src,
999            _ => false,
1000        }
1001    }
1002
1003    fn is_push(ins: Ins) -> bool {
1004        match ins {
1005            Ins::Arm(arm_ins) => {
1006                if arm_ins.op == arm::Opcode::StmW && arm_ins.field_rn_wb().reg == Register::Sp {
1007                    true
1008                } else {
1009                    matches!(arm_ins.op, arm::Opcode::PushM | arm::Opcode::PushR)
1010                }
1011            }
1012            Ins::Thumb(thumb_ins) => thumb_ins.op == thumb::Opcode::Push,
1013            _ => false,
1014        }
1015    }
1016}
1017
1018#[derive(Default)]
1019pub struct ParseFunctionOptions {
1020    /// Whether the function is in Thumb or ARM mode, or None if it should be detected automatically.
1021    pub thumb: Option<bool>,
1022}
1023
1024enum ParseFunctionState {
1025    Continue,
1026    IllegalIns { address: u32, ins: Ins, parsed_ins: ParsedIns },
1027    Done,
1028}
1029
1030impl ParseFunctionState {
1031    pub fn ended(&self) -> bool {
1032        match self {
1033            Self::Continue => false,
1034            Self::IllegalIns { .. } | Self::Done => true,
1035        }
1036    }
1037}
1038
1039#[derive(Debug)]
1040pub enum ParseFunctionResult {
1041    Found(Function),
1042    IllegalIns { address: u32, ins: Ins, parsed_ins: ParsedIns },
1043    NoEpilogue,
1044    InvalidStart { address: u32, ins: Ins, parsed_ins: ParsedIns },
1045}
1046
1047impl Display for ParseFunctionResult {
1048    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1049        match self {
1050            Self::Found(function) => write!(f, "Found function: {}", function.name()),
1051            Self::IllegalIns { address, parsed_ins, .. } => {
1052                write!(f, "Illegal instruction at {:#010x}: {}", address, parsed_ins.display(Default::default()))
1053            }
1054            Self::NoEpilogue => write!(f, "No epilogue found"),
1055            Self::InvalidStart { address, parsed_ins, .. } => {
1056                write!(f, "Invalid function start at {:#010x}: {}", address, parsed_ins.display(Default::default()))
1057            }
1058        }
1059    }
1060}
1061
1062#[derive(Default)]
1063pub struct FunctionSearchOptions<'a> {
1064    /// Address to start searching from. Defaults to the base address.
1065    pub start_address: Option<u32>,
1066    /// Last address that a function can start from. Defaults to [`Self::end_address`].
1067    pub last_function_address: Option<u32>,
1068    /// Address to end the search. Defaults to the base address plus code size.
1069    pub end_address: Option<u32>,
1070    /// If zero, end the search when an illegal starting instruction is found. Otherwise, continue searching for a valid
1071    /// function start for up to this many bytes. Set to [`u32::MAX`] to search until the end of the module.
1072    pub max_function_start_search_distance: u32,
1073    /// If true, pointers to data will be used to limit the upper bound address.
1074    pub use_data_as_upper_bound: bool,
1075    /// Guarantees that all these addresses will be analyzed, even if the function analysis would terminate before they are
1076    /// reached. Used for .init functions.
1077    /// Note: This will override `keep_searching_for_valid_function_start`, they are not intended to be used together.
1078    pub function_addresses: Option<BTreeSet<u32>>,
1079    /// If a branch instruction branches into one of these functions, it will be treated as a function branch instead of
1080    /// inserting a label at the branch destination.
1081    /// If the function branch is unconditional, it will also be treated as a tail call and terminate the analysis of the
1082    /// current function.
1083    pub existing_functions: Option<&'a BTreeMap<u32, Function>>,
1084    /// Whether to treat instructions using undefined registers as illegal.
1085    pub check_defs_uses: bool,
1086}
1087
1088#[derive(Clone, Copy, Debug)]
1089pub struct CalledFunction {
1090    pub ins: Ins,
1091    pub address: u32,
1092    pub thumb: bool,
1093}
1094
1095pub struct PoolConstant {
1096    pub address: u32,
1097    pub value: u32,
1098}