ds_decomp/analysis/
functions.rs

1use std::{
2    backtrace::Backtrace,
3    collections::{BTreeMap, BTreeSet},
4};
5
6use snafu::Snafu;
7use unarm::{
8    args::{Argument, Reg, Register},
9    thumb, ArmVersion, Endian, Ins, ParseFlags, ParseMode, ParsedIns, Parser,
10};
11
12use crate::{
13    config::symbol::{SymbolMap, SymbolMapError},
14    util::bytes::FromSlice,
15};
16
17use super::{
18    function_branch::FunctionBranchState,
19    function_start::is_valid_function_start,
20    illegal_code::IllegalCodeState,
21    inline_table::{InlineTable, InlineTableState},
22    jump_table::{JumpTable, JumpTableState},
23    secure_area::SecureAreaState,
24};
25
26// All keys in the types below are instruction addresses
27pub type Labels = BTreeSet<u32>;
28pub type PoolConstants = BTreeSet<u32>;
29pub type JumpTables = BTreeMap<u32, JumpTable>;
30pub type InlineTables = BTreeMap<u32, InlineTable>;
31pub type FunctionCalls = BTreeMap<u32, CalledFunction>;
32pub type DataLoads = BTreeMap<u32, u32>;
33
34#[derive(Debug, Clone)]
35pub struct Function {
36    name: String,
37    start_address: u32,
38    end_address: u32,
39    first_instruction_address: u32,
40    thumb: bool,
41    labels: Labels,
42    pool_constants: PoolConstants,
43    jump_tables: JumpTables,
44    inline_tables: InlineTables,
45    function_calls: FunctionCalls,
46}
47
48#[derive(Debug, Snafu)]
49pub enum FunctionAnalysisError {
50    #[snafu(transparent)]
51    IntoFunction { source: IntoFunctionError },
52    #[snafu(transparent)]
53    SymbolMap { source: SymbolMapError },
54}
55
56impl Function {
57    pub fn size(&self) -> u32 {
58        self.end_address - self.start_address
59    }
60
61    fn is_thumb_function(address: u32, code: &[u8]) -> bool {
62        if (address & 3) != 0 {
63            // Not 4-aligned, must be Thumb
64            true
65        } else if code.len() < 4 {
66            // Can't contain a full ARM instruction
67            true
68        } else if code[3] & 0xf0 == 0xe0 {
69            // First instruction has the AL condition code, must be ARM
70            false
71        } else {
72            // Thumb otherwise
73            true
74        }
75    }
76
77    #[allow(clippy::match_like_matches_macro)]
78    fn is_entry_instruction(ins: Ins, parsed_ins: &ParsedIns) -> bool {
79        if ins.is_conditional() {
80            return false;
81        }
82
83        let args = &parsed_ins.args;
84        match (parsed_ins.mnemonic, args[0], args[1], args[2]) {
85            (
86                "stmdb",
87                Argument::Reg(Reg { reg: Register::Sp, writeback: true, deref: false }),
88                Argument::RegList(regs),
89                Argument::None,
90            )
91            | ("push", Argument::RegList(regs), Argument::None, Argument::None)
92                if regs.contains(Register::Lr) =>
93            {
94                true
95            }
96            _ => false,
97        }
98    }
99
100    fn is_return(
101        ins: Ins,
102        parsed_ins: &ParsedIns,
103        address: u32,
104        function_start: u32,
105        module_start_address: u32,
106        module_end_address: u32,
107    ) -> bool {
108        if ins.is_conditional() {
109            return false;
110        }
111
112        let args = &parsed_ins.args;
113        match (parsed_ins.mnemonic, args[0], args[1]) {
114            // bx *
115            ("bx", _, _) => true,
116            // mov pc, *
117            ("mov", Argument::Reg(Reg { reg: Register::Pc, .. }), _) => true,
118            // ldmia *, {..., pc}
119            ("ldmia", _, Argument::RegList(reg_list)) if reg_list.contains(Register::Pc) => true,
120            // pop {..., pc}
121            ("pop", Argument::RegList(reg_list), _) if reg_list.contains(Register::Pc) => true,
122            // backwards branch
123            ("b", Argument::BranchDest(offset), _) if offset < 0 => {
124                // Branch must be within current function (infinite loop) or outside current module (tail call)
125                Self::is_branch(ins, parsed_ins, address)
126                    .map(|destination| {
127                        destination >= function_start
128                            || destination < module_start_address
129                            || destination >= module_end_address
130                    })
131                    .unwrap_or(false)
132            }
133            // subs pc, lr, *
134            ("subs", Argument::Reg(Reg { reg: Register::Pc, .. }), Argument::Reg(Reg { reg: Register::Lr, .. })) => true,
135            // ldr pc, *
136            ("ldr", Argument::Reg(Reg { reg: Register::Pc, .. }), _) => true,
137            _ => false,
138        }
139    }
140
141    fn is_branch(ins: Ins, parsed_ins: &ParsedIns, address: u32) -> Option<u32> {
142        if ins.mnemonic() != "b" {
143            return None;
144        }
145        let dest = parsed_ins.branch_destination().unwrap();
146        Some((address as i32 + dest).try_into().unwrap())
147    }
148
149    fn is_pool_load(ins: Ins, parsed_ins: &ParsedIns, address: u32, thumb: bool) -> Option<u32> {
150        if ins.mnemonic() != "ldr" {
151            return None;
152        }
153        match (parsed_ins.args[0], parsed_ins.args[1], parsed_ins.args[2]) {
154            (Argument::Reg(dest), Argument::Reg(base), Argument::OffsetImm(offset)) => {
155                if dest.reg == Register::Pc {
156                    None
157                } else if !base.deref || base.reg != Register::Pc {
158                    None
159                } else if offset.post_indexed {
160                    None
161                } else {
162                    // ldr *, [pc + *]
163                    let load_address = (address as i32 + offset.value) as u32 & !3;
164                    let load_address = load_address + if thumb { 4 } else { 8 };
165                    Some(load_address)
166                }
167            }
168            _ => None,
169        }
170    }
171
172    fn is_function_call(ins: Ins, parsed_ins: &ParsedIns, address: u32, thumb: bool) -> Option<CalledFunction> {
173        let args = &parsed_ins.args;
174        match (ins.mnemonic(), args[0], args[1]) {
175            ("bl", Argument::BranchDest(offset), Argument::None) => {
176                let destination = (address as i32 + offset) as u32;
177                Some(CalledFunction { ins, address: destination, thumb })
178            }
179            ("blx", Argument::BranchDest(offset), Argument::None) => {
180                let destination = (address as i32 + offset) as u32;
181                let destination = if thumb { destination & !3 } else { destination };
182                Some(CalledFunction { ins, address: destination, thumb: !thumb })
183            }
184            _ => None,
185        }
186    }
187
188    fn function_parser_loop(
189        mut parser: Parser<'_>,
190        options: FunctionParseOptions,
191    ) -> Result<ParseFunctionResult, FunctionAnalysisError> {
192        let thumb = parser.mode == ParseMode::Thumb;
193        let mut context = ParseFunctionContext::new(thumb, options);
194
195        let Some((address, ins, parsed_ins)) = parser.next() else { return Ok(ParseFunctionResult::NoEpilogue) };
196        if !is_valid_function_start(address, ins, &parsed_ins) {
197            return Ok(ParseFunctionResult::InvalidStart { address, ins, parsed_ins });
198        }
199
200        let state = context.handle_ins(&mut parser, address, ins, parsed_ins);
201        let result = if state.ended() {
202            return Ok(context.into_function(state)?);
203        } else {
204            loop {
205                let Some((address, ins, parsed_ins)) = parser.next() else {
206                    break context.into_function(ParseFunctionState::Done);
207                };
208                let state = context.handle_ins(&mut parser, address, ins, parsed_ins);
209                if state.ended() {
210                    break context.into_function(state);
211                }
212            }
213        };
214
215        let result = result?;
216        let ParseFunctionResult::Found(mut function) = result else {
217            return Ok(result);
218        };
219
220        if let Some(first_pool_address) = function.pool_constants.first() {
221            if *first_pool_address < function.start_address {
222                log::info!(
223                    "Function at {:#010x} was adjusted to include pre-code constant pool at {:#010x}",
224                    function.start_address,
225                    first_pool_address
226                );
227
228                function.first_instruction_address = function.start_address;
229                function.start_address = *first_pool_address;
230            }
231        }
232
233        Ok(ParseFunctionResult::Found(function))
234    }
235
236    pub fn parse_function(options: FunctionParseOptions) -> Result<ParseFunctionResult, FunctionAnalysisError> {
237        let FunctionParseOptions { start_address, base_address, module_code, parse_options, .. } = &options;
238
239        let thumb = parse_options.thumb.unwrap_or(Function::is_thumb_function(*start_address, module_code));
240        let parse_mode = if thumb { ParseMode::Thumb } else { ParseMode::Arm };
241        let start = (start_address - base_address) as usize;
242        let function_code = &module_code[start..];
243        let parser = Parser::new(
244            parse_mode,
245            *start_address,
246            Endian::Little,
247            ParseFlags { version: ArmVersion::V5Te, ual: false },
248            function_code,
249        );
250
251        Self::function_parser_loop(parser, options)
252    }
253
254    pub fn find_functions(options: FindFunctionsOptions) -> Result<BTreeMap<u32, Function>, FunctionAnalysisError> {
255        let FindFunctionsOptions {
256            default_name_prefix,
257            base_address,
258            module_code,
259            symbol_map,
260            module_start_address,
261            module_end_address,
262            search_options,
263        } = options;
264
265        let mut functions = BTreeMap::new();
266
267        let start_address = search_options.start_address.unwrap_or(base_address);
268        assert!((start_address & 1) == 0);
269        let start_offset = start_address - base_address;
270        let end_address = search_options.end_address.unwrap_or(base_address + module_code.len() as u32);
271        let end_offset = end_address - base_address;
272        let module_code = &module_code[..end_offset as usize];
273        let mut function_code = &module_code[start_offset as usize..end_offset as usize];
274
275        log::debug!("Searching for functions from {:#010x} to {:#010x}", start_address, end_address);
276
277        // Upper bound for function search
278        let mut last_function_address = search_options.last_function_address.unwrap_or(end_address);
279        // Used to limit how far to search for valid function starts, see `max_function_start_search_distance`
280        let mut prev_valid_address = start_address;
281        let mut address = start_address;
282
283        while !function_code.is_empty() && address <= last_function_address {
284            let thumb = Function::is_thumb_function(address, function_code);
285
286            let parse_mode = if thumb { ParseMode::Thumb } else { ParseMode::Arm };
287            let parser = Parser::new(
288                parse_mode,
289                address,
290                Endian::Little,
291                ParseFlags { version: ArmVersion::V5Te, ual: false },
292                function_code,
293            );
294
295            let (name, new) = if let Some((_, symbol)) = symbol_map.by_address(address)? {
296                (symbol.name.clone(), false)
297            } else {
298                (format!("{}{:08x}", default_name_prefix, address), true)
299            };
300
301            let function_result = Function::function_parser_loop(
302                parser,
303                FunctionParseOptions {
304                    name,
305                    start_address: address,
306                    base_address,
307                    module_code,
308                    known_end_address: None,
309                    module_start_address,
310                    module_end_address,
311                    existing_functions: search_options.existing_functions,
312                    parse_options: Default::default(),
313                },
314            )?;
315            let function = match function_result {
316                ParseFunctionResult::Found(function) => function,
317                ParseFunctionResult::IllegalIns { address: illegal_address, ins, .. } => {
318                    let search_limit = prev_valid_address.saturating_add(search_options.max_function_start_search_distance);
319                    let limit_reached = address >= search_limit;
320
321                    if !limit_reached {
322                        // It's possible that we've attempted to analyze pool constants as code, which can happen if the
323                        // function has a constant pool ahead of its code.
324                        let mut next_address = (address + 1).next_multiple_of(4);
325                        if let Some(function_addresses) = search_options.function_addresses.as_ref() {
326                            if let Some(&next_function) = function_addresses.range(address + 1..).next() {
327                                next_address = next_function;
328                            }
329                        }
330                        address = next_address;
331                        function_code = &module_code[(address - base_address) as usize..];
332                        continue;
333                    } else {
334                        if thumb {
335                            log::debug!(
336                                "Terminating function analysis due to illegal instruction at {:#010x}: {:04x}",
337                                illegal_address,
338                                ins.code()
339                            );
340                        } else {
341                            log::debug!(
342                                "Terminating function analysis due to illegal instruction at {:#010x}: {:08x}",
343                                illegal_address,
344                                ins.code()
345                            );
346                        }
347                        break;
348                    }
349                }
350                ParseFunctionResult::NoEpilogue => {
351                    log::debug!(
352                        "Terminating function analysis due to no epilogue in function starting from {:#010x}",
353                        address
354                    );
355                    break;
356                }
357                ParseFunctionResult::InvalidStart { address: start_address, ins, parsed_ins } => {
358                    let search_limit = prev_valid_address.saturating_add(search_options.max_function_start_search_distance);
359                    let limit_reached = address >= search_limit;
360
361                    if !limit_reached {
362                        let ins_size = parse_mode.instruction_size(0);
363                        address += ins_size as u32;
364                        function_code = &function_code[ins_size..];
365                        continue;
366                    } else {
367                        if thumb {
368                            log::debug!(
369                                "Terminating function analysis due to invalid function start at {:#010x}: {:04x} {}",
370                                start_address,
371                                ins.code(),
372                                parsed_ins.display(Default::default())
373                            );
374                        } else {
375                            log::debug!(
376                                "Terminating function analysis due to invalid function start at {:#010x}: {:08x} {}",
377                                start_address,
378                                ins.code(),
379                                parsed_ins.display(Default::default())
380                            );
381                        }
382                        break;
383                    }
384                }
385            };
386
387            if new {
388                symbol_map.add_function(&function);
389            }
390            function.add_local_symbols_to_map(symbol_map)?;
391
392            address = function.end_address;
393            prev_valid_address = function.end_address;
394            function_code = &module_code[(address - base_address) as usize..];
395
396            // Look for pointers to data in this module, to use as an upper bound for finding functions
397            if search_options.use_data_as_upper_bound {
398                for pool_constant in function.iter_pool_constants(module_code, base_address) {
399                    let pointer_value = pool_constant.value & !1;
400                    if pointer_value >= last_function_address {
401                        continue;
402                    }
403                    if pointer_value >= start_address && pointer_value >= address {
404                        let offset = (pointer_value - base_address) as usize;
405                        if offset < module_code.len() {
406                            let thumb = Function::is_thumb_function(pointer_value, &module_code[offset..]);
407                            let mut parser = Parser::new(
408                                if thumb { ParseMode::Thumb } else { ParseMode::Arm },
409                                pointer_value,
410                                Endian::Little,
411                                ParseFlags { ual: false, version: ArmVersion::V5Te },
412                                &module_code[offset..],
413                            );
414                            let (address, ins, parsed_ins) = parser.next().unwrap();
415                            if !is_valid_function_start(address, ins, &parsed_ins) {
416                                // The pool constant points to data, limit the upper bound
417                                last_function_address = pointer_value;
418                                log::debug!(
419                                    "Upper bound found: address to data at {:#010x} from pool constant at {:#010x} from function {}",
420                                    pool_constant.value,
421                                    pool_constant.address,
422                                    function.name
423                                );
424                            }
425                        }
426                    }
427                }
428            }
429
430            functions.insert(function.first_instruction_address, function);
431        }
432        Ok(functions)
433    }
434
435    pub fn add_local_symbols_to_map(&self, symbol_map: &mut SymbolMap) -> Result<(), SymbolMapError> {
436        for address in self.labels.iter() {
437            symbol_map.add_label(*address, self.thumb)?;
438        }
439        for address in self.pool_constants.iter() {
440            symbol_map.add_pool_constant(*address)?;
441        }
442        for jump_table in self.jump_tables() {
443            symbol_map.add_jump_table(jump_table)?;
444        }
445        for inline_table in self.inline_tables().values() {
446            symbol_map.add_data(None, inline_table.address, (*inline_table).into())?;
447        }
448        Ok(())
449    }
450
451    pub fn find_secure_area_functions(
452        module_code: &[u8],
453        base_addr: u32,
454        symbol_map: &mut SymbolMap,
455    ) -> BTreeMap<u32, Function> {
456        let mut functions = BTreeMap::new();
457
458        let parse_flags = ParseFlags { ual: false, version: ArmVersion::V5Te };
459
460        let mut address = base_addr;
461        let mut state = SecureAreaState::default();
462        for ins_code in module_code.chunks_exact(2) {
463            let ins_code = u16::from_le_slice(ins_code);
464            let ins = thumb::Ins::new(ins_code as u32, &parse_flags);
465            let parsed_ins = ins.parse(&parse_flags);
466
467            state = state.handle(address, &parsed_ins);
468            if let Some(function) = state.get_function() {
469                let function = Function {
470                    name: function.name().to_string(),
471                    start_address: function.start(),
472                    end_address: function.end(),
473                    first_instruction_address: function.start(),
474                    thumb: true,
475                    labels: Labels::new(),
476                    pool_constants: PoolConstants::new(),
477                    jump_tables: JumpTables::new(),
478                    inline_tables: InlineTables::new(),
479                    function_calls: FunctionCalls::new(),
480                };
481                symbol_map.add_function(&function);
482                functions.insert(function.first_instruction_address, function);
483            }
484
485            address += 2;
486        }
487
488        functions
489    }
490
491    pub fn parser<'a>(&'a self, module_code: &'a [u8], base_address: u32) -> Parser<'a> {
492        Parser::new(
493            if self.thumb { ParseMode::Thumb } else { ParseMode::Arm },
494            self.start_address,
495            Endian::Little,
496            ParseFlags { ual: false, version: ArmVersion::V5Te },
497            self.code(module_code, base_address),
498        )
499    }
500
501    pub fn code<'a>(&self, module_code: &'a [u8], base_address: u32) -> &'a [u8] {
502        let start = (self.start_address - base_address) as usize;
503        let end = (self.end_address - base_address) as usize;
504        &module_code[start..end]
505    }
506
507    pub fn name(&self) -> &str {
508        &self.name
509    }
510
511    pub fn start_address(&self) -> u32 {
512        self.start_address
513    }
514
515    pub fn end_address(&self) -> u32 {
516        self.end_address
517    }
518
519    pub fn first_instruction_address(&self) -> u32 {
520        self.first_instruction_address
521    }
522
523    pub fn is_thumb(&self) -> bool {
524        self.thumb
525    }
526
527    pub fn labels(&self) -> impl Iterator<Item = &u32> {
528        self.labels.iter()
529    }
530
531    pub fn jump_tables(&self) -> impl Iterator<Item = &JumpTable> {
532        self.jump_tables.values()
533    }
534
535    pub fn inline_tables(&self) -> &InlineTables {
536        &self.inline_tables
537    }
538
539    pub fn get_inline_table_at(&self, address: u32) -> Option<&InlineTable> {
540        Self::inline_table_at(&self.inline_tables, address)
541    }
542
543    fn inline_table_at(inline_tables: &InlineTables, address: u32) -> Option<&InlineTable> {
544        inline_tables.values().find(|table| address >= table.address && address < table.address + table.size)
545    }
546
547    pub fn pool_constants(&self) -> &PoolConstants {
548        &self.pool_constants
549    }
550
551    pub fn iter_pool_constants<'a>(
552        &'a self,
553        module_code: &'a [u8],
554        base_address: u32,
555    ) -> impl Iterator<Item = PoolConstant> + 'a {
556        self.pool_constants.iter().map(move |&address| {
557            let start = (address - base_address) as usize;
558            let bytes = &module_code[start..];
559            PoolConstant { address, value: u32::from_le_slice(bytes) }
560        })
561    }
562
563    pub fn function_calls(&self) -> &FunctionCalls {
564        &self.function_calls
565    }
566}
567
568#[derive(Default)]
569pub struct FunctionParseOptions<'a> {
570    pub name: String,
571    pub start_address: u32,
572    pub base_address: u32,
573    pub module_code: &'a [u8],
574    pub known_end_address: Option<u32>,
575    pub module_start_address: u32,
576    pub module_end_address: u32,
577    pub existing_functions: Option<&'a BTreeMap<u32, Function>>,
578
579    pub parse_options: ParseFunctionOptions,
580}
581
582pub struct FindFunctionsOptions<'a> {
583    pub default_name_prefix: &'a str,
584    pub base_address: u32,
585    pub module_code: &'a [u8],
586    pub symbol_map: &'a mut SymbolMap,
587    pub module_start_address: u32,
588    pub module_end_address: u32,
589
590    pub search_options: FunctionSearchOptions<'a>,
591}
592
593struct ParseFunctionContext<'a> {
594    name: String,
595    start_address: u32,
596    thumb: bool,
597    end_address: Option<u32>,
598    known_end_address: Option<u32>,
599    labels: Labels,
600    pool_constants: PoolConstants,
601    jump_tables: JumpTables,
602    inline_tables: InlineTables,
603    function_calls: FunctionCalls,
604
605    module_start_address: u32,
606    module_end_address: u32,
607    existing_functions: Option<&'a BTreeMap<u32, Function>>,
608
609    /// Address of last conditional instruction, so we can detect the final return instruction
610    last_conditional_destination: Option<u32>,
611    /// Address of last pool constant, to get the function's true end address
612    last_pool_address: Option<u32>,
613    /// State machine for detecting jump tables and adding them as symbols
614    jump_table_state: JumpTableState,
615    /// State machine for detecting branches (B, not BL) to other functions
616    function_branch_state: FunctionBranchState,
617    /// State machine for detecting inline data tables within the function
618    inline_table_state: InlineTableState,
619    /// State machine for detecting illegal code sequences
620    illegal_code_state: IllegalCodeState,
621
622    prev_ins: Option<Ins>,
623    prev_parsed_ins: Option<ParsedIns>,
624    prev_address: Option<u32>,
625}
626
627#[derive(Debug, Snafu)]
628pub enum IntoFunctionError {
629    #[snafu(display("Cannot turn parse context into function before parsing is done"))]
630    NotDone { backtrace: Backtrace },
631}
632
633impl<'a> ParseFunctionContext<'a> {
634    pub fn new(thumb: bool, options: FunctionParseOptions<'a>) -> Self {
635        let FunctionParseOptions {
636            name,
637            start_address,
638            known_end_address,
639            module_start_address,
640            module_end_address,
641            existing_functions,
642            ..
643        } = options;
644
645        Self {
646            name,
647            start_address,
648            thumb,
649            end_address: None,
650            known_end_address,
651            labels: Labels::new(),
652            pool_constants: PoolConstants::new(),
653            jump_tables: JumpTables::new(),
654            inline_tables: InlineTables::new(),
655            function_calls: FunctionCalls::new(),
656
657            module_start_address,
658            module_end_address,
659            existing_functions,
660
661            last_conditional_destination: None,
662            last_pool_address: None,
663            jump_table_state: if thumb {
664                JumpTableState::Thumb(Default::default())
665            } else {
666                JumpTableState::Arm(Default::default())
667            },
668            function_branch_state: Default::default(),
669            inline_table_state: Default::default(),
670            illegal_code_state: Default::default(),
671
672            prev_ins: None,
673            prev_parsed_ins: None,
674            prev_address: None,
675        }
676    }
677
678    fn handle_ins_inner(&mut self, parser: &mut Parser, address: u32, ins: Ins, parsed_ins: &ParsedIns) -> ParseFunctionState {
679        if self.pool_constants.contains(&address) {
680            parser.seek_forward(address + 4);
681            return ParseFunctionState::Continue;
682        }
683        if let Some(inline_table) = Function::inline_table_at(&self.inline_tables, address) {
684            parser.seek_forward(inline_table.address + inline_table.size);
685            return ParseFunctionState::Continue;
686        }
687
688        self.jump_table_state = self.jump_table_state.handle(address, ins, parsed_ins, &mut self.jump_tables);
689        self.last_conditional_destination = self.last_conditional_destination.max(self.jump_table_state.table_end_address());
690        if let Some(label) = self.jump_table_state.get_label(address, ins) {
691            self.labels.insert(label);
692            self.last_conditional_destination = self.last_conditional_destination.max(Some(label));
693        }
694
695        if self.jump_table_state.is_numerical_jump_offset() {
696            // Not an instruction, continue
697            return ParseFunctionState::Continue;
698        }
699
700        let ins_size = if let Ins::Thumb(thumb_ins) = ins {
701            if thumb_ins.op != thumb::Opcode::Bl && thumb_ins.op != thumb::Opcode::BlxI {
702                // Typical Thumb instruction
703                2
704            } else if matches!(parsed_ins.args[0], Argument::BranchDest(_)) {
705                // Combined BL/BLX instruction
706                4
707            } else {
708                // Not combined
709                return ParseFunctionState::IllegalIns { address, ins, parsed_ins: parsed_ins.clone() };
710            }
711        } else {
712            // ARM instruction
713            4
714        };
715
716        self.illegal_code_state = self.illegal_code_state.handle(ins, parsed_ins);
717        if self.illegal_code_state.is_illegal() {
718            return ParseFunctionState::IllegalIns { address, ins, parsed_ins: parsed_ins.clone() };
719        }
720
721        let in_conditional_block = Some(address) < self.last_conditional_destination;
722        let is_return = Function::is_return(
723            ins,
724            parsed_ins,
725            address,
726            self.start_address,
727            self.module_start_address,
728            self.module_end_address,
729        );
730        if !in_conditional_block && is_return {
731            let end_address = address + ins_size;
732            if let Some(destination) = Function::is_branch(ins, parsed_ins, address) {
733                let outside_function = destination < self.start_address || destination >= end_address;
734                if outside_function {
735                    // Tail call
736                    self.function_calls.insert(address, CalledFunction { ins, address: destination, thumb: self.thumb });
737                }
738            }
739
740            // We're not inside a conditional code block, so this is the final return instruction
741            self.end_address = Some(address + ins_size);
742            return ParseFunctionState::Done;
743        }
744
745        if address > self.start_address && Function::is_entry_instruction(ins, parsed_ins) {
746            'check_tail_call: {
747                let Some(prev_ins) = self.prev_ins else {
748                    break 'check_tail_call;
749                };
750                let Some(prev_parsed_ins) = self.prev_parsed_ins.as_ref() else {
751                    break 'check_tail_call;
752                };
753                let Some(prev_address) = self.prev_address else {
754                    break 'check_tail_call;
755                };
756                if Function::is_branch(prev_ins, prev_parsed_ins, prev_address).is_some() {
757                    let is_conditional = in_conditional_block || prev_ins.is_conditional();
758                    if is_conditional {
759                        // Tail call
760                        self.end_address = Some(address);
761                        return ParseFunctionState::Done;
762                    }
763                }
764            };
765        }
766
767        self.function_branch_state = self.function_branch_state.handle(ins, parsed_ins);
768        if let Some(destination) = Function::is_branch(ins, parsed_ins, address) {
769            let in_current_module = destination >= self.module_start_address && destination < self.module_end_address;
770            if !in_current_module {
771                // Tail call
772                self.function_calls.insert(address, CalledFunction { ins, address: destination, thumb: self.thumb });
773            } else if self.function_branch_state.is_function_branch()
774                || self.existing_functions.map(|functions| functions.contains_key(&destination)).unwrap_or(false)
775            {
776                if !ins.is_conditional() && !in_conditional_block {
777                    // This is an unconditional backwards function branch, which means this function has ended
778                    self.end_address = Some(address + ins_size);
779                    return ParseFunctionState::Done;
780                } else {
781                    // TODO: Always run this (move it outside of else block). SectionExt::relocatable_code must take condition
782                    // code into account so the game matches after linking
783                    self.function_calls.insert(address, CalledFunction { ins, address: destination, thumb: self.thumb });
784                }
785            } else {
786                // Normal branch instruction, insert a label
787                if let Some(state) = self.handle_label(destination, address, parser, ins_size) {
788                    return state;
789                }
790            }
791        }
792
793        if let Some(pool_address) = Function::is_pool_load(ins, parsed_ins, address, self.thumb) {
794            self.pool_constants.insert(pool_address);
795            self.last_pool_address = self.last_pool_address.max(Some(pool_address));
796        }
797
798        self.inline_table_state = self.inline_table_state.handle(self.thumb, address, parsed_ins);
799        if let Some(table) = self.inline_table_state.get_table() {
800            log::debug!("Inline table found at {:#x}, size {:#x}", table.address, table.size);
801            self.inline_tables.insert(table.address, table);
802        }
803
804        if let Some(called_function) = Function::is_function_call(ins, parsed_ins, address, self.thumb) {
805            self.function_calls.insert(address, called_function);
806        }
807
808        ParseFunctionState::Continue
809    }
810
811    pub fn handle_ins(&mut self, parser: &mut Parser, address: u32, ins: Ins, parsed_ins: ParsedIns) -> ParseFunctionState {
812        let state = self.handle_ins_inner(parser, address, ins, &parsed_ins);
813        self.prev_ins = Some(ins);
814        self.prev_parsed_ins = Some(parsed_ins);
815        self.prev_address = Some(address);
816        state
817    }
818
819    fn handle_label(
820        &mut self,
821        destination: u32,
822        address: u32,
823        parser: &mut Parser,
824        ins_size: u32,
825    ) -> Option<ParseFunctionState> {
826        self.labels.insert(destination);
827        self.last_conditional_destination = self.last_conditional_destination.max(Some(destination));
828
829        let next_address = address + ins_size;
830        if self.pool_constants.contains(&next_address) {
831            let branch_backwards = destination <= address;
832
833            // Load instructions in ARM mode can have an offset of up to ±4kB. Therefore, some functions must
834            // emit pool constants in the middle so they can all be accessed by PC-relative loads. There will
835            // also be branch instruction right before, so that the pool constants don't get executed.
836
837            // Sometimes, the pre-pool branch is conveniently placed at an actual branch in the code, and
838            // leads even further than the end of the pool constants. In that case we should already have found
839            // a label at a lower address.
840            if let Some(after_pools) = self.labels.range(address + 1..).next().copied() {
841                if after_pools > address + 0x1000 {
842                    log::warn!("Massive gap from constant pool at {:#x} to next label at {:#x}", next_address, after_pools);
843                }
844                parser.seek_forward(after_pools);
845            } else if !branch_backwards {
846                // Backwards branch with no further branch labels. This type of function contains some kind of infinite loop,
847                // hence the lack of return instruction as the final instruction.
848                self.end_address = Some(next_address);
849                return Some(ParseFunctionState::Done);
850            } else {
851                let after_pools = (next_address..).step_by(4).find(|addr| !self.pool_constants.contains(addr)).unwrap();
852                log::warn!(
853                    "No label past constant pool at {:#x}, jumping to first address not occupied by a pool constant ({:#x})",
854                    next_address,
855                    after_pools
856                );
857                parser.seek_forward(after_pools);
858            }
859        }
860
861        None
862    }
863
864    fn into_function(self, state: ParseFunctionState) -> Result<ParseFunctionResult, IntoFunctionError> {
865        match state {
866            ParseFunctionState::Continue => {
867                return NotDoneSnafu.fail();
868            }
869            ParseFunctionState::IllegalIns { address, ins, parsed_ins } => {
870                return Ok(ParseFunctionResult::IllegalIns { address, ins, parsed_ins })
871            }
872            ParseFunctionState::Done => {}
873        };
874        let Some(end_address) = self.end_address else {
875            return Ok(ParseFunctionResult::NoEpilogue);
876        };
877
878        let end_address = self
879            .known_end_address
880            .unwrap_or(end_address.max(self.last_pool_address.map(|a| a + 4).unwrap_or(0)).next_multiple_of(4));
881        if end_address > self.module_end_address {
882            return Ok(ParseFunctionResult::NoEpilogue);
883        }
884
885        Ok(ParseFunctionResult::Found(Function {
886            name: self.name,
887            start_address: self.start_address,
888            end_address,
889            first_instruction_address: self.start_address,
890            thumb: self.thumb,
891            labels: self.labels,
892            pool_constants: self.pool_constants,
893            jump_tables: self.jump_tables,
894            inline_tables: self.inline_tables,
895            function_calls: self.function_calls,
896        }))
897    }
898}
899
900#[derive(Default)]
901pub struct ParseFunctionOptions {
902    /// Whether the function is in Thumb or ARM mode, or None if it should be detected automatically.
903    pub thumb: Option<bool>,
904}
905
906enum ParseFunctionState {
907    Continue,
908    IllegalIns { address: u32, ins: Ins, parsed_ins: ParsedIns },
909    Done,
910}
911
912impl ParseFunctionState {
913    pub fn ended(&self) -> bool {
914        match self {
915            Self::Continue => false,
916            Self::IllegalIns { .. } | Self::Done => true,
917        }
918    }
919}
920
921#[derive(Debug)]
922pub enum ParseFunctionResult {
923    Found(Function),
924    IllegalIns { address: u32, ins: Ins, parsed_ins: ParsedIns },
925    NoEpilogue,
926    InvalidStart { address: u32, ins: Ins, parsed_ins: ParsedIns },
927}
928
929#[derive(Default)]
930pub struct FunctionSearchOptions<'a> {
931    /// Address to start searching from. Defaults to the base address.
932    pub start_address: Option<u32>,
933    /// Last address that a function can start from. Defaults to [`Self::end_address`].
934    pub last_function_address: Option<u32>,
935    /// Address to end the search. Defaults to the base address plus code size.
936    pub end_address: Option<u32>,
937    /// If zero, end the search when an illegal starting instruction is found. Otherwise, continue searching for a valid
938    /// function start for up to this many bytes. Set to [`u32::MAX`] to search until the end of the module.
939    pub max_function_start_search_distance: u32,
940    /// If true, pointers to data will be used to limit the upper bound address.
941    pub use_data_as_upper_bound: bool,
942    /// Guarantees that all these addresses will be analyzed, even if the function analysis would terminate before they are
943    /// reached. Used for .init functions.
944    /// Note: This will override `keep_searching_for_valid_function_start`, they are not intended to be used together.
945    pub function_addresses: Option<BTreeSet<u32>>,
946    /// If a branch instruction branches into one of these functions, it will be treated as a function branch instead of
947    /// inserting a label at the branch destination.
948    /// If the function branch is unconditional, it will also be treated as a tail call and terminate the analysis of the
949    /// current function.
950    pub existing_functions: Option<&'a BTreeMap<u32, Function>>,
951}
952
953#[derive(Clone, Copy, Debug)]
954pub struct CalledFunction {
955    pub ins: Ins,
956    pub address: u32,
957    pub thumb: bool,
958}
959
960pub struct PoolConstant {
961    pub address: u32,
962    pub value: u32,
963}