minidump_processor/
arg_recovery.rs

1use minidump::{CpuContext, MinidumpRawContext, UnifiedMemory};
2use minidump_unwind::{CallStack, CallingConvention, FunctionArg, FunctionArgs};
3
4// # Recovering x86 function arguments
5//
6// This approach is very hacky and very likely to produce incorrect values
7// in many situations. But sometimes it will be right, and that's useful, right?
8//
9// x86 has two common calling conventions which are very friendly to recovering
10// function arguments: cdecl and thiscall.
11//
12// In both conventions, arguments are unconditionally pushed to the
13// stack in reverse order, so in theory all we need to do is jump to the
14// top of the caller's stack frame and just start reading the values off!
15//
16// (thiscall requires some special handling for the implicit "this" arg, see below.)
17//
18// This leaves us with several problems to solve:
19//
20// 1. determining the calling convention
21// 2. determining the number (and name) of arguments
22// 3. determining the size of arguments
23//
24//
25//
26// ## Determining the calling convention
27//
// We don't have the kind of debuginfo that would tell us what the calling
// convention was, but we can make an educated guess based on the name of
// the function:
31//
32// * If the function name contains a ::, it's probably a C++ member function,
33//   in which case it's thiscall
34// * Otherwise, assume it's cdecl
35//
36// It's a blunt heuristic that will misclassify static functions and always
37// mishandle anything that's manually defined to be fastcall or whatever else,
38// but it should do the right thing for *most* functions!
39//
40//
41//
42// ## Determining the number of arguments
43//
44// We assume a function name includes its argument list if it contains
45// both an open-paren "(" and close-paren ")" in the right order. Everything
46// between the first open-paren and last close-paren is assumed to be the
47// argument list.
48//
// The number of arguments in a non-empty argument list is then just "one more
// than the number of commas in the argument list".
// However C++ templates introduce "fake" commas. This can be easily handled by
// tracking the nesting level of `<` and `>` and only considering a comma "real"
// if the nesting level is 0. We similarly handle `(` and `)` to try to handle
// function pointer types.
54//
// thiscall functions have an implicit first argument "this". Windows (Visual C++)
// toolchains will pass "this" via $ecx instead of on the stack. Other (gcc)
// toolchains will pass "this" just like any other argument (so it will be at
// the top of the stack frame).
59//
60//
61//
// ## Determining the size of arguments
63//
64// Rather than attempting to parse and resolve C++ types (*laughs and cries at the same time*),
65// we just unconditionally assume all arguments are pointer-sized. This is intuitively true
66// most of the time. The major exceptions are `bool` and `uint64_t`. Maybe those
67// are worth carving out special cases for, but until then: it's all pointers!
68
69/// Try to recover function arguments
70pub fn fill_arguments(call_stack: &mut CallStack, stack_memory: Option<UnifiedMemory>) {
71    // Collect up all the results at once to avoid borrowing issues.
72    let args = call_stack
73        .frames
74        .iter()
75        .enumerate()
76        .map(|(frame_idx, frame)| {
77            // Only x86 is implemented because it has friendly calling conventions.
78            // and we need the function name to make any guesses at what the arguments are.
79            if let (Some(mem), Some(func_name), MinidumpRawContext::X86(ctx)) =
80                (stack_memory, &frame.function_name, &frame.context.raw)
81            {
82                const POINTER_WIDTH: u64 = 4;
83
84                if let Some((calling_convention, argument_list)) = parse_x86_arg_list(func_name) {
85                    // We're assuming this is either cdecl or thiscall. In either case,
86                    // all the arguments are saved at the top of the caller's stackframe
87                    // in reverse order (which in fact means we can start at the top
88                    // of the frame and read them off *in order*).
89
90                    // The stack grows down, so the maximum address in the stack
91                    // is actually the base of the stack. Since we're walking down
92                    // the stack, the base of the stack is a good upper-bound
93                    // (and default value) for any stack/frame pointer.
94                    let stack_base = mem.base_address().saturating_add(mem.size());
95
96                    let caller_stack_pointer = call_stack
97                        .frames
98                        .get(frame_idx + 1)
99                        .map(|f| f.context.get_stack_pointer())
100                        .unwrap_or(stack_base);
101                    let caller_frame_pointer = call_stack
102                        .frames
103                        .get(frame_idx + 2)
104                        .map(|f| f.context.get_stack_pointer())
105                        .unwrap_or(stack_base);
106
107                    let mut read_head = caller_stack_pointer;
108                    let mut pop_value = || {
109                        if read_head < caller_frame_pointer {
110                            let val = mem.get_memory_at_address::<u32>(read_head);
111                            read_head += POINTER_WIDTH;
112                            val.map(|val| val as u64)
113                        } else {
114                            None
115                        }
116                    };
117
118                    let mut args = Vec::new();
119
120                    // Handle the first argument of thiscall
121                    match calling_convention {
122                        CallingConvention::WindowsThisCall => {
123                            // On windows, "this" is passed in eax
124                            let value = ctx
125                                .get_register("eax", &frame.context.valid)
126                                .map(|x| x as u64);
127                            args.push(FunctionArg {
128                                name: String::from("this"),
129                                value,
130                            });
131                        }
132                        CallingConvention::OtherThisCall => {
133                            // Everywhere else, "this" is passed like a normal value
134                            let value = pop_value();
135                            args.push(FunctionArg {
136                                name: String::from("this"),
137                                value,
138                            });
139                        }
140                        CallingConvention::Cdecl => {
141                            // Nothing to do
142                        }
143                    }
144
145                    // Now handle the rest
146                    args.extend(argument_list.iter().map(|&arg_name| {
147                        let value = pop_value();
148                        FunctionArg {
149                            name: String::from(arg_name),
150                            value,
151                        }
152                    }));
153
154                    return Some(FunctionArgs {
155                        calling_convention,
156                        args,
157                    });
158                }
159            }
160            None
161        })
162        .collect::<Vec<_>>();
163
164    // Now write the values back to the call stack
165    for (frame, args) in call_stack.frames.iter_mut().zip(args) {
166        frame.arguments = args;
167    }
168}
169
170fn parse_x86_arg_list(func_name: &str) -> Option<(CallingConvention, Vec<&str>)> {
171    if let Some((func_name, arg_list)) = func_name.split_once('(') {
172        if let Some((arg_list, _junk)) = arg_list.rsplit_once(')') {
173            let calling_convention = if func_name.contains("::") {
174                // Assume this is a C++ method (thiscall)
175                let windows = true; // TODO
176                if windows {
177                    CallingConvention::WindowsThisCall
178                } else {
179                    CallingConvention::OtherThisCall
180                }
181            } else {
182                CallingConvention::Cdecl
183                // Assume this is a static function (cdecl)
184            };
185
186            let mut args = Vec::new();
187
188            // Now parse the arguments out
189            let mut arg_start = 0;
190            let mut template_depth = 0;
191            let mut paren_depth = 0;
192
193            // Only consider a comma a "real" argument separator if we aren't
194            // currently nested inside of templates (`<>`) or parens (`()`).
195            for (idx, c) in arg_list.bytes().enumerate() {
196                match c as char {
197                    '<' => template_depth += 1,
198                    '>' => {
199                        if template_depth > 0 {
200                            template_depth -= 1;
201                        } else {
202                            // Parser is lost
203                            return None;
204                        }
205                    }
206                    '(' => paren_depth += 1,
207                    ')' => {
208                        if paren_depth > 0 {
209                            paren_depth -= 1;
210                        } else {
211                            // Parser is lost
212                            return None;
213                        }
214                    }
215                    ',' => {
216                        if template_depth == 0 && paren_depth == 0 {
217                            args.push(arg_list[arg_start..idx].trim());
218                            arg_start = idx + 1;
219                        }
220                    }
221                    _ => {}
222                }
223            }
224
225            // Whole function name parsed, the remainder is the last argument.
226            args.push(arg_list[arg_start..].trim());
227
228            // Only accept the result if all nesting was balanced
229            if template_depth == 0 && paren_depth == 0 {
230                return Some((calling_convention, args));
231            }
232        }
233    }
234    None
235}