minidump_processor/arg_recovery.rs
1use minidump::{CpuContext, MinidumpRawContext, UnifiedMemory};
2use minidump_unwind::{CallStack, CallingConvention, FunctionArg, FunctionArgs};
3
4// # Recovering x86 function arguments
5//
6// This approach is very hacky and very likely to produce incorrect values
7// in many situations. But sometimes it will be right, and that's useful, right?
8//
9// x86 has two common calling conventions which are very friendly to recovering
10// function arguments: cdecl and thiscall.
11//
12// In both conventions, arguments are unconditionally pushed to the
13// stack in reverse order, so in theory all we need to do is jump to the
14// top of the caller's stack frame and just start reading the values off!
15//
16// (thiscall requires some special handling for the implicit "this" arg, see below.)
17//
18// This leaves us with several problems to solve:
19//
20// 1. determining the calling convention
21// 2. determining the number (and name) of arguments
22// 3. determining the size of arguments
23//
24//
25//
26// ## Determining the calling convention
27//
28// We don't have the kind of debuginfo that would tell use what the calling
29// convention was, but we can make an educated guess based on the name of
30// the function:
31//
32// * If the function name contains a ::, it's probably a C++ member function,
33// in which case it's thiscall
34// * Otherwise, assume it's cdecl
35//
36// It's a blunt heuristic that will misclassify static functions and always
37// mishandle anything that's manually defined to be fastcall or whatever else,
38// but it should do the right thing for *most* functions!
39//
40//
41//
42// ## Determining the number of arguments
43//
44// We assume a function name includes its argument list if it contains
45// both an open-paren "(" and close-paren ")" in the right order. Everything
46// between the first open-paren and last close-paren is assumed to be the
47// argument list.
48//
49// The number of arguments is then just "the number of commas in the argument list".
50// However C++ templates introduce "fake" commas. This can be easily handled by
51// tracking the nesting level of `<` and `>` and only considering a comma "real"
52// if the nesting level is 0. We similarly handle `(` and `)` to try to handle
53// function pointer types.
54//
55// thiscall functions have an implicit first argument "this". Windows (Visual C++)
56// toolchains will pass "this" via $eax instead of on the stack. Other (gcc)
57// toolchains will pass "this" just like any other argument (so it will be at
58// the top of the stack frame).
59//
60//
61//
62// # Determining the size of arguments
63//
64// Rather than attempting to parse and resolve C++ types (*laughs and cries at the same time*),
65// we just unconditionally assume all arguments are pointer-sized. This is intuitively true
66// most of the time. The major exceptions are `bool` and `uint64_t`. Maybe those
67// are worth carving out special cases for, but until then: it's all pointers!
68
69/// Try to recover function arguments
70pub fn fill_arguments(call_stack: &mut CallStack, stack_memory: Option<UnifiedMemory>) {
71 // Collect up all the results at once to avoid borrowing issues.
72 let args = call_stack
73 .frames
74 .iter()
75 .enumerate()
76 .map(|(frame_idx, frame)| {
77 // Only x86 is implemented because it has friendly calling conventions.
78 // and we need the function name to make any guesses at what the arguments are.
79 if let (Some(mem), Some(func_name), MinidumpRawContext::X86(ctx)) =
80 (stack_memory, &frame.function_name, &frame.context.raw)
81 {
82 const POINTER_WIDTH: u64 = 4;
83
84 if let Some((calling_convention, argument_list)) = parse_x86_arg_list(func_name) {
85 // We're assuming this is either cdecl or thiscall. In either case,
86 // all the arguments are saved at the top of the caller's stackframe
87 // in reverse order (which in fact means we can start at the top
88 // of the frame and read them off *in order*).
89
90 // The stack grows down, so the maximum address in the stack
91 // is actually the base of the stack. Since we're walking down
92 // the stack, the base of the stack is a good upper-bound
93 // (and default value) for any stack/frame pointer.
94 let stack_base = mem.base_address().saturating_add(mem.size());
95
96 let caller_stack_pointer = call_stack
97 .frames
98 .get(frame_idx + 1)
99 .map(|f| f.context.get_stack_pointer())
100 .unwrap_or(stack_base);
101 let caller_frame_pointer = call_stack
102 .frames
103 .get(frame_idx + 2)
104 .map(|f| f.context.get_stack_pointer())
105 .unwrap_or(stack_base);
106
107 let mut read_head = caller_stack_pointer;
108 let mut pop_value = || {
109 if read_head < caller_frame_pointer {
110 let val = mem.get_memory_at_address::<u32>(read_head);
111 read_head += POINTER_WIDTH;
112 val.map(|val| val as u64)
113 } else {
114 None
115 }
116 };
117
118 let mut args = Vec::new();
119
120 // Handle the first argument of thiscall
121 match calling_convention {
122 CallingConvention::WindowsThisCall => {
123 // On windows, "this" is passed in eax
124 let value = ctx
125 .get_register("eax", &frame.context.valid)
126 .map(|x| x as u64);
127 args.push(FunctionArg {
128 name: String::from("this"),
129 value,
130 });
131 }
132 CallingConvention::OtherThisCall => {
133 // Everywhere else, "this" is passed like a normal value
134 let value = pop_value();
135 args.push(FunctionArg {
136 name: String::from("this"),
137 value,
138 });
139 }
140 CallingConvention::Cdecl => {
141 // Nothing to do
142 }
143 }
144
145 // Now handle the rest
146 args.extend(argument_list.iter().map(|&arg_name| {
147 let value = pop_value();
148 FunctionArg {
149 name: String::from(arg_name),
150 value,
151 }
152 }));
153
154 return Some(FunctionArgs {
155 calling_convention,
156 args,
157 });
158 }
159 }
160 None
161 })
162 .collect::<Vec<_>>();
163
164 // Now write the values back to the call stack
165 for (frame, args) in call_stack.frames.iter_mut().zip(args) {
166 frame.arguments = args;
167 }
168}
169
170fn parse_x86_arg_list(func_name: &str) -> Option<(CallingConvention, Vec<&str>)> {
171 if let Some((func_name, arg_list)) = func_name.split_once('(') {
172 if let Some((arg_list, _junk)) = arg_list.rsplit_once(')') {
173 let calling_convention = if func_name.contains("::") {
174 // Assume this is a C++ method (thiscall)
175 let windows = true; // TODO
176 if windows {
177 CallingConvention::WindowsThisCall
178 } else {
179 CallingConvention::OtherThisCall
180 }
181 } else {
182 CallingConvention::Cdecl
183 // Assume this is a static function (cdecl)
184 };
185
186 let mut args = Vec::new();
187
188 // Now parse the arguments out
189 let mut arg_start = 0;
190 let mut template_depth = 0;
191 let mut paren_depth = 0;
192
193 // Only consider a comma a "real" argument separator if we aren't
194 // currently nested inside of templates (`<>`) or parens (`()`).
195 for (idx, c) in arg_list.bytes().enumerate() {
196 match c as char {
197 '<' => template_depth += 1,
198 '>' => {
199 if template_depth > 0 {
200 template_depth -= 1;
201 } else {
202 // Parser is lost
203 return None;
204 }
205 }
206 '(' => paren_depth += 1,
207 ')' => {
208 if paren_depth > 0 {
209 paren_depth -= 1;
210 } else {
211 // Parser is lost
212 return None;
213 }
214 }
215 ',' => {
216 if template_depth == 0 && paren_depth == 0 {
217 args.push(arg_list[arg_start..idx].trim());
218 arg_start = idx + 1;
219 }
220 }
221 _ => {}
222 }
223 }
224
225 // Whole function name parsed, the remainder is the last argument.
226 args.push(arg_list[arg_start..].trim());
227
228 // Only accept the result if all nesting was balanced
229 if template_depth == 0 && paren_depth == 0 {
230 return Some((calling_convention, args));
231 }
232 }
233 }
234 None
235}