1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235
use crate::{CallStack, CallingConvention, FunctionArg, FunctionArgs};
use minidump::{CpuContext, MinidumpMemory, MinidumpRawContext};
// # Recovering x86 function arguments
//
// This approach is very hacky and very likely to produce incorrect values
// in many situations. But sometimes it will be right, and that's useful, right?
//
// x86 has two common calling conventions which are very friendly to recovering
// function arguments: cdecl and thiscall.
//
// In both conventions, arguments are unconditionally pushed to the
// stack in reverse order, so in theory all we need to do is jump to the
// top of the caller's stack frame and just start reading the values off!
//
// (thiscall requires some special handling for the implicit "this" arg, see below.)
//
// This leaves us with several problems to solve:
//
// 1. determining the calling convention
// 2. determining the number (and name) of arguments
// 3. determining the size of arguments
//
//
//
// ## Determining the calling convention
//
// We don't have the kind of debuginfo that would tell us what the calling
// convention was, but we can make an educated guess based on the name of
// the function:
//
// * If the function name contains a ::, it's probably a C++ member function,
// in which case it's thiscall
// * Otherwise, assume it's cdecl
//
// It's a blunt heuristic that will misclassify static functions and always
// mishandle anything that's manually defined to be fastcall or whatever else,
// but it should do the right thing for *most* functions!
//
//
//
// ## Determining the number of arguments
//
// We assume a function name includes its argument list if it contains
// both an open-paren "(" and close-paren ")" in the right order. Everything
// between the first open-paren and last close-paren is assumed to be the
// argument list.
//
// The number of arguments is then just "the number of commas in the argument list".
// However C++ templates introduce "fake" commas. This can be easily handled by
// tracking the nesting level of `<` and `>` and only considering a comma "real"
// if the nesting level is 0. We similarly handle `(` and `)` to try to handle
// function pointer types.
//
// thiscall functions have an implicit first argument "this". Windows (Visual C++)
// toolchains will pass "this" via $ecx instead of on the stack. Other (gcc)
// toolchains will pass "this" just like any other argument (so it will be at
// the top of the stack frame).
//
//
//
// ## Determining the size of arguments
//
// Rather than attempting to parse and resolve C++ types (*laughs and cries at the same time*),
// we just unconditionally assume all arguments are pointer-sized. This is intuitively true
// most of the time. The major exceptions are `bool` and `uint64_t`. Maybe those
// are worth carving out special cases for, but until then: it's all pointers!
/// Try to recover function arguments
pub fn fill_arguments(call_stack: &mut CallStack, stack_memory: Option<&MinidumpMemory>) {
// Collect up all the results at once to avoid borrowing issues.
let args = call_stack
.frames
.iter()
.enumerate()
.map(|(frame_idx, frame)| {
// Only x86 is implemented because it has friendly calling conventions.
// and we need the function name to make any guesses at what the arguments are.
if let (Some(mem), Some(func_name), MinidumpRawContext::X86(ctx)) =
(stack_memory, &frame.function_name, &frame.context.raw)
{
const POINTER_WIDTH: u64 = 4;
if let Some((calling_convention, argument_list)) = parse_x86_arg_list(func_name) {
// We're assuming this is either cdecl or thiscall. In either case,
// all the arguments are saved at the top of the caller's stackframe
// in reverse order (which in fact means we can start at the top
// of the frame and read them off *in order*).
// The stack grows down, so the maximum address in the stack
// is actually the base of the stack. Since we're walking down
// the stack, the base of the stack is a good upper-bound
// (and default value) for any stack/frame pointer.
let stack_base = mem.base_address.saturating_add(mem.size);
let caller_stack_pointer = call_stack
.frames
.get(frame_idx + 1)
.map(|f| f.context.get_stack_pointer())
.unwrap_or(stack_base);
let caller_frame_pointer = call_stack
.frames
.get(frame_idx + 2)
.map(|f| f.context.get_stack_pointer())
.unwrap_or(stack_base);
let mut read_head = caller_stack_pointer;
let mut pop_value = || {
if read_head < caller_frame_pointer {
let val = mem.get_memory_at_address::<u32>(read_head);
read_head += POINTER_WIDTH;
val.map(|val| val as u64)
} else {
None
}
};
let mut args = Vec::new();
// Handle the first argument of thiscall
match calling_convention {
CallingConvention::WindowsThisCall => {
// On windows, "this" is passed in eax
let value = ctx
.get_register("eax", &frame.context.valid)
.map(|x| x as u64);
args.push(FunctionArg {
name: String::from("this"),
value,
});
}
CallingConvention::OtherThisCall => {
// Everywhere else, "this" is passed like a normal value
let value = pop_value();
args.push(FunctionArg {
name: String::from("this"),
value,
});
}
CallingConvention::Cdecl => {
// Nothing to do
}
}
// Now handle the rest
args.extend(argument_list.iter().map(|&arg_name| {
let value = pop_value();
FunctionArg {
name: String::from(arg_name),
value,
}
}));
return Some(FunctionArgs {
calling_convention,
args,
});
}
}
None
})
.collect::<Vec<_>>();
// Now write the values back to the call stack
for (frame, args) in call_stack.frames.iter_mut().zip(args) {
frame.arguments = args;
}
}
/// Guess the calling convention of an x86 function and split out its argument list.
///
/// `func_name` is expected to look like `path::to::func(arg, list)`. Everything
/// between the first `(` and the last `)` is treated as the argument list; any
/// text after the last `)` (such as a trailing ` const`) is ignored.
///
/// The calling convention is guessed from the part before the first `(`: a name
/// containing `::` is assumed to be a C++ method (thiscall), anything else is
/// assumed to be cdecl.
///
/// Returns `None` when the name has no parenthesized argument list, or when the
/// `<>` / `()` nesting inside the argument list is unbalanced. Otherwise returns
/// the guessed calling convention and one trimmed slice of `func_name` per
/// top-level argument. An empty argument list (`foo()`) and a lone `void`
/// (`foo(void)`) both yield zero arguments.
fn parse_x86_arg_list(func_name: &str) -> Option<(CallingConvention, Vec<&str>)> {
    let (func_name, rest) = func_name.split_once('(')?;
    let (arg_list, _junk) = rest.rsplit_once(')')?;

    let calling_convention = if func_name.contains("::") {
        // Assume this is a C++ method (thiscall).
        let windows = true; // TODO
        if windows {
            CallingConvention::WindowsThisCall
        } else {
            CallingConvention::OtherThisCall
        }
    } else {
        // Assume this is a free function (cdecl).
        CallingConvention::Cdecl
    };

    // Only consider a comma a "real" argument separator if we aren't
    // currently nested inside of templates (`<>`) or parens (`()`).
    // All delimiters are ASCII, so scanning bytes keeps every slice index on a
    // character boundary.
    let mut args = Vec::new();
    let mut arg_start = 0;
    let mut template_depth = 0u32;
    let mut paren_depth = 0u32;
    for (idx, byte) in arg_list.bytes().enumerate() {
        match byte {
            b'<' => template_depth += 1,
            // A closer with nothing open means the parser is lost.
            b'>' => template_depth = template_depth.checked_sub(1)?,
            b'(' => paren_depth += 1,
            b')' => paren_depth = paren_depth.checked_sub(1)?,
            b',' if template_depth == 0 && paren_depth == 0 => {
                args.push(arg_list[arg_start..idx].trim());
                arg_start = idx + 1;
            }
            _ => {}
        }
    }

    // Only accept the result if all nesting was balanced.
    if template_depth != 0 || paren_depth != 0 {
        return None;
    }

    // Whole argument list parsed, the remainder is the last argument. The one
    // exception is a list that is empty or just `void`: that means "no
    // arguments", not "one unnamed argument".
    let last = arg_list[arg_start..].trim();
    let no_arguments = args.is_empty() && (last.is_empty() || last == "void");
    if !no_arguments {
        args.push(last);
    }

    Some((calling_convention, args))
}