1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
use crate::{CallStack, CallingConvention, FunctionArg, FunctionArgs};
use minidump::{CpuContext, MinidumpMemory, MinidumpRawContext};

// # Recovering x86 function arguments
//
// This approach is very hacky and very likely to produce incorrect values
// in many situations. But sometimes it will be right, and that's useful, right?
//
// x86 has two common calling conventions which are very friendly to recovering
// function arguments: cdecl and thiscall.
//
// In both conventions, arguments are unconditionally pushed to the
// stack in reverse order, so in theory all we need to do is jump to the
// top of the caller's stack frame and just start reading the values off!
//
// (thiscall requires some special handling for the implicit "this" arg, see below.)
//
// This leaves us with several problems to solve:
//
// 1. determining the calling convention
// 2. determining the number (and name) of arguments
// 3. determining the size of arguments
//
//
//
// ## Determining the calling convention
//
// We don't have the kind of debuginfo that would tell us what the calling
// convention was, but we can make an educated guess based on the name of
// the function:
//
// * If the function name contains a ::, it's probably a C++ member function,
//   in which case it's thiscall
// * Otherwise, assume it's cdecl
//
// It's a blunt heuristic that will misclassify static functions and always
// mishandle anything that's manually defined to be fastcall or whatever else,
// but it should do the right thing for *most* functions!
//
//
//
// ## Determining the number of arguments
//
// We assume a function name includes its argument list if it contains
// both an open-paren "(" and close-paren ")" in the right order. Everything
// between the first open-paren and last close-paren is assumed to be the
// argument list.
//
// The number of arguments is then just "the number of commas in the argument list, plus one".
// However C++ templates introduce "fake" commas. This can be easily handled by
// tracking the nesting level of `<` and `>` and only considering a comma "real"
// if the nesting level is 0. We similarly handle `(` and `)` to try to handle
// function pointer types.
//
// thiscall functions have an implicit first argument "this". Windows (Visual C++)
// toolchains will pass "this" via $ecx instead of on the stack. Other (gcc)
// toolchains will pass "this" just like any other argument (so it will be at
// the top of the stack frame).
//
//
//
// ## Determining the size of arguments
//
// Rather than attempting to parse and resolve C++ types (*laughs and cries at the same time*),
// we just unconditionally assume all arguments are pointer-sized. This is intuitively true
// most of the time. The major exceptions are `bool` and `uint64_t`. Maybe those
// are worth carving out special cases for, but until then: it's all pointers!

/// Try to recover function arguments for every frame of `call_stack`.
///
/// This is a best-effort heuristic: the result is written to each frame's
/// `arguments` field, replacing whatever was there before. A frame ends up
/// with `None` when any of the following holds:
///
/// * `stack_memory` is `None`,
/// * the frame has no function name,
/// * the frame's raw context is not x86,
/// * `parse_x86_arg_list` could not make sense of the function name.
///
/// Every argument is assumed to be pointer-sized (4 bytes). An individual
/// argument's `value` is `None` when the read position has reached the upper
/// bound derived from the surrounding frames, when the stack memory does not
/// cover the address, or (for a register-passed `this`) when the register is
/// not available.
pub fn fill_arguments(call_stack: &mut CallStack, stack_memory: Option<&MinidumpMemory>) {
    // Collect up all the results at once to avoid borrowing issues: computing
    // one frame's arguments needs to look at the frames that follow it.
    let args = call_stack
        .frames
        .iter()
        .enumerate()
        .map(|(frame_idx, frame)| {
            // Only x86 is implemented because it has friendly calling conventions,
            // and we need the function name to make any guesses at what the
            // arguments are.
            if let (Some(mem), Some(func_name), MinidumpRawContext::X86(ctx)) =
                (stack_memory, &frame.function_name, &frame.context.raw)
            {
                const POINTER_WIDTH: u64 = 4;

                if let Some((calling_convention, argument_list)) = parse_x86_arg_list(func_name) {
                    // We're assuming this is either cdecl or thiscall. In either case,
                    // all the arguments are saved at the top of the caller's stackframe
                    // in reverse order (which in fact means we can start at the top
                    // of the frame and read them off *in order*).

                    // The stack grows down, so the maximum address in the stack
                    // is actually the base of the stack. Since we're walking down
                    // the stack, the base of the stack is a good upper-bound
                    // (and default value) for any stack/frame pointer.
                    let stack_base = mem.base_address.saturating_add(mem.size);

                    // Reading starts at the stack pointer of the next frame in
                    // the list (the caller)...
                    let caller_stack_pointer = call_stack
                        .frames
                        .get(frame_idx + 1)
                        .map(|f| f.context.get_stack_pointer())
                        .unwrap_or(stack_base);
                    // ...and must stop before the stack pointer of the frame
                    // after that, which bounds the caller's frame from above.
                    let caller_frame_pointer = call_stack
                        .frames
                        .get(frame_idx + 2)
                        .map(|f| f.context.get_stack_pointer())
                        .unwrap_or(stack_base);

                    // Reads one pointer-sized value and advances. The read head
                    // advances even if the memory read itself fails, so later
                    // arguments keep their expected positions.
                    let mut read_head = caller_stack_pointer;
                    let mut pop_value = || {
                        if read_head < caller_frame_pointer {
                            let val = mem.get_memory_at_address::<u32>(read_head);
                            read_head += POINTER_WIDTH;
                            val.map(|val| val as u64)
                        } else {
                            None
                        }
                    };

                    let mut args = Vec::new();

                    // Handle the first argument of thiscall
                    match calling_convention {
                        CallingConvention::WindowsThisCall => {
                            // MSVC's x86 `__thiscall` passes "this" in ecx rather
                            // than on the stack.
                            // NOTE(review): ecx is presumably only meaningful if
                            // the callee has not reused it yet; treat the value
                            // as a guess.
                            let value = ctx
                                .get_register("ecx", &frame.context.valid)
                                .map(|x| x as u64);
                            args.push(FunctionArg {
                                name: String::from("this"),
                                value,
                            });
                        }
                        CallingConvention::OtherThisCall => {
                            // Everywhere else, "this" is passed like a normal value
                            let value = pop_value();
                            args.push(FunctionArg {
                                name: String::from("this"),
                                value,
                            });
                        }
                        CallingConvention::Cdecl => {
                            // Nothing to do
                        }
                    }

                    // Now handle the rest, in declaration order.
                    args.extend(argument_list.iter().map(|&arg_name| {
                        let value = pop_value();
                        FunctionArg {
                            name: String::from(arg_name),
                            value,
                        }
                    }));

                    return Some(FunctionArgs {
                        calling_convention,
                        args,
                    });
                }
            }
            None
        })
        .collect::<Vec<_>>();

    // Now write the values back to the call stack
    for (frame, args) in call_stack.frames.iter_mut().zip(args) {
        frame.arguments = args;
    }
}

/// Guess the calling convention and the argument names of an x86 function
/// from its name.
///
/// The argument list is taken to be everything between the first `(` and the
/// last `)` of `func_name`. It is split on commas that are not nested inside
/// `<>` (templates) or `()` (e.g. function pointer types). Each returned
/// argument is a whitespace-trimmed sub-slice of `func_name`.
///
/// The calling convention is guessed from the text before the first `(`:
/// a name containing `::` is treated as a C++ method (thiscall, currently
/// always the Windows flavour), anything else as cdecl.
///
/// An empty argument list (`foo()`) or a lone `void` (`foo(void)`) yields an
/// empty argument vector.
///
/// Returns `None` if `func_name` has no parenthesized argument list, or if
/// the `<>` / `()` nesting inside the argument list is unbalanced.
fn parse_x86_arg_list(func_name: &str) -> Option<(CallingConvention, Vec<&str>)> {
    let (func_name, arg_list) = func_name.split_once('(')?;
    let (arg_list, _junk) = arg_list.rsplit_once(')')?;

    let calling_convention = if func_name.contains("::") {
        // Assume this is a C++ method (thiscall)
        let windows = true; // TODO
        if windows {
            CallingConvention::WindowsThisCall
        } else {
            CallingConvention::OtherThisCall
        }
    } else {
        // Assume this is a static function (cdecl)
        CallingConvention::Cdecl
    };

    let mut args = Vec::new();

    // Now parse the arguments out
    let mut arg_start = 0;
    let mut template_depth = 0u32;
    let mut paren_depth = 0u32;

    // Only consider a comma a "real" argument separator if we aren't
    // currently nested inside of templates (`<>`) or parens (`()`).
    //
    // Iterating bytes is fine: every delimiter is ASCII, so the indices we
    // slice at are always on character boundaries.
    for (idx, byte) in arg_list.bytes().enumerate() {
        match byte {
            b'<' => template_depth += 1,
            // A closer without a matching opener means the parser is lost.
            b'>' => template_depth = template_depth.checked_sub(1)?,
            b'(' => paren_depth += 1,
            b')' => paren_depth = paren_depth.checked_sub(1)?,
            b',' if template_depth == 0 && paren_depth == 0 => {
                args.push(arg_list[arg_start..idx].trim());
                arg_start = idx + 1;
            }
            _ => {}
        }
    }

    // Only accept the result if all nesting was balanced
    if template_depth != 0 || paren_depth != 0 {
        return None;
    }

    // Whole argument list parsed, the remainder is the last argument --
    // unless the list was empty or just `void`, in which case the function
    // takes no arguments at all.
    let last_arg = arg_list[arg_start..].trim();
    let takes_no_args = args.is_empty() && (last_arg.is_empty() || last_arg == "void");
    if !takes_no_args {
        args.push(last_arg);
    }

    Some((calling_convention, args))
}