1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
use std;

use failure::{Error, ResultExt};

use remoteprocess::ProcessMemory;

use crate::python_interpreters::{InterpreterState, ThreadState, FrameObject, CodeObject, StringObject, BytesObject};

/// Call stack for a single python thread
///
/// Values of this type are produced by `get_stack_trace`, which fills in `thread_id` and
/// `frames` from the target process and uses fixed placeholder values for the other fields.
#[derive(Debug, Clone, Serialize)]
pub struct StackTrace {
    /// The python thread id for this stack trace
    pub thread_id: u64,
    /// The OS thread id for this stack trace, if known
    /// (`get_stack_trace` always stores `None` here)
    pub os_thread_id: Option<u64>,
    /// Whether or not the thread was active; `status_str` reports "idle" when this is false
    /// (`get_stack_trace` always stores `true` here)
    pub active: bool,
    /// Whether or not the thread held the GIL
    /// (`get_stack_trace` always stores `false` here)
    pub owns_gil: bool,
    /// The frames, in the order they are reached by starting at the thread's current frame
    /// and following each frame's `back` pointer
    pub frames: Vec<Frame>
}

/// Information about a single function call in a stack trace
///
/// The derived ordering and hashing compare the fields in declaration order.
#[derive(Debug, Hash, Eq, PartialEq, Ord, PartialOrd, Clone, Serialize)]
pub struct Frame {
    /// The function name
    pub name: String,
    /// The full filename of the file
    pub filename: String,
    /// The module/shared library associated with this frame, when known
    /// (`get_stack_trace` leaves this as `None`)
    pub module: Option<String>,
    /// A short, more readable, representation of the filename
    /// (`get_stack_trace` leaves this as `None`)
    pub short_filename: Option<String>,
    /// The line number inside the file (or 0 for native frames without line information).
    /// `get_stack_trace` also stores 0 when the line number lookup fails.
    pub line: i32,
}

/// Given an InterpreterState, this function returns a vector of stack traces for each thread
pub fn get_stack_traces<I, P>(interpreter: &I, process: &P) -> Result<(Vec<StackTrace>), Error>
        where I: InterpreterState, P: ProcessMemory {
    // TODO: deprecate this method
    let mut ret = Vec::new();
    let mut threads = interpreter.head();
    while !threads.is_null() {
        let thread = process.copy_pointer(threads).context("Failed to copy PyThreadState")?;
        ret.push(get_stack_trace(&thread, process)?);
        // This seems to happen occasionally when scanning BSS addresses for valid interpeters
        if ret.len() > 4096 {
            return Err(format_err!("Max thread recursion depth reached"));
        }
        threads = thread.next();
    }
    Ok(ret)
}

/// Builds the stack trace of one python thread
///
/// Beginning at `thread.frame()`, every frame object and its code object are copied out of
/// `process`, and the chain is followed through `back()` until a null pointer is reached.
/// Fails if any of those copies fail, or once more than 4096 frames have been collected.
pub fn get_stack_trace<T, P >(thread: &T, process: &P) -> Result<StackTrace, Error>
        where T: ThreadState, P: ProcessMemory {
    // TODO: just return frames here? everything else probably should be returned out of scope
    let mut collected = Vec::new();
    let mut current = thread.frame();
    while !current.is_null() {
        let frame = process.copy_pointer(current).context("Failed to copy PyFrameObject")?;
        let code = process.copy_pointer(frame.code()).context("Failed to copy PyCodeObject")?;

        let filename = copy_string(code.filename(), process).context("Failed to copy filename")?;
        let name = copy_string(code.name(), process).context("Failed to copy function name")?;

        // A missing line number really shouldn't be fatal, but the lookup can fail in extreme
        // cases (https://github.com/benfred/py-spy/issues/164). Fall back to line 0, which the
        // native extensions already use to signal "no line information available".
        let line = get_line_number(&code, frame.lasti(), process).unwrap_or_else(|e| {
            warn!("Failed to get line number from {}.{}: {}", filename, name, e);
            0
        });

        collected.push(Frame{module: None, short_filename: None, name, filename, line});
        if collected.len() > 4096 {
            return Err(format_err!("Max frame recursion depth reached"));
        }

        current = frame.back();
    }

    let thread_id = thread.thread_id();
    Ok(StackTrace{thread_id, os_thread_id: None, active: true, owns_gil: false, frames: collected})
}

impl StackTrace {
    /// Short human readable label for the thread state: "idle" whenever the thread is not
    /// active, otherwise "active+gil" or "active" depending on whether it owns the GIL
    pub fn status_str(&self) -> &str {
        if !self.active {
            return "idle";
        }
        if self.owns_gil {
            "active+gil"
        } else {
            "active"
        }
    }
}

/// Computes the source line for a bytecode offset (the lasti index from a PyFrameObject)
/// by walking the line number table of a PyCodeObject
fn get_line_number<C: CodeObject, P: ProcessMemory>(code: &C, lasti: i32, process: &P) -> Result<i32, Error> {
    let lnotab = copy_bytes(code.lnotab(), process).context("Failed to copy line number table")?;

    // The table is a sequence of (bytecode delta, line delta) byte pairs, format described in
    // https://github.com/python/cpython/blob/master/Objects/lnotab_notes.txt
    // A trailing unpaired byte is ignored.
    // NOTE(review): both deltas are read as unsigned bytes here - confirm this matches the
    // table format of every python version this is used with.
    let mut line: i32 = code.first_lineno();
    let mut address: i32 = 0;
    for entry in lnotab.chunks_exact(2) {
        address += i32::from(entry[0]);
        if address > lasti {
            // this entry starts past the instruction we are looking for
            break;
        }
        line += i32::from(entry[1]);
    }

    Ok(line)
}

/// Copies a string from a target process. Attempts to handle unicode differences, which mostly seems to be working
pub fn copy_string<T: StringObject, P: ProcessMemory>(ptr: * const T, process: &P) -> Result<String, Error> {
    let obj = process.copy_pointer(ptr)?;
    if obj.size() >= 4096 {
        return Err(format_err!("Refusing to copy {} chars of a string", obj.size()));
    }

    let kind = obj.kind();

    let bytes = process.copy(obj.address(ptr as usize), obj.size() * kind as usize)?;

    match (kind, obj.ascii()) {
        (4, _) => {
            #[allow(clippy::cast_ptr_alignment)]
            let chars = unsafe { std::slice::from_raw_parts(bytes.as_ptr() as * const char, bytes.len() / 4) };
            Ok(chars.iter().collect())
        },
        (2, _) => {
            // UCS2 strings aren't used internally after v3.3: https://www.python.org/dev/peps/pep-0393/
            // TODO: however with python 2.7 they could be added with --enable-unicode=ucs2 configure flag.
            //            or with python 3.2 --with-wide-unicode=ucs2
            Err(format_err!("ucs2 strings aren't supported yet!"))
        },
        (1, true) => Ok(String::from_utf8(bytes)?),
        (1, false) => Ok(bytes.iter().map(|&b| { b as char }).collect()),
        _ => Err(format_err!("Unknown string kind {}", kind))
    }
}

/// Reads the contents of a PyBytesObject out of the target process (currently only used for
/// the lnotab object). Objects reporting a size of 65536 or more are rejected with an error.
pub fn copy_bytes<T: BytesObject, P: ProcessMemory>(ptr: * const T, process: &P) -> Result<Vec<u8>, Error> {
    let header = process.copy_pointer(ptr)?;
    let length = header.size();
    if length < 65536 {
        // the payload location is derived from the address of the object in the target process
        let payload = process.copy(header.address(ptr as usize), length as usize)?;
        Ok(payload)
    } else {
        Err(format_err!("Refusing to copy {} bytes", length))
    }
}

#[cfg(test)]
mod tests {
    // The idea here is to create various cpython interpreter structs locally
    // and then check that the code above handles them appropriately
    use super::*;
    use remoteprocess::LocalProcess;
    use python_bindings::v3_7_0::{PyCodeObject, PyBytesObject, PyVarObject, PyUnicodeObject, PyASCIIObject};
    use std::ptr::copy_nonoverlapping;

    // python stores data after pybytesobject/pyasciiobject. hack by initializing a 4k buffer for testing.
    // `#[repr(C)]` is required for this to be reliable: it keeps `storage` directly behind
    // `base`, whereas the default Rust layout makes no promise about field order.
    // TODO: get better at Rust and figure out a better solution
    #[allow(dead_code)]
    #[repr(C)]
    struct AllocatedPyByteObject {
        base: PyBytesObject,
        storage: [u8; 4096]
    }

    #[allow(dead_code)]
    #[repr(C)]
    struct AllocatedPyASCIIObject {
        base: PyASCIIObject,
        storage: [u8; 4096]
    }

    /// Builds a bytes object whose payload starts at `ob_sval` and spills into `storage`
    fn to_byteobject(bytes: &[u8]) -> AllocatedPyByteObject {
        assert!(bytes.len() <= 4096, "test payload doesn't fit in the backing storage");
        let ob_size = bytes.len() as isize;
        let base = PyBytesObject{ob_base: PyVarObject{ob_size, ..Default::default()}, ..Default::default()};
        let mut ret = AllocatedPyByteObject{base, storage: [0u8; 4096]};
        // Offset of the inline payload from the start of the allocation
        let sval_offset = (ret.base.ob_sval.as_ptr() as usize) - (&ret as *const AllocatedPyByteObject as usize);
        unsafe {
            // SAFETY: the destination pointer is derived from the whole struct, and
            // `sval_offset + bytes.len()` stays inside it because the offset lies within
            // `base` and at most 4096 bytes (the size of `storage`) are written.
            let dst = (&mut ret as *mut AllocatedPyByteObject as *mut u8).add(sval_offset);
            copy_nonoverlapping(bytes.as_ptr(), dst, bytes.len());
        }
        ret
    }

    /// Builds a compact ascii string object with the characters stored right after the header
    fn to_asciiobject(input: &str) -> AllocatedPyASCIIObject {
        let bytes = input.as_bytes();
        assert!(bytes.len() <= 4096, "test payload doesn't fit in the backing storage");
        let mut base = PyASCIIObject{length: bytes.len() as isize, ..Default::default()};
        base.state.set_compact(1);
        base.state.set_kind(1);
        base.state.set_ascii(1);
        let mut ret = AllocatedPyASCIIObject{base, storage: [0u8; 4096]};
        unsafe {
            // SAFETY: with `#[repr(C)]` the bytes following the header are `storage`, which
            // is large enough for the (at most 4096 byte) payload.
            let ptr = &mut ret as *mut AllocatedPyASCIIObject as *mut u8;
            let dst = ptr.add(std::mem::size_of::<PyASCIIObject>());
            copy_nonoverlapping(bytes.as_ptr(), dst, bytes.len());
        }
        ret
    }

    #[test]
    fn test_get_line_number() {
        let mut lnotab = to_byteobject(&[0u8, 1, 10, 1, 8, 1, 4, 1]);
        let code = PyCodeObject{co_firstlineno: 3,
                                co_lnotab: &mut lnotab.base.ob_base.ob_base,
                                ..Default::default()};
        // four entries, all at or before offset 30: 3 + 1 + 1 + 1 + 1
        let lineno = get_line_number(&code, 30, &LocalProcess).unwrap();
        assert_eq!(lineno, 7);
    }

    #[test]
    fn test_copy_string() {
        let original = "function_name";
        let obj = to_asciiobject(original);

        // Point at the whole allocation so reads past the ascii header stay inside it
        let unicode = &obj as *const AllocatedPyASCIIObject as *const PyUnicodeObject;
        let copied = copy_string(unicode, &LocalProcess).unwrap();
        assert_eq!(copied, original);
    }

    #[test]
    fn test_copy_bytes() {
        let original = [10_u8, 20, 30, 40, 50, 70, 80];
        let bytes = to_byteobject(&original);
        // Point at the whole allocation so reads past `base` stay inside it
        let ptr = &bytes as *const AllocatedPyByteObject as *const PyBytesObject;
        let copied = copy_bytes(ptr, &LocalProcess).unwrap();
        assert_eq!(copied, original);
    }
}