py_spy/
coredump.rs

1use std::collections::HashMap;
2use std::ffi::OsStr;
3use std::fs::File;
4use std::io::Read;
5use std::os::unix::ffi::OsStrExt;
6use std::path::Path;
7use std::path::PathBuf;
8
9use anyhow::{Context, Error, Result};
10use console::style;
11use log::info;
12use remoteprocess::ProcessMemory;
13
14use crate::binary_parser::{parse_binary, BinaryInfo};
15use crate::config::Config;
16use crate::dump::print_trace;
17use crate::python_bindings::{
18    v2_7_15, v3_10_0, v3_11_0, v3_12_0, v3_13_0, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0, v3_9_5,
19};
20use crate::python_data_access::format_variable;
21use crate::python_interpreters::InterpreterState;
22use crate::python_process_info::{
23    get_interpreter_address, get_python_version, get_threadstate_address, is_python_lib,
24    ContainsAddr, PythonProcessInfo,
25};
26use crate::python_threading::thread_names_from_interpreter;
27use crate::stack_trace::{get_stack_traces, StackTrace};
28use crate::version::Version;
29
30#[derive(Debug, Clone)]
31pub struct CoreMapRange {
32    pub pathname: Option<PathBuf>,
33    pub segment: goblin::elf::ProgramHeader,
34}
35
36// Defines accessors to match those in proc_maps. However, can't use the
37// proc_maps trait since is private
38impl CoreMapRange {
39    pub fn size(&self) -> usize {
40        self.segment.p_memsz as usize
41    }
42    pub fn start(&self) -> usize {
43        self.segment.p_vaddr as usize
44    }
45    pub fn filename(&self) -> Option<&Path> {
46        self.pathname.as_deref()
47    }
48    pub fn is_exec(&self) -> bool {
49        self.segment.is_executable()
50    }
51    pub fn is_write(&self) -> bool {
52        self.segment.is_write()
53    }
54    pub fn is_read(&self) -> bool {
55        self.segment.is_read()
56    }
57}
58
59impl ContainsAddr for Vec<CoreMapRange> {
60    fn contains_addr(&self, addr: usize) -> bool {
61        self.iter()
62            .any(|map| (addr >= map.start()) && (addr < (map.start() + map.size())))
63    }
64}
65
66pub struct CoreDump {
67    filename: PathBuf,
68    contents: Vec<u8>,
69    maps: Vec<CoreMapRange>,
70    psinfo: Option<elfcore::elf_prpsinfo>,
71    status: Vec<elfcore::elf_prstatus>,
72}
73
74impl CoreDump {
75    pub fn new<P: AsRef<Path>>(filename: P) -> Result<CoreDump, Error> {
76        let filename = filename.as_ref();
77        let mut file = File::open(filename)?;
78        let mut contents = Vec::new();
79        file.read_to_end(&mut contents)?;
80        let elf = goblin::elf::Elf::parse(&contents)?;
81
82        let notes = elf
83            .iter_note_headers(&contents)
84            .ok_or_else(|| format_err!("no note segment found"))?;
85
86        let mut filenames = HashMap::new();
87        let mut psinfo = None;
88        let mut status = Vec::new();
89        for note in notes.flatten() {
90            if note.n_type == goblin::elf::note::NT_PRPSINFO {
91                psinfo = Some(unsafe {
92                    std::ptr::read_unaligned(note.desc.as_ptr() as *const elfcore::elf_prpsinfo)
93                });
94            } else if note.n_type == goblin::elf::note::NT_PRSTATUS {
95                let thread_status: elfcore::elf_prstatus = unsafe {
96                    std::ptr::read_unaligned(note.desc.as_ptr() as *const elfcore::elf_prstatus)
97                };
98                status.push(thread_status);
99            } else if note.n_type == goblin::elf::note::NT_FILE {
100                let data = note.desc;
101                let ptrs = data.as_ptr() as *const usize;
102
103                let count = unsafe { std::ptr::read_unaligned(ptrs) };
104                let _page_size = unsafe { std::ptr::read_unaligned(ptrs.offset(1)) };
105
106                let string_table = &data[(std::mem::size_of::<usize>() * (2 + count * 3))..];
107
108                for (i, filename) in string_table.split(|chr| *chr == 0).enumerate() {
109                    if i < count {
110                        let i = i as isize;
111                        let start = unsafe { std::ptr::read_unaligned(ptrs.offset(i * 3 + 2)) };
112                        let _end = unsafe { std::ptr::read_unaligned(ptrs.offset(i * 3 + 3)) };
113                        let _page_offset =
114                            unsafe { std::ptr::read_unaligned(ptrs.offset(i * 3 + 4)) };
115
116                        let pathname = Path::new(&OsStr::from_bytes(filename)).to_path_buf();
117                        filenames.insert(start, pathname);
118                    }
119                }
120            }
121        }
122
123        let mut maps = Vec::new();
124        for ph in elf.program_headers {
125            if ph.p_type == goblin::elf::program_header::PT_LOAD {
126                let pathname = filenames.get(&(ph.p_vaddr as _));
127                let map = CoreMapRange {
128                    pathname: pathname.cloned(),
129                    segment: ph,
130                };
131                info!(
132                    "map: {:016x}-{:016x} {}{}{} {}",
133                    map.start(),
134                    map.start() + map.size(),
135                    if map.is_read() { 'r' } else { '-' },
136                    if map.is_write() { 'w' } else { '-' },
137                    if map.is_exec() { 'x' } else { '-' },
138                    map.filename()
139                        .unwrap_or(&std::path::PathBuf::from(""))
140                        .display()
141                );
142
143                maps.push(map);
144            }
145        }
146
147        Ok(CoreDump {
148            filename: filename.to_owned(),
149            contents,
150            maps,
151            psinfo,
152            status,
153        })
154    }
155}
156
157impl ProcessMemory for CoreDump {
158    fn read(&self, addr: usize, buf: &mut [u8]) -> Result<(), remoteprocess::Error> {
159        let start = addr as u64;
160        let _end = (addr + buf.len()) as u64;
161
162        for map in &self.maps {
163            // TODO: one issue here is the bss addr spans multiple mmap segments - so checking the 'end'
164            // here means we skip it. Instead we're just checking if the start address exists in
165            // the segment
166            let ph = &map.segment;
167            if start >= ph.p_vaddr && start <= (ph.p_vaddr + ph.p_memsz) {
168                let offset = (start - ph.p_vaddr + ph.p_offset) as usize;
169                buf.copy_from_slice(&self.contents[offset..(offset + buf.len())]);
170                return Ok(());
171            }
172        }
173
174        let io_error = std::io::Error::from_raw_os_error(libc::EFAULT);
175        Err(remoteprocess::Error::IOError(io_error))
176    }
177}
178
179pub struct PythonCoreDump {
180    core: CoreDump,
181    version: Version,
182    interpreter_address: usize,
183    threadstate_address: usize,
184}
185
186impl PythonCoreDump {
187    pub fn new<P: AsRef<Path>>(filename: P) -> Result<PythonCoreDump, Error> {
188        let core = CoreDump::new(filename)?;
189        let maps = &core.maps;
190
191        // Get the python binary from the maps, and parse it
192        let (python_filename, python_binary) = {
193            let map = maps
194                .iter()
195                .find(|m| m.filename().is_some() & m.is_exec())
196                .ok_or_else(|| format_err!("Failed to get binary from coredump"))?;
197            let python_filename = map.filename().unwrap();
198            let python_binary = parse_binary(python_filename, map.start() as _, map.size() as _);
199            info!("Found python binary @ {}", python_filename.display());
200            (python_filename.to_owned(), python_binary)
201        };
202
203        // get the libpython binary (if any) from maps
204        let libpython_binary = {
205            let libmap = maps.iter().find(|m| {
206                if let Some(pathname) = m.filename() {
207                    if let Some(pathname) = pathname.to_str() {
208                        return is_python_lib(pathname) && m.is_exec();
209                    }
210                }
211                false
212            });
213
214            let mut libpython_binary: Option<BinaryInfo> = None;
215            if let Some(libpython) = libmap {
216                if let Some(filename) = &libpython.filename() {
217                    info!("Found libpython binary @ {}", filename.display());
218                    let parsed =
219                        parse_binary(filename, libpython.start() as u64, libpython.size() as u64)?;
220                    libpython_binary = Some(parsed);
221                }
222            }
223            libpython_binary
224        };
225
226        // If we have a libpython binary - we can tolerate failures on parsing the main python binary.
227        let python_binary = match libpython_binary {
228            None => Some(python_binary.context("Failed to parse python binary")?),
229            _ => python_binary.ok(),
230        };
231
232        let python_info = PythonProcessInfo {
233            python_binary,
234            libpython_binary,
235            maps: Box::new(core.maps.clone()),
236            python_filename,
237            dockerized: false,
238        };
239
240        let version =
241            get_python_version(&python_info, &core).context("failed to get python version")?;
242        info!("Got python version {}", version);
243
244        let interpreter_address = get_interpreter_address(&python_info, &core, &version)?;
245        info!("Found interpreter at 0x{:016x}", interpreter_address);
246
247        // lets us figure out which thread has the GIL
248        let config = Config::default();
249        let threadstate_address =
250            get_threadstate_address(interpreter_address, &python_info, &core, &version, &config)?;
251        info!("found threadstate at 0x{:016x}", threadstate_address);
252
253        Ok(PythonCoreDump {
254            core,
255            version,
256            interpreter_address,
257            threadstate_address,
258        })
259    }
260
261    pub fn get_stack(&self, config: &Config) -> Result<Vec<StackTrace>, Error> {
262        if config.native {
263            return Err(format_err!(
264                "Native unwinding isn't yet supported with coredumps"
265            ));
266        }
267
268        if config.subprocesses {
269            return Err(format_err!(
270                "Subprocesses can't be used for getting stacktraces from coredumps"
271            ));
272        }
273
274        // different versions have different layouts, check as appropriate
275        match self.version {
276            Version {
277                major: 2,
278                minor: 3..=7,
279                ..
280            } => self._get_stack::<v2_7_15::_is>(config),
281            Version {
282                major: 3, minor: 3, ..
283            } => self._get_stack::<v3_3_7::_is>(config),
284            Version {
285                major: 3,
286                minor: 4..=5,
287                ..
288            } => self._get_stack::<v3_5_5::_is>(config),
289            Version {
290                major: 3, minor: 6, ..
291            } => self._get_stack::<v3_6_6::_is>(config),
292            Version {
293                major: 3, minor: 7, ..
294            } => self._get_stack::<v3_7_0::_is>(config),
295            Version {
296                major: 3, minor: 8, ..
297            } => self._get_stack::<v3_8_0::_is>(config),
298            Version {
299                major: 3, minor: 9, ..
300            } => self._get_stack::<v3_9_5::_is>(config),
301            Version {
302                major: 3,
303                minor: 10,
304                ..
305            } => self._get_stack::<v3_10_0::_is>(config),
306            Version {
307                major: 3,
308                minor: 11,
309                ..
310            } => self._get_stack::<v3_11_0::_is>(config),
311            Version {
312                major: 3,
313                minor: 12,
314                ..
315            } => self._get_stack::<v3_12_0::_is>(config),
316            Version {
317                major: 3,
318                minor: 13,
319                ..
320            } => self._get_stack::<v3_13_0::_is>(config),
321            _ => Err(format_err!(
322                "Unsupported version of Python: {}",
323                self.version
324            )),
325        }
326    }
327
328    fn _get_stack<I: InterpreterState>(&self, config: &Config) -> Result<Vec<StackTrace>, Error> {
329        let mut traces = get_stack_traces::<I, CoreDump>(
330            self.interpreter_address,
331            &self.core,
332            self.threadstate_address,
333            Some(config),
334        )?;
335        let thread_names = thread_names_from_interpreter::<I, CoreDump>(
336            self.interpreter_address,
337            &self.core,
338            &self.version,
339        )
340        .ok();
341
342        for trace in &mut traces {
343            if let Some(ref thread_names) = thread_names {
344                trace.thread_name = thread_names.get(&trace.thread_id).cloned();
345            }
346
347            for frame in &mut trace.frames {
348                if let Some(locals) = frame.locals.as_mut() {
349                    let max_length = (128 * config.dump_locals) as isize;
350                    for local in locals {
351                        let repr = format_variable::<I, CoreDump>(
352                            &self.core,
353                            &self.version,
354                            local.addr,
355                            max_length,
356                        );
357                        local.repr = Some(repr.unwrap_or_else(|_| "?".to_owned()));
358                    }
359                }
360            }
361        }
362        Ok(traces)
363    }
364
365    pub fn print_traces(&self, traces: &Vec<StackTrace>, config: &Config) -> Result<(), Error> {
366        if config.dump_json {
367            println!("{}", serde_json::to_string_pretty(&traces)?);
368            return Ok(());
369        }
370
371        if let Some(status) = self.core.status.first() {
372            println!(
373                "Signal {}: {}",
374                style(status.pr_cursig).bold().yellow(),
375                self.core.filename.display()
376            );
377        }
378
379        if let Some(psinfo) = self.core.psinfo {
380            println!(
381                "Process {}: {}",
382                style(psinfo.pr_pid).bold().yellow(),
383                OsStr::from_bytes(&psinfo.pr_psargs).to_string_lossy()
384            );
385        }
386        println!("Python v{}", style(&self.version).bold());
387        println!();
388        for trace in traces.iter().rev() {
389            print_trace(trace, false);
390        }
391        Ok(())
392    }
393}
394
/// `repr(C)` structures used to decode the descriptors of core-file notes.
///
/// They are filled by copying raw note bytes, so field order and types must
/// not be changed.
mod elfcore {
    use std::os::raw::{c_char, c_int, c_long, c_short, c_uchar, c_uint, c_ulong};

    /// Signal information stored at the start of `elf_prstatus`.
    #[repr(C)]
    #[derive(Debug, Copy, Clone)]
    pub struct elf_siginfo {
        pub si_signo: c_int,
        pub si_code: c_int,
        pub si_errno: c_int,
    }

    /// A seconds / microseconds pair.
    #[repr(C)]
    #[derive(Debug, Copy, Clone)]
    pub struct timeval {
        pub tv_sec: c_long,
        pub tv_usec: c_long,
    }

    /// Leading portion of an `NT_PRSTATUS` note descriptor (one per thread).
    #[repr(C)]
    #[derive(Debug, Copy, Clone)]
    pub struct elf_prstatus {
        pub pr_info: elf_siginfo,
        pub pr_cursig: c_short,
        pub pr_sigpend: c_ulong,
        pub pr_sighold: c_ulong,
        pub pr_pid: c_int,
        pub pr_ppid: c_int,
        pub pr_pgrp: c_int,
        pub pr_sid: c_int,
        pub pr_utime: timeval,
        pub pr_stime: timeval,
        pub pr_cutime: timeval,
        pub pr_cstime: timeval,
        // TODO: the thread's registers come next - we don't need them right now, but we will
        // if we want to do unwinding
    }

    /// Contents of an `NT_PRPSINFO` note descriptor (process-wide information).
    #[repr(C)]
    #[derive(Debug, Copy, Clone)]
    pub struct elf_prpsinfo {
        pub pr_state: c_char,
        pub pr_sname: c_char,
        pub pr_zomb: c_char,
        pub pr_nice: c_char,
        pub pr_flag: c_ulong,
        pub pr_uid: c_uint,
        pub pr_gid: c_uint,
        pub pr_pid: c_int,
        pub pr_ppid: c_int,
        pub pr_pgrp: c_int,
        pub pr_sid: c_int,
        pub pr_fname: [c_uchar; 16usize],
        pub pr_psargs: [c_uchar; 80usize],
    }
}
448
#[cfg(test)]
mod test {
    use super::*;
    use py_spy_testdata::get_coredump_path;

    /// Reads the stacks of a two-thread Python 3.9 core dump from the test data.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_coredump() {
        // The python binary that produced this core isn't available here, so the
        // interpreter address and version can't (yet) be discovered automatically.
        // They are specified by hand instead.
        let python_core = PythonCoreDump {
            core: CoreDump::new(&get_coredump_path("python_3_9_threads")).unwrap(),
            version: Version {
                major: 3,
                minor: 9,
                patch: 13,
                release_flags: String::new(),
                build_metadata: None,
            },
            interpreter_address: 0x000055a8293dbe20,
            threadstate_address: 0x000055a82745fe18,
        };

        let traces = python_core.get_stack(&Config::default()).unwrap();

        // should have two threads
        assert_eq!(traces.len(), 2);
        let child_thread = &traces[0];
        let main_thread = &traces[1];

        assert_eq!(main_thread.frames.len(), 1);
        assert_eq!(main_thread.frames[0].name, "<module>");
        assert_eq!(main_thread.thread_name.as_deref(), Some("MainThread"));

        assert_eq!(child_thread.frames.len(), 5);
        assert_eq!(child_thread.frames[0].name, "dump_sum");
        assert_eq!(child_thread.frames[0].line, 16);
        assert_eq!(child_thread.thread_name.as_deref(), Some("child_thread"));
    }
}