Skip to main content

py_spy/
coredump.rs

1use std::collections::HashMap;
2use std::ffi::OsStr;
3use std::fs::File;
4use std::io::Read;
5use std::os::unix::ffi::OsStrExt;
6use std::path::Path;
7use std::path::PathBuf;
8
9use anyhow::{Context, Error, Result};
10use console::style;
11use log::info;
12use remoteprocess::ProcessMemory;
13
14use crate::binary_parser::{parse_binary, BinaryInfo};
15use crate::config::Config;
16use crate::dump::print_trace;
17use crate::python_bindings::{
18    v2_7_15, v3_10_0, v3_11_0, v3_12_0, v3_13_0, v3_14_0, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0,
19    v3_9_5,
20};
21use crate::python_data_access::format_variable;
22use crate::python_interpreters::InterpreterState;
23use crate::python_process_info::{
24    get_interpreter_address, get_python_version, get_threadstate_address, is_python_lib,
25    ContainsAddr, PythonProcessInfo,
26};
27use crate::python_threading::thread_names_from_interpreter;
28use crate::stack_trace::{get_stack_traces, StackTrace};
29use crate::version::Version;
30
31#[derive(Debug, Clone)]
32pub struct CoreMapRange {
33    pub pathname: Option<PathBuf>,
34    pub segment: goblin::elf::ProgramHeader,
35}
36
37// Defines accessors to match those in proc_maps. However, can't use the
38// proc_maps trait since is private
39impl CoreMapRange {
40    pub fn size(&self) -> usize {
41        self.segment.p_memsz as usize
42    }
43    pub fn start(&self) -> usize {
44        self.segment.p_vaddr as usize
45    }
46    pub fn filename(&self) -> Option<&Path> {
47        self.pathname.as_deref()
48    }
49    pub fn is_exec(&self) -> bool {
50        self.segment.is_executable()
51    }
52    pub fn is_write(&self) -> bool {
53        self.segment.is_write()
54    }
55    pub fn is_read(&self) -> bool {
56        self.segment.is_read()
57    }
58}
59
60impl ContainsAddr for Vec<CoreMapRange> {
61    fn contains_addr(&self, addr: usize) -> bool {
62        self.iter()
63            .any(|map| (addr >= map.start()) && (addr < (map.start() + map.size())))
64    }
65}
66
67pub struct CoreDump {
68    filename: PathBuf,
69    contents: Vec<u8>,
70    maps: Vec<CoreMapRange>,
71    psinfo: Option<elfcore::elf_prpsinfo>,
72    status: Vec<elfcore::elf_prstatus>,
73}
74
75impl CoreDump {
76    pub fn new<P: AsRef<Path>>(filename: P) -> Result<CoreDump, Error> {
77        let filename = filename.as_ref();
78        let mut file = File::open(filename)?;
79        let mut contents = Vec::new();
80        file.read_to_end(&mut contents)?;
81        let elf = goblin::elf::Elf::parse(&contents)?;
82
83        let notes = elf
84            .iter_note_headers(&contents)
85            .ok_or_else(|| format_err!("no note segment found"))?;
86
87        let mut filenames = HashMap::new();
88        let mut psinfo = None;
89        let mut status = Vec::new();
90        for note in notes.flatten() {
91            if note.n_type == goblin::elf::note::NT_PRPSINFO {
92                psinfo = Some(unsafe {
93                    std::ptr::read_unaligned(note.desc.as_ptr() as *const elfcore::elf_prpsinfo)
94                });
95            } else if note.n_type == goblin::elf::note::NT_PRSTATUS {
96                let thread_status: elfcore::elf_prstatus = unsafe {
97                    std::ptr::read_unaligned(note.desc.as_ptr() as *const elfcore::elf_prstatus)
98                };
99                status.push(thread_status);
100            } else if note.n_type == goblin::elf::note::NT_FILE {
101                let data = note.desc;
102                let ptrs = data.as_ptr() as *const usize;
103
104                let count = unsafe { std::ptr::read_unaligned(ptrs) };
105                let _page_size = unsafe { std::ptr::read_unaligned(ptrs.offset(1)) };
106
107                let string_table = &data[(std::mem::size_of::<usize>() * (2 + count * 3))..];
108
109                for (i, filename) in string_table.split(|chr| *chr == 0).enumerate() {
110                    if i < count {
111                        let i = i as isize;
112                        let start = unsafe { std::ptr::read_unaligned(ptrs.offset(i * 3 + 2)) };
113                        let _end = unsafe { std::ptr::read_unaligned(ptrs.offset(i * 3 + 3)) };
114                        let _page_offset =
115                            unsafe { std::ptr::read_unaligned(ptrs.offset(i * 3 + 4)) };
116
117                        let pathname = Path::new(&OsStr::from_bytes(filename)).to_path_buf();
118                        filenames.insert(start, pathname);
119                    }
120                }
121            }
122        }
123
124        let mut maps = Vec::new();
125        for ph in elf.program_headers {
126            if ph.p_type == goblin::elf::program_header::PT_LOAD {
127                let pathname = filenames.get(&(ph.p_vaddr as _));
128                let map = CoreMapRange {
129                    pathname: pathname.cloned(),
130                    segment: ph,
131                };
132                info!(
133                    "map: {:016x}-{:016x} {}{}{} {}",
134                    map.start(),
135                    map.start() + map.size(),
136                    if map.is_read() { 'r' } else { '-' },
137                    if map.is_write() { 'w' } else { '-' },
138                    if map.is_exec() { 'x' } else { '-' },
139                    map.filename()
140                        .unwrap_or(&std::path::PathBuf::from(""))
141                        .display()
142                );
143
144                maps.push(map);
145            }
146        }
147
148        Ok(CoreDump {
149            filename: filename.to_owned(),
150            contents,
151            maps,
152            psinfo,
153            status,
154        })
155    }
156}
157
158impl ProcessMemory for CoreDump {
159    fn read(&self, addr: usize, buf: &mut [u8]) -> Result<(), remoteprocess::Error> {
160        let start = addr as u64;
161        let _end = (addr + buf.len()) as u64;
162
163        for map in &self.maps {
164            // TODO: one issue here is the bss addr spans multiple mmap segments - so checking the 'end'
165            // here means we skip it. Instead we're just checking if the start address exists in
166            // the segment
167            let ph = &map.segment;
168            if start >= ph.p_vaddr && start <= (ph.p_vaddr + ph.p_memsz) {
169                let offset = (start - ph.p_vaddr + ph.p_offset) as usize;
170                buf.copy_from_slice(&self.contents[offset..(offset + buf.len())]);
171                return Ok(());
172            }
173        }
174
175        let io_error = std::io::Error::from_raw_os_error(libc::EFAULT);
176        Err(remoteprocess::Error::IOError(io_error))
177    }
178}
179
/// A core dump of a Python process, plus the interpreter details needed to
/// read stack traces out of it.
pub struct PythonCoreDump {
    /// The underlying core dump that memory reads are served from.
    core: CoreDump,
    /// Python version, used to pick the matching interpreter bindings.
    version: Version,
    /// Address of the interpreter state inside the dumped process.
    interpreter_address: usize,
    /// Thread state address handed to `get_stack_traces`; used to work out
    /// which thread holds the GIL.
    threadstate_address: usize,
}
186
187impl PythonCoreDump {
188    pub fn new<P: AsRef<Path>>(filename: P) -> Result<PythonCoreDump, Error> {
189        let core = CoreDump::new(filename)?;
190        let maps = &core.maps;
191
192        // Get the python binary from the maps, and parse it
193        let (python_filename, python_binary) = {
194            let map = maps
195                .iter()
196                .find(|m| m.filename().is_some() & m.is_exec())
197                .ok_or_else(|| format_err!("Failed to get binary from coredump"))?;
198            let python_filename = map.filename().unwrap();
199            let python_binary = parse_binary(python_filename, map.start() as _, map.size() as _);
200            info!("Found python binary @ {}", python_filename.display());
201            (python_filename.to_owned(), python_binary)
202        };
203
204        // get the libpython binary (if any) from maps
205        let libpython_binary = {
206            let libmap = maps.iter().find(|m| {
207                if let Some(pathname) = m.filename() {
208                    if let Some(pathname) = pathname.to_str() {
209                        return is_python_lib(pathname) && m.is_exec();
210                    }
211                }
212                false
213            });
214
215            let mut libpython_binary: Option<BinaryInfo> = None;
216            if let Some(libpython) = libmap {
217                if let Some(filename) = &libpython.filename() {
218                    info!("Found libpython binary @ {}", filename.display());
219                    let parsed =
220                        parse_binary(filename, libpython.start() as u64, libpython.size() as u64)?;
221                    libpython_binary = Some(parsed);
222                }
223            }
224            libpython_binary
225        };
226
227        // If we have a libpython binary - we can tolerate failures on parsing the main python binary.
228        let python_binary = match libpython_binary {
229            None => Some(python_binary.context("Failed to parse python binary")?),
230            _ => python_binary.ok(),
231        };
232
233        let python_info = PythonProcessInfo {
234            python_binary,
235            libpython_binary,
236            maps: Box::new(core.maps.clone()),
237            python_filename,
238            dockerized: false,
239        };
240
241        let version =
242            get_python_version(&python_info, &core).context("failed to get python version")?;
243        info!("Got python version {}", version);
244
245        let interpreter_address = get_interpreter_address(&python_info, &core, &version)?;
246        info!("Found interpreter at 0x{:016x}", interpreter_address);
247
248        // lets us figure out which thread has the GIL
249        let config = Config::default();
250        let threadstate_address =
251            get_threadstate_address(interpreter_address, &python_info, &core, &version, &config)?;
252        info!("found threadstate at 0x{:016x}", threadstate_address);
253
254        Ok(PythonCoreDump {
255            core,
256            version,
257            interpreter_address,
258            threadstate_address,
259        })
260    }
261
262    pub fn get_stack(&self, config: &Config) -> Result<Vec<StackTrace>, Error> {
263        if config.native {
264            return Err(format_err!(
265                "Native unwinding isn't yet supported with coredumps"
266            ));
267        }
268
269        if config.subprocesses {
270            return Err(format_err!(
271                "Subprocesses can't be used for getting stacktraces from coredumps"
272            ));
273        }
274
275        // different versions have different layouts, check as appropriate
276        match self.version {
277            Version {
278                major: 2,
279                minor: 3..=7,
280                ..
281            } => self._get_stack::<v2_7_15::_is>(config),
282            Version {
283                major: 3, minor: 3, ..
284            } => self._get_stack::<v3_3_7::_is>(config),
285            Version {
286                major: 3,
287                minor: 4..=5,
288                ..
289            } => self._get_stack::<v3_5_5::_is>(config),
290            Version {
291                major: 3, minor: 6, ..
292            } => self._get_stack::<v3_6_6::_is>(config),
293            Version {
294                major: 3, minor: 7, ..
295            } => self._get_stack::<v3_7_0::_is>(config),
296            Version {
297                major: 3, minor: 8, ..
298            } => self._get_stack::<v3_8_0::_is>(config),
299            Version {
300                major: 3, minor: 9, ..
301            } => self._get_stack::<v3_9_5::_is>(config),
302            Version {
303                major: 3,
304                minor: 10,
305                ..
306            } => self._get_stack::<v3_10_0::_is>(config),
307            Version {
308                major: 3,
309                minor: 11,
310                ..
311            } => self._get_stack::<v3_11_0::_is>(config),
312            Version {
313                major: 3,
314                minor: 12,
315                ..
316            } => self._get_stack::<v3_12_0::_is>(config),
317            Version {
318                major: 3,
319                minor: 13,
320                ..
321            } => self._get_stack::<v3_13_0::_is>(config),
322            Version {
323                major: 3,
324                minor: 14,
325                ..
326            } => self._get_stack::<v3_14_0::_is>(config),
327            _ => Err(format_err!(
328                "Unsupported version of Python: {}",
329                self.version
330            )),
331        }
332    }
333
334    fn _get_stack<I: InterpreterState>(&self, config: &Config) -> Result<Vec<StackTrace>, Error> {
335        let mut traces = get_stack_traces::<I, CoreDump>(
336            self.interpreter_address,
337            &self.core,
338            self.threadstate_address,
339            Some(config),
340        )?;
341        let thread_names = thread_names_from_interpreter::<I, CoreDump>(
342            self.interpreter_address,
343            &self.core,
344            &self.version,
345        )
346        .ok();
347
348        for trace in &mut traces {
349            if let Some(ref thread_names) = thread_names {
350                trace.thread_name = thread_names.get(&trace.thread_id).cloned();
351            }
352
353            for frame in &mut trace.frames {
354                if let Some(locals) = frame.locals.as_mut() {
355                    let max_length = (128 * config.dump_locals) as isize;
356                    for local in locals {
357                        let repr = format_variable::<I, CoreDump>(
358                            &self.core,
359                            &self.version,
360                            local.addr,
361                            max_length,
362                        );
363                        local.repr = Some(repr.unwrap_or_else(|_| "?".to_owned()));
364                    }
365                }
366            }
367        }
368        Ok(traces)
369    }
370
371    pub fn print_traces(&self, traces: &Vec<StackTrace>, config: &Config) -> Result<(), Error> {
372        if config.dump_json {
373            println!("{}", serde_json::to_string_pretty(&traces)?);
374            return Ok(());
375        }
376
377        if let Some(status) = self.core.status.first() {
378            println!(
379                "Signal {}: {}",
380                style(status.pr_cursig).bold().yellow(),
381                self.core.filename.display()
382            );
383        }
384
385        if let Some(psinfo) = self.core.psinfo {
386            println!(
387                "Process {}: {}",
388                style(psinfo.pr_pid).bold().yellow(),
389                OsStr::from_bytes(&psinfo.pr_psargs).to_string_lossy()
390            );
391        }
392        println!("Python v{}", style(&self.version).bold());
393        println!();
394        for trace in traces.iter().rev() {
395            print_trace(trace, false);
396        }
397        Ok(())
398    }
399}
400
/// `#[repr(C)]` mirrors of the note payloads that are read out of core files
/// (`NT_PRSTATUS` and `NT_PRPSINFO`). Every field is a plain integer or an
/// array of integers.
mod elfcore {
    use std::os::raw::{c_char, c_int, c_long, c_short, c_uchar, c_uint, c_ulong};

    /// Signal information stored at the start of `elf_prstatus`.
    #[repr(C)]
    #[derive(Debug, Copy, Clone)]
    pub struct elf_siginfo {
        pub si_signo: c_int,
        pub si_code: c_int,
        pub si_errno: c_int,
    }

    /// Seconds / microseconds pair used for the time fields of `elf_prstatus`.
    #[repr(C)]
    #[derive(Debug, Copy, Clone)]
    pub struct timeval {
        pub tv_sec: c_long,
        pub tv_usec: c_long,
    }

    /// Leading portion of an `NT_PRSTATUS` note payload (one per thread).
    #[repr(C)]
    #[derive(Debug, Copy, Clone)]
    pub struct elf_prstatus {
        pub pr_info: elf_siginfo,
        pub pr_cursig: c_short,
        pub pr_sigpend: c_ulong,
        pub pr_sighold: c_ulong,
        pub pr_pid: c_int,
        pub pr_ppid: c_int,
        pub pr_pgrp: c_int,
        pub pr_sid: c_int,
        pub pr_utime: timeval,
        pub pr_stime: timeval,
        pub pr_cutime: timeval,
        pub pr_cstime: timeval,
        // TODO: the thread's registers come next in the note. They aren't
        // needed right now, but will be if we want to do unwinding.
    }

    /// Payload of an `NT_PRPSINFO` note (process-wide information).
    #[repr(C)]
    #[derive(Debug, Copy, Clone)]
    pub struct elf_prpsinfo {
        pub pr_state: c_char,
        pub pr_sname: c_char,
        pub pr_zomb: c_char,
        pub pr_nice: c_char,
        pub pr_flag: c_ulong,
        pub pr_uid: c_uint,
        pub pr_gid: c_uint,
        pub pr_pid: c_int,
        pub pr_ppid: c_int,
        pub pr_pgrp: c_int,
        pub pr_sid: c_int,
        pub pr_fname: [c_uchar; 16],
        pub pr_psargs: [c_uchar; 80],
    }
}
454
#[cfg(test)]
mod test {
    use super::*;
    use py_spy_testdata::get_coredump_path;

    /// Reads the stack traces out of a stored Python 3.9 core dump with two threads.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_coredump() {
        // The python binary for this core dump isn't available here, so the
        // interpreter address and version can't (yet) be discovered
        // automatically. They are specified by hand instead.
        let python_core = PythonCoreDump {
            core: CoreDump::new(&get_coredump_path("python_3_9_threads")).unwrap(),
            version: Version {
                major: 3,
                minor: 9,
                patch: 13,
                release_flags: String::new(),
                build_metadata: None,
            },
            interpreter_address: 0x0000_55a8_293d_be20,
            threadstate_address: 0x0000_55a8_2745_fe18,
        };

        let traces = python_core.get_stack(&Config::default()).unwrap();

        // Two threads are expected: the child thread first, then the main thread.
        assert_eq!(traces.len(), 2);
        let (child_thread, main_thread) = (&traces[0], &traces[1]);

        assert_eq!(main_thread.frames.len(), 1);
        assert_eq!(main_thread.frames[0].name, "<module>");
        assert_eq!(main_thread.thread_name, Some("MainThread".to_owned()));

        assert_eq!(child_thread.frames.len(), 5);
        let innermost = &child_thread.frames[0];
        assert_eq!(innermost.name, "dump_sum");
        assert_eq!(innermost.line, 16);
        assert_eq!(child_thread.thread_name, Some("child_thread".to_owned()));
    }
}