1use std::collections::HashMap;
2use std::ffi::OsStr;
3use std::fs::File;
4use std::io::Read;
5use std::os::unix::ffi::OsStrExt;
6use std::path::Path;
7use std::path::PathBuf;
8
9use anyhow::{Context, Error, Result};
10use console::style;
11use log::info;
12use remoteprocess::ProcessMemory;
13
14use crate::binary_parser::{parse_binary, BinaryInfo};
15use crate::config::Config;
16use crate::dump::print_trace;
17use crate::python_bindings::{
18 v2_7_15, v3_10_0, v3_11_0, v3_12_0, v3_13_0, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0, v3_9_5,
19};
20use crate::python_data_access::format_variable;
21use crate::python_interpreters::InterpreterState;
22use crate::python_process_info::{
23 get_interpreter_address, get_python_version, get_threadstate_address, is_python_lib,
24 ContainsAddr, PythonProcessInfo,
25};
26use crate::python_threading::thread_names_from_interpreter;
27use crate::stack_trace::{get_stack_traces, StackTrace};
28use crate::version::Version;
29
30#[derive(Debug, Clone)]
31pub struct CoreMapRange {
32 pub pathname: Option<PathBuf>,
33 pub segment: goblin::elf::ProgramHeader,
34}
35
36impl CoreMapRange {
39 pub fn size(&self) -> usize {
40 self.segment.p_memsz as usize
41 }
42 pub fn start(&self) -> usize {
43 self.segment.p_vaddr as usize
44 }
45 pub fn filename(&self) -> Option<&Path> {
46 self.pathname.as_deref()
47 }
48 pub fn is_exec(&self) -> bool {
49 self.segment.is_executable()
50 }
51 pub fn is_write(&self) -> bool {
52 self.segment.is_write()
53 }
54 pub fn is_read(&self) -> bool {
55 self.segment.is_read()
56 }
57}
58
59impl ContainsAddr for Vec<CoreMapRange> {
60 fn contains_addr(&self, addr: usize) -> bool {
61 self.iter()
62 .any(|map| (addr >= map.start()) && (addr < (map.start() + map.size())))
63 }
64}
65
66pub struct CoreDump {
67 filename: PathBuf,
68 contents: Vec<u8>,
69 maps: Vec<CoreMapRange>,
70 psinfo: Option<elfcore::elf_prpsinfo>,
71 status: Vec<elfcore::elf_prstatus>,
72}
73
74impl CoreDump {
75 pub fn new<P: AsRef<Path>>(filename: P) -> Result<CoreDump, Error> {
76 let filename = filename.as_ref();
77 let mut file = File::open(filename)?;
78 let mut contents = Vec::new();
79 file.read_to_end(&mut contents)?;
80 let elf = goblin::elf::Elf::parse(&contents)?;
81
82 let notes = elf
83 .iter_note_headers(&contents)
84 .ok_or_else(|| format_err!("no note segment found"))?;
85
86 let mut filenames = HashMap::new();
87 let mut psinfo = None;
88 let mut status = Vec::new();
89 for note in notes.flatten() {
90 if note.n_type == goblin::elf::note::NT_PRPSINFO {
91 psinfo = Some(unsafe {
92 std::ptr::read_unaligned(note.desc.as_ptr() as *const elfcore::elf_prpsinfo)
93 });
94 } else if note.n_type == goblin::elf::note::NT_PRSTATUS {
95 let thread_status: elfcore::elf_prstatus = unsafe {
96 std::ptr::read_unaligned(note.desc.as_ptr() as *const elfcore::elf_prstatus)
97 };
98 status.push(thread_status);
99 } else if note.n_type == goblin::elf::note::NT_FILE {
100 let data = note.desc;
101 let ptrs = data.as_ptr() as *const usize;
102
103 let count = unsafe { std::ptr::read_unaligned(ptrs) };
104 let _page_size = unsafe { std::ptr::read_unaligned(ptrs.offset(1)) };
105
106 let string_table = &data[(std::mem::size_of::<usize>() * (2 + count * 3))..];
107
108 for (i, filename) in string_table.split(|chr| *chr == 0).enumerate() {
109 if i < count {
110 let i = i as isize;
111 let start = unsafe { std::ptr::read_unaligned(ptrs.offset(i * 3 + 2)) };
112 let _end = unsafe { std::ptr::read_unaligned(ptrs.offset(i * 3 + 3)) };
113 let _page_offset =
114 unsafe { std::ptr::read_unaligned(ptrs.offset(i * 3 + 4)) };
115
116 let pathname = Path::new(&OsStr::from_bytes(filename)).to_path_buf();
117 filenames.insert(start, pathname);
118 }
119 }
120 }
121 }
122
123 let mut maps = Vec::new();
124 for ph in elf.program_headers {
125 if ph.p_type == goblin::elf::program_header::PT_LOAD {
126 let pathname = filenames.get(&(ph.p_vaddr as _));
127 let map = CoreMapRange {
128 pathname: pathname.cloned(),
129 segment: ph,
130 };
131 info!(
132 "map: {:016x}-{:016x} {}{}{} {}",
133 map.start(),
134 map.start() + map.size(),
135 if map.is_read() { 'r' } else { '-' },
136 if map.is_write() { 'w' } else { '-' },
137 if map.is_exec() { 'x' } else { '-' },
138 map.filename()
139 .unwrap_or(&std::path::PathBuf::from(""))
140 .display()
141 );
142
143 maps.push(map);
144 }
145 }
146
147 Ok(CoreDump {
148 filename: filename.to_owned(),
149 contents,
150 maps,
151 psinfo,
152 status,
153 })
154 }
155}
156
157impl ProcessMemory for CoreDump {
158 fn read(&self, addr: usize, buf: &mut [u8]) -> Result<(), remoteprocess::Error> {
159 let start = addr as u64;
160 let _end = (addr + buf.len()) as u64;
161
162 for map in &self.maps {
163 let ph = &map.segment;
167 if start >= ph.p_vaddr && start <= (ph.p_vaddr + ph.p_memsz) {
168 let offset = (start - ph.p_vaddr + ph.p_offset) as usize;
169 buf.copy_from_slice(&self.contents[offset..(offset + buf.len())]);
170 return Ok(());
171 }
172 }
173
174 let io_error = std::io::Error::from_raw_os_error(libc::EFAULT);
175 Err(remoteprocess::Error::IOError(io_error))
176 }
177}
178
179pub struct PythonCoreDump {
180 core: CoreDump,
181 version: Version,
182 interpreter_address: usize,
183 threadstate_address: usize,
184}
185
186impl PythonCoreDump {
187 pub fn new<P: AsRef<Path>>(filename: P) -> Result<PythonCoreDump, Error> {
188 let core = CoreDump::new(filename)?;
189 let maps = &core.maps;
190
191 let (python_filename, python_binary) = {
193 let map = maps
194 .iter()
195 .find(|m| m.filename().is_some() & m.is_exec())
196 .ok_or_else(|| format_err!("Failed to get binary from coredump"))?;
197 let python_filename = map.filename().unwrap();
198 let python_binary = parse_binary(python_filename, map.start() as _, map.size() as _);
199 info!("Found python binary @ {}", python_filename.display());
200 (python_filename.to_owned(), python_binary)
201 };
202
203 let libpython_binary = {
205 let libmap = maps.iter().find(|m| {
206 if let Some(pathname) = m.filename() {
207 if let Some(pathname) = pathname.to_str() {
208 return is_python_lib(pathname) && m.is_exec();
209 }
210 }
211 false
212 });
213
214 let mut libpython_binary: Option<BinaryInfo> = None;
215 if let Some(libpython) = libmap {
216 if let Some(filename) = &libpython.filename() {
217 info!("Found libpython binary @ {}", filename.display());
218 let parsed =
219 parse_binary(filename, libpython.start() as u64, libpython.size() as u64)?;
220 libpython_binary = Some(parsed);
221 }
222 }
223 libpython_binary
224 };
225
226 let python_binary = match libpython_binary {
228 None => Some(python_binary.context("Failed to parse python binary")?),
229 _ => python_binary.ok(),
230 };
231
232 let python_info = PythonProcessInfo {
233 python_binary,
234 libpython_binary,
235 maps: Box::new(core.maps.clone()),
236 python_filename,
237 dockerized: false,
238 };
239
240 let version =
241 get_python_version(&python_info, &core).context("failed to get python version")?;
242 info!("Got python version {}", version);
243
244 let interpreter_address = get_interpreter_address(&python_info, &core, &version)?;
245 info!("Found interpreter at 0x{:016x}", interpreter_address);
246
247 let config = Config::default();
249 let threadstate_address =
250 get_threadstate_address(interpreter_address, &python_info, &core, &version, &config)?;
251 info!("found threadstate at 0x{:016x}", threadstate_address);
252
253 Ok(PythonCoreDump {
254 core,
255 version,
256 interpreter_address,
257 threadstate_address,
258 })
259 }
260
261 pub fn get_stack(&self, config: &Config) -> Result<Vec<StackTrace>, Error> {
262 if config.native {
263 return Err(format_err!(
264 "Native unwinding isn't yet supported with coredumps"
265 ));
266 }
267
268 if config.subprocesses {
269 return Err(format_err!(
270 "Subprocesses can't be used for getting stacktraces from coredumps"
271 ));
272 }
273
274 match self.version {
276 Version {
277 major: 2,
278 minor: 3..=7,
279 ..
280 } => self._get_stack::<v2_7_15::_is>(config),
281 Version {
282 major: 3, minor: 3, ..
283 } => self._get_stack::<v3_3_7::_is>(config),
284 Version {
285 major: 3,
286 minor: 4..=5,
287 ..
288 } => self._get_stack::<v3_5_5::_is>(config),
289 Version {
290 major: 3, minor: 6, ..
291 } => self._get_stack::<v3_6_6::_is>(config),
292 Version {
293 major: 3, minor: 7, ..
294 } => self._get_stack::<v3_7_0::_is>(config),
295 Version {
296 major: 3, minor: 8, ..
297 } => self._get_stack::<v3_8_0::_is>(config),
298 Version {
299 major: 3, minor: 9, ..
300 } => self._get_stack::<v3_9_5::_is>(config),
301 Version {
302 major: 3,
303 minor: 10,
304 ..
305 } => self._get_stack::<v3_10_0::_is>(config),
306 Version {
307 major: 3,
308 minor: 11,
309 ..
310 } => self._get_stack::<v3_11_0::_is>(config),
311 Version {
312 major: 3,
313 minor: 12,
314 ..
315 } => self._get_stack::<v3_12_0::_is>(config),
316 Version {
317 major: 3,
318 minor: 13,
319 ..
320 } => self._get_stack::<v3_13_0::_is>(config),
321 _ => Err(format_err!(
322 "Unsupported version of Python: {}",
323 self.version
324 )),
325 }
326 }
327
328 fn _get_stack<I: InterpreterState>(&self, config: &Config) -> Result<Vec<StackTrace>, Error> {
329 let mut traces = get_stack_traces::<I, CoreDump>(
330 self.interpreter_address,
331 &self.core,
332 self.threadstate_address,
333 Some(config),
334 )?;
335 let thread_names = thread_names_from_interpreter::<I, CoreDump>(
336 self.interpreter_address,
337 &self.core,
338 &self.version,
339 )
340 .ok();
341
342 for trace in &mut traces {
343 if let Some(ref thread_names) = thread_names {
344 trace.thread_name = thread_names.get(&trace.thread_id).cloned();
345 }
346
347 for frame in &mut trace.frames {
348 if let Some(locals) = frame.locals.as_mut() {
349 let max_length = (128 * config.dump_locals) as isize;
350 for local in locals {
351 let repr = format_variable::<I, CoreDump>(
352 &self.core,
353 &self.version,
354 local.addr,
355 max_length,
356 );
357 local.repr = Some(repr.unwrap_or_else(|_| "?".to_owned()));
358 }
359 }
360 }
361 }
362 Ok(traces)
363 }
364
365 pub fn print_traces(&self, traces: &Vec<StackTrace>, config: &Config) -> Result<(), Error> {
366 if config.dump_json {
367 println!("{}", serde_json::to_string_pretty(&traces)?);
368 return Ok(());
369 }
370
371 if let Some(status) = self.core.status.first() {
372 println!(
373 "Signal {}: {}",
374 style(status.pr_cursig).bold().yellow(),
375 self.core.filename.display()
376 );
377 }
378
379 if let Some(psinfo) = self.core.psinfo {
380 println!(
381 "Process {}: {}",
382 style(psinfo.pr_pid).bold().yellow(),
383 OsStr::from_bytes(&psinfo.pr_psargs).to_string_lossy()
384 );
385 }
386 println!("Python v{}", style(&self.version).bold());
387 println!();
388 for trace in traces.iter().rev() {
389 print_trace(trace, false);
390 }
391 Ok(())
392 }
393}
394
/// `#[repr(C)]` mirrors of the structs stored in core-dump notes, read
/// directly out of the raw note payload.
mod elfcore {
    use std::os::raw::{c_char, c_int, c_long, c_short, c_uchar, c_uint, c_ulong};

    /// Signal information embedded at the start of `elf_prstatus`.
    #[repr(C)]
    #[derive(Clone, Copy, Debug)]
    pub struct elf_siginfo {
        pub si_signo: c_int,
        pub si_code: c_int,
        pub si_errno: c_int,
    }

    /// Seconds / microseconds pair used for the CPU-time fields.
    #[repr(C)]
    #[derive(Clone, Copy, Debug)]
    pub struct timeval {
        pub tv_sec: c_long,
        pub tv_usec: c_long,
    }

    /// Per-thread status as read from an `NT_PRSTATUS` note.
    ///
    /// NOTE(review): this appears to cover only the leading fields of the
    /// note payload (nothing after the timing fields) - confirm against the
    /// kernel definition before adding fields.
    #[repr(C)]
    #[derive(Clone, Copy, Debug)]
    pub struct elf_prstatus {
        pub pr_info: elf_siginfo,
        pub pr_cursig: c_short,
        pub pr_sigpend: c_ulong,
        pub pr_sighold: c_ulong,
        pub pr_pid: c_int,
        pub pr_ppid: c_int,
        pub pr_pgrp: c_int,
        pub pr_sid: c_int,
        pub pr_utime: timeval,
        pub pr_stime: timeval,
        pub pr_cutime: timeval,
        pub pr_cstime: timeval,
    }

    /// Process information as read from an `NT_PRPSINFO` note.
    #[repr(C)]
    #[derive(Clone, Copy, Debug)]
    pub struct elf_prpsinfo {
        pub pr_state: c_char,
        pub pr_sname: c_char,
        pub pr_zomb: c_char,
        pub pr_nice: c_char,
        pub pr_flag: c_ulong,
        pub pr_uid: c_uint,
        pub pr_gid: c_uint,
        pub pr_pid: c_int,
        pub pr_ppid: c_int,
        pub pr_pgrp: c_int,
        pub pr_sid: c_int,
        pub pr_fname: [c_uchar; 16usize],
        pub pr_psargs: [c_uchar; 80usize],
    }
}
448
#[cfg(test)]
mod test {
    use super::*;
    use py_spy_testdata::get_coredump_path;

    /// Loads the bundled Python 3.9 two-thread core dump and checks the
    /// stacks recovered from it. The interpreter and thread-state addresses
    /// are supplied directly rather than discovered.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_coredump() {
        let dump = CoreDump::new(&get_coredump_path("python_3_9_threads")).unwrap();
        let python_core = PythonCoreDump {
            core: dump,
            version: Version {
                major: 3,
                minor: 9,
                patch: 13,
                release_flags: "".to_owned(),
                build_metadata: None,
            },
            interpreter_address: 0x000055a8293dbe20,
            threadstate_address: 0x000055a82745fe18,
        };

        let stacks = python_core.get_stack(&Config::default()).unwrap();
        assert_eq!(stacks.len(), 2);

        // The child thread is reported first.
        let child = &stacks[0];
        assert_eq!(child.frames.len(), 5);
        assert_eq!(child.frames[0].name, "dump_sum");
        assert_eq!(child.frames[0].line, 16);
        assert_eq!(child.thread_name, Some("child_thread".to_owned()));

        // The main thread follows, with a single module-level frame.
        let main = &stacks[1];
        assert_eq!(main.frames.len(), 1);
        assert_eq!(main.frames[0].name, "<module>");
        assert_eq!(main.thread_name, Some("MainThread".to_owned()));
    }
}