1use std::collections::HashMap;
2use std::ffi::OsStr;
3use std::fs::File;
4use std::io::Read;
5use std::os::unix::ffi::OsStrExt;
6use std::path::Path;
7use std::path::PathBuf;
8
9use anyhow::{Context, Error, Result};
10use console::style;
11use log::info;
12use remoteprocess::ProcessMemory;
13
14use crate::binary_parser::{parse_binary, BinaryInfo};
15use crate::config::Config;
16use crate::dump::print_trace;
17use crate::python_bindings::{
18 v2_7_15, v3_10_0, v3_11_0, v3_12_0, v3_13_0, v3_14_0, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0,
19 v3_9_5,
20};
21use crate::python_data_access::format_variable;
22use crate::python_interpreters::InterpreterState;
23use crate::python_process_info::{
24 get_interpreter_address, get_python_version, get_threadstate_address, is_python_lib,
25 ContainsAddr, PythonProcessInfo,
26};
27use crate::python_threading::thread_names_from_interpreter;
28use crate::stack_trace::{get_stack_traces, StackTrace};
29use crate::version::Version;
30
/// One memory range recorded in a core file.
///
/// Pairs an ELF program header with the path of the file that was mapped at
/// the segment's start address, when such a path is known.
#[derive(Debug, Clone)]
pub struct CoreMapRange {
    /// Path of the file mapped at this range; `None` when no path is known.
    pub pathname: Option<PathBuf>,
    /// The ELF program header describing the range (address, size, flags).
    pub segment: goblin::elf::ProgramHeader,
}
36
37impl CoreMapRange {
40 pub fn size(&self) -> usize {
41 self.segment.p_memsz as usize
42 }
43 pub fn start(&self) -> usize {
44 self.segment.p_vaddr as usize
45 }
46 pub fn filename(&self) -> Option<&Path> {
47 self.pathname.as_deref()
48 }
49 pub fn is_exec(&self) -> bool {
50 self.segment.is_executable()
51 }
52 pub fn is_write(&self) -> bool {
53 self.segment.is_write()
54 }
55 pub fn is_read(&self) -> bool {
56 self.segment.is_read()
57 }
58}
59
60impl ContainsAddr for Vec<CoreMapRange> {
61 fn contains_addr(&self, addr: usize) -> bool {
62 self.iter()
63 .any(|map| (addr >= map.start()) && (addr < (map.start() + map.size())))
64 }
65}
66
/// An ELF core file loaded fully into memory, plus the metadata extracted
/// from its program headers and note entries.
pub struct CoreDump {
    /// Path the core file was opened from.
    filename: PathBuf,
    /// The complete bytes of the core file.
    contents: Vec<u8>,
    /// One entry per `PT_LOAD` program header found in the file.
    maps: Vec<CoreMapRange>,
    /// Contents of the `NT_PRPSINFO` note, if the file had one.
    psinfo: Option<elfcore::elf_prpsinfo>,
    /// One entry per `NT_PRSTATUS` note, in the order the notes were read.
    status: Vec<elfcore::elf_prstatus>,
}
74
75impl CoreDump {
76 pub fn new<P: AsRef<Path>>(filename: P) -> Result<CoreDump, Error> {
77 let filename = filename.as_ref();
78 let mut file = File::open(filename)?;
79 let mut contents = Vec::new();
80 file.read_to_end(&mut contents)?;
81 let elf = goblin::elf::Elf::parse(&contents)?;
82
83 let notes = elf
84 .iter_note_headers(&contents)
85 .ok_or_else(|| format_err!("no note segment found"))?;
86
87 let mut filenames = HashMap::new();
88 let mut psinfo = None;
89 let mut status = Vec::new();
90 for note in notes.flatten() {
91 if note.n_type == goblin::elf::note::NT_PRPSINFO {
92 psinfo = Some(unsafe {
93 std::ptr::read_unaligned(note.desc.as_ptr() as *const elfcore::elf_prpsinfo)
94 });
95 } else if note.n_type == goblin::elf::note::NT_PRSTATUS {
96 let thread_status: elfcore::elf_prstatus = unsafe {
97 std::ptr::read_unaligned(note.desc.as_ptr() as *const elfcore::elf_prstatus)
98 };
99 status.push(thread_status);
100 } else if note.n_type == goblin::elf::note::NT_FILE {
101 let data = note.desc;
102 let ptrs = data.as_ptr() as *const usize;
103
104 let count = unsafe { std::ptr::read_unaligned(ptrs) };
105 let _page_size = unsafe { std::ptr::read_unaligned(ptrs.offset(1)) };
106
107 let string_table = &data[(std::mem::size_of::<usize>() * (2 + count * 3))..];
108
109 for (i, filename) in string_table.split(|chr| *chr == 0).enumerate() {
110 if i < count {
111 let i = i as isize;
112 let start = unsafe { std::ptr::read_unaligned(ptrs.offset(i * 3 + 2)) };
113 let _end = unsafe { std::ptr::read_unaligned(ptrs.offset(i * 3 + 3)) };
114 let _page_offset =
115 unsafe { std::ptr::read_unaligned(ptrs.offset(i * 3 + 4)) };
116
117 let pathname = Path::new(&OsStr::from_bytes(filename)).to_path_buf();
118 filenames.insert(start, pathname);
119 }
120 }
121 }
122 }
123
124 let mut maps = Vec::new();
125 for ph in elf.program_headers {
126 if ph.p_type == goblin::elf::program_header::PT_LOAD {
127 let pathname = filenames.get(&(ph.p_vaddr as _));
128 let map = CoreMapRange {
129 pathname: pathname.cloned(),
130 segment: ph,
131 };
132 info!(
133 "map: {:016x}-{:016x} {}{}{} {}",
134 map.start(),
135 map.start() + map.size(),
136 if map.is_read() { 'r' } else { '-' },
137 if map.is_write() { 'w' } else { '-' },
138 if map.is_exec() { 'x' } else { '-' },
139 map.filename()
140 .unwrap_or(&std::path::PathBuf::from(""))
141 .display()
142 );
143
144 maps.push(map);
145 }
146 }
147
148 Ok(CoreDump {
149 filename: filename.to_owned(),
150 contents,
151 maps,
152 psinfo,
153 status,
154 })
155 }
156}
157
158impl ProcessMemory for CoreDump {
159 fn read(&self, addr: usize, buf: &mut [u8]) -> Result<(), remoteprocess::Error> {
160 let start = addr as u64;
161 let _end = (addr + buf.len()) as u64;
162
163 for map in &self.maps {
164 let ph = &map.segment;
168 if start >= ph.p_vaddr && start <= (ph.p_vaddr + ph.p_memsz) {
169 let offset = (start - ph.p_vaddr + ph.p_offset) as usize;
170 buf.copy_from_slice(&self.contents[offset..(offset + buf.len())]);
171 return Ok(());
172 }
173 }
174
175 let io_error = std::io::Error::from_raw_os_error(libc::EFAULT);
176 Err(remoteprocess::Error::IOError(io_error))
177 }
178}
179
/// A core file together with the information needed to read Python stack
/// traces out of it.
pub struct PythonCoreDump {
    /// The underlying core file, used as the memory source for all reads.
    core: CoreDump,
    /// Version of the Python interpreter found in the core.
    version: Version,
    /// Address of the interpreter state inside the core's memory.
    interpreter_address: usize,
    /// Address of the thread state inside the core's memory.
    threadstate_address: usize,
}
186
187impl PythonCoreDump {
188 pub fn new<P: AsRef<Path>>(filename: P) -> Result<PythonCoreDump, Error> {
189 let core = CoreDump::new(filename)?;
190 let maps = &core.maps;
191
192 let (python_filename, python_binary) = {
194 let map = maps
195 .iter()
196 .find(|m| m.filename().is_some() & m.is_exec())
197 .ok_or_else(|| format_err!("Failed to get binary from coredump"))?;
198 let python_filename = map.filename().unwrap();
199 let python_binary = parse_binary(python_filename, map.start() as _, map.size() as _);
200 info!("Found python binary @ {}", python_filename.display());
201 (python_filename.to_owned(), python_binary)
202 };
203
204 let libpython_binary = {
206 let libmap = maps.iter().find(|m| {
207 if let Some(pathname) = m.filename() {
208 if let Some(pathname) = pathname.to_str() {
209 return is_python_lib(pathname) && m.is_exec();
210 }
211 }
212 false
213 });
214
215 let mut libpython_binary: Option<BinaryInfo> = None;
216 if let Some(libpython) = libmap {
217 if let Some(filename) = &libpython.filename() {
218 info!("Found libpython binary @ {}", filename.display());
219 let parsed =
220 parse_binary(filename, libpython.start() as u64, libpython.size() as u64)?;
221 libpython_binary = Some(parsed);
222 }
223 }
224 libpython_binary
225 };
226
227 let python_binary = match libpython_binary {
229 None => Some(python_binary.context("Failed to parse python binary")?),
230 _ => python_binary.ok(),
231 };
232
233 let python_info = PythonProcessInfo {
234 python_binary,
235 libpython_binary,
236 maps: Box::new(core.maps.clone()),
237 python_filename,
238 dockerized: false,
239 };
240
241 let version =
242 get_python_version(&python_info, &core).context("failed to get python version")?;
243 info!("Got python version {}", version);
244
245 let interpreter_address = get_interpreter_address(&python_info, &core, &version)?;
246 info!("Found interpreter at 0x{:016x}", interpreter_address);
247
248 let config = Config::default();
250 let threadstate_address =
251 get_threadstate_address(interpreter_address, &python_info, &core, &version, &config)?;
252 info!("found threadstate at 0x{:016x}", threadstate_address);
253
254 Ok(PythonCoreDump {
255 core,
256 version,
257 interpreter_address,
258 threadstate_address,
259 })
260 }
261
262 pub fn get_stack(&self, config: &Config) -> Result<Vec<StackTrace>, Error> {
263 if config.native {
264 return Err(format_err!(
265 "Native unwinding isn't yet supported with coredumps"
266 ));
267 }
268
269 if config.subprocesses {
270 return Err(format_err!(
271 "Subprocesses can't be used for getting stacktraces from coredumps"
272 ));
273 }
274
275 match self.version {
277 Version {
278 major: 2,
279 minor: 3..=7,
280 ..
281 } => self._get_stack::<v2_7_15::_is>(config),
282 Version {
283 major: 3, minor: 3, ..
284 } => self._get_stack::<v3_3_7::_is>(config),
285 Version {
286 major: 3,
287 minor: 4..=5,
288 ..
289 } => self._get_stack::<v3_5_5::_is>(config),
290 Version {
291 major: 3, minor: 6, ..
292 } => self._get_stack::<v3_6_6::_is>(config),
293 Version {
294 major: 3, minor: 7, ..
295 } => self._get_stack::<v3_7_0::_is>(config),
296 Version {
297 major: 3, minor: 8, ..
298 } => self._get_stack::<v3_8_0::_is>(config),
299 Version {
300 major: 3, minor: 9, ..
301 } => self._get_stack::<v3_9_5::_is>(config),
302 Version {
303 major: 3,
304 minor: 10,
305 ..
306 } => self._get_stack::<v3_10_0::_is>(config),
307 Version {
308 major: 3,
309 minor: 11,
310 ..
311 } => self._get_stack::<v3_11_0::_is>(config),
312 Version {
313 major: 3,
314 minor: 12,
315 ..
316 } => self._get_stack::<v3_12_0::_is>(config),
317 Version {
318 major: 3,
319 minor: 13,
320 ..
321 } => self._get_stack::<v3_13_0::_is>(config),
322 Version {
323 major: 3,
324 minor: 14,
325 ..
326 } => self._get_stack::<v3_14_0::_is>(config),
327 _ => Err(format_err!(
328 "Unsupported version of Python: {}",
329 self.version
330 )),
331 }
332 }
333
334 fn _get_stack<I: InterpreterState>(&self, config: &Config) -> Result<Vec<StackTrace>, Error> {
335 let mut traces = get_stack_traces::<I, CoreDump>(
336 self.interpreter_address,
337 &self.core,
338 self.threadstate_address,
339 Some(config),
340 )?;
341 let thread_names = thread_names_from_interpreter::<I, CoreDump>(
342 self.interpreter_address,
343 &self.core,
344 &self.version,
345 )
346 .ok();
347
348 for trace in &mut traces {
349 if let Some(ref thread_names) = thread_names {
350 trace.thread_name = thread_names.get(&trace.thread_id).cloned();
351 }
352
353 for frame in &mut trace.frames {
354 if let Some(locals) = frame.locals.as_mut() {
355 let max_length = (128 * config.dump_locals) as isize;
356 for local in locals {
357 let repr = format_variable::<I, CoreDump>(
358 &self.core,
359 &self.version,
360 local.addr,
361 max_length,
362 );
363 local.repr = Some(repr.unwrap_or_else(|_| "?".to_owned()));
364 }
365 }
366 }
367 }
368 Ok(traces)
369 }
370
371 pub fn print_traces(&self, traces: &Vec<StackTrace>, config: &Config) -> Result<(), Error> {
372 if config.dump_json {
373 println!("{}", serde_json::to_string_pretty(&traces)?);
374 return Ok(());
375 }
376
377 if let Some(status) = self.core.status.first() {
378 println!(
379 "Signal {}: {}",
380 style(status.pr_cursig).bold().yellow(),
381 self.core.filename.display()
382 );
383 }
384
385 if let Some(psinfo) = self.core.psinfo {
386 println!(
387 "Process {}: {}",
388 style(psinfo.pr_pid).bold().yellow(),
389 OsStr::from_bytes(&psinfo.pr_psargs).to_string_lossy()
390 );
391 }
392 println!("Python v{}", style(&self.version).bold());
393 println!();
394 for trace in traces.iter().rev() {
395 print_trace(trace, false);
396 }
397 Ok(())
398 }
399}
400
/// `#[repr(C)]` record layouts used to decode the payloads of core file notes.
///
/// `CoreDump::new` copies note payload bytes directly into these structs, so
/// field order and types define how those bytes are interpreted.
// NOTE(review): these presumably mirror the Linux kernel's core-dump structs
// for a 64-bit target; confirm the layout before using another architecture.
mod elfcore {
    /// Signal information embedded at the start of `elf_prstatus`.
    #[repr(C)]
    #[derive(Debug, Copy, Clone)]
    pub struct elf_siginfo {
        pub si_signo: ::std::os::raw::c_int,
        pub si_code: ::std::os::raw::c_int,
        pub si_errno: ::std::os::raw::c_int,
    }

    /// A seconds / microseconds pair, used for the time fields of `elf_prstatus`.
    #[repr(C)]
    #[derive(Debug, Copy, Clone)]
    pub struct timeval {
        pub tv_sec: ::std::os::raw::c_long,
        pub tv_usec: ::std::os::raw::c_long,
    }

    /// Per-thread status record decoded from an `NT_PRSTATUS` note.
    ///
    /// `pr_cursig` is the value printed as the signal by
    /// `PythonCoreDump::print_traces`.
    // NOTE(review): the note payload may carry more data after `pr_cstime`
    // than is declared here; only this prefix is read. Confirm against the
    // platform's definition.
    #[repr(C)]
    #[derive(Debug, Copy, Clone)]
    pub struct elf_prstatus {
        pub pr_info: elf_siginfo,
        pub pr_cursig: ::std::os::raw::c_short,
        pub pr_sigpend: ::std::os::raw::c_ulong,
        pub pr_sighold: ::std::os::raw::c_ulong,
        pub pr_pid: ::std::os::raw::c_int,
        pub pr_ppid: ::std::os::raw::c_int,
        pub pr_pgrp: ::std::os::raw::c_int,
        pub pr_sid: ::std::os::raw::c_int,
        pub pr_utime: timeval,
        pub pr_stime: timeval,
        pub pr_cutime: timeval,
        pub pr_cstime: timeval,
    }

    /// Process-level record decoded from an `NT_PRPSINFO` note.
    ///
    /// `pr_pid` and `pr_psargs` are the values printed on the "Process" line
    /// by `PythonCoreDump::print_traces`.
    #[repr(C)]
    #[derive(Debug, Copy, Clone)]
    pub struct elf_prpsinfo {
        pub pr_state: ::std::os::raw::c_char,
        pub pr_sname: ::std::os::raw::c_char,
        pub pr_zomb: ::std::os::raw::c_char,
        pub pr_nice: ::std::os::raw::c_char,
        pub pr_flag: ::std::os::raw::c_ulong,
        pub pr_uid: ::std::os::raw::c_uint,
        pub pr_gid: ::std::os::raw::c_uint,
        pub pr_pid: ::std::os::raw::c_int,
        pub pr_ppid: ::std::os::raw::c_int,
        pub pr_pgrp: ::std::os::raw::c_int,
        pub pr_sid: ::std::os::raw::c_int,
        // Fixed-size byte buffers, not guaranteed to be fully populated.
        pub pr_fname: [::std::os::raw::c_uchar; 16usize],
        pub pr_psargs: [::std::os::raw::c_uchar; 80usize],
    }
}
454
#[cfg(test)]
mod test {
    use super::*;
    use py_spy_testdata::get_coredump_path;

    /// Loads the bundled Python 3.9 core file and checks the two expected
    /// thread stacks are recovered from it.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_coredump() {
        let core_path = get_coredump_path("python_3_9_threads");

        // The version and addresses are supplied directly instead of going
        // through `PythonCoreDump::new`.
        // NOTE(review): presumably because the binaries the core refers to are
        // not available at test time; confirm.
        let python_core = PythonCoreDump {
            core: CoreDump::new(&core_path).unwrap(),
            version: Version {
                major: 3,
                minor: 9,
                patch: 13,
                release_flags: String::new(),
                build_metadata: None,
            },
            interpreter_address: 0x000055a8293dbe20,
            threadstate_address: 0x000055a82745fe18,
        };

        let traces = python_core.get_stack(&Config::default()).unwrap();
        assert_eq!(traces.len(), 2);

        // The main thread is the second trace.
        let main_thread = &traces[1];
        assert_eq!(main_thread.frames.len(), 1);
        assert_eq!(main_thread.frames[0].name, "<module>");
        assert_eq!(main_thread.thread_name, Some("MainThread".to_owned()));

        // The child thread is the first trace.
        let child_thread = &traces[0];
        assert_eq!(child_thread.frames.len(), 5);
        let innermost = &child_thread.frames[0];
        assert_eq!(innermost.name, "dump_sum");
        assert_eq!(innermost.line, 16);
        assert_eq!(child_thread.thread_name, Some("child_thread".to_owned()));
    }
}