use regex::Regex;
#[cfg(windows)]
use regex::RegexBuilder;
#[cfg(windows)]
use std::collections::HashMap;
use std::mem::size_of;
use std::path::Path;
use std::slice;
use anyhow::{Context, Error, Result};
use lazy_static::lazy_static;
use proc_maps::{get_process_maps, MapRange};
#[cfg(not(target_os = "macos"))]
use remoteprocess::Pid;
use remoteprocess::ProcessMemory;
use crate::binary_parser::{parse_binary, BinaryInfo};
use crate::config::Config;
use crate::python_bindings::{
pyruntime, v2_7_15, v3_10_0, v3_11_0, v3_12_0, v3_13_0, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0,
v3_9_5,
};
use crate::python_interpreters::{InterpreterState, ThreadState};
use crate::stack_trace::get_stack_traces;
use crate::version::Version;
/// Holds information about the python process: memory map layout, parsed info
/// for the python binary and (if present) the libpython binary.
pub struct PythonProcessInfo {
/// Parsed main executable. `PythonProcessInfo::new` leaves this as `None` only when
/// parsing the executable failed but a libpython binary was found.
pub python_binary: Option<BinaryInfo>,
/// Parsed libpython shared library / framework, if one was found for the process.
pub libpython_binary: Option<BinaryInfo>,
/// Memory map ranges of the process, used to test whether an address is mapped.
pub maps: Box<dyn ContainsAddr>,
/// Path that was used to parse the main executable.
pub python_filename: std::path::PathBuf,
/// Result of `is_dockerized` for the process (`false` if that check itself failed).
#[cfg(target_os = "linux")]
pub dockerized: bool,
}
impl PythonProcessInfo {
/// Collects the memory maps of `process` and parses the main python executable and,
/// when one is mapped, the libpython library.
///
/// # Errors
///
/// Returns an error when the executable name or the memory maps cannot be obtained,
/// when the process has no memory maps at all, when a located libpython binary fails
/// to parse, or when the main executable fails to parse and no libpython was found.
pub fn new(process: &remoteprocess::Process) -> Result<PythonProcessInfo, Error> {
let filename = process
.exe()
.context("Failed to get process executable name. Check that the process is running.")?;
// On Windows the comparison against mapped filenames is case-insensitive:
// both sides are lowercased before comparing.
#[cfg(windows)]
let filename = filename.to_lowercase();
#[cfg(windows)]
let is_python_bin = |pathname: &str| pathname.to_lowercase() == filename;
#[cfg(not(windows))]
let is_python_bin = |pathname: &str| pathname == filename;
// Get the virtual memory layout of the process and log every region at debug level.
let maps = get_process_maps(process.pid)?;
info!("Got virtual memory maps from pid {}:", process.pid);
for map in &maps {
debug!(
"map: {:016x}-{:016x} {}{}{} {}",
map.start(),
map.start() + map.size(),
if map.is_read() { 'r' } else { '-' },
if map.is_write() { 'w' } else { '-' },
if map.is_exec() { 'x' } else { '-' },
map.filename()
.unwrap_or(&std::path::PathBuf::from(""))
.display()
);
}
// Parse the main python binary. The result is kept as a `Result` here; whether a
// failure is fatal is decided further down, once we know if libpython exists.
let (python_binary, python_filename) = {
// Find the map region backed by the executable (and, except on Windows,
// marked executable).
let map = maps.iter().find(|m| {
if let Some(pathname) = m.filename() {
if let Some(pathname) = pathname.to_str() {
#[cfg(not(windows))]
{
return is_python_bin(pathname) && m.is_exec();
}
#[cfg(windows)]
{
return is_python_bin(pathname);
}
}
}
false
});
// If no region matches the executable name, fall back to the first region;
// an empty map list is an error.
let map = match map {
Some(map) => map,
None => {
warn!("Failed to find '{}' in virtual memory maps, falling back to first map region", filename);
maps.first().ok_or_else(|| {
format_err!("Failed to get virtual memory maps from process")
})?
}
};
#[cfg(not(target_os = "linux"))]
let filename = std::path::PathBuf::from(filename);
// On Linux the binary is read through /proc/<pid>/exe instead of the reported path.
#[cfg(target_os = "linux")]
let filename = std::path::PathBuf::from(format!("/proc/{}/exe", process.pid));
let python_binary = parse_binary(&filename, map.start() as u64, map.size() as u64);
// On Windows, merge in the symbols resolved by `get_windows_python_symbols`.
#[cfg(windows)]
let python_binary = python_binary.and_then(|mut pb| {
get_windows_python_symbols(process.pid, &filename, map.start() as u64)
.map(|symbols| {
pb.symbols.extend(symbols);
pb
})
.map_err(|err| err.into())
});
// On macOS, shift every symbol address (and a non-zero bss_addr) down by the
// distance between the `_mh_execute_header` symbol and the start of the map.
// NOTE(review): the index expression presumably panics if `_mh_execute_header`
// is missing from the symbol table — confirm this cannot happen.
#[cfg(target_os = "macos")]
let python_binary = python_binary.map(|mut pb| {
let offset = pb.symbols["_mh_execute_header"] - map.start() as u64;
for address in pb.symbols.values_mut() {
*address -= offset;
}
if pb.bss_addr != 0 {
pb.bss_addr -= offset;
}
pb
});
(python_binary, filename)
};
// Likewise look for a libpython library among the mapped files.
let libpython_binary = {
let libmaps: Vec<_> = maps
.iter()
.filter(|m| {
if let Some(pathname) = m.filename() {
if let Some(pathname) = pathname.to_str() {
#[cfg(not(windows))]
{
return is_python_lib(pathname) && m.is_exec();
}
#[cfg(windows)]
{
return is_python_lib(pathname);
}
}
}
false
})
.collect();
let mut libpython_binary: Option<BinaryInfo> = None;
// Pick which matching region to use: the first one, or on Linux the one with
// the smallest file offset.
#[cfg(not(target_os = "linux"))]
let libpython_option = if !libmaps.is_empty() {
Some(&libmaps[0])
} else {
None
};
#[cfg(target_os = "linux")]
let libpython_option = libmaps.iter().min_by_key(|m| m.offset);
if let Some(libpython) = libpython_option {
if let Some(filename) = &libpython.filename() {
info!("Found libpython binary @ {}", filename.display());
// On Linux the library is opened through /proc/<pid>/root, i.e. relative
// to the target process's root directory.
#[cfg(target_os = "linux")]
let filename = &std::path::PathBuf::from(format!(
"/proc/{}/root{}",
process.pid,
filename.display()
));
// Unlike the main binary, a parse failure here is returned immediately.
// `mut` is only needed by the Windows-only statement below.
#[allow(unused_mut)]
let mut parsed =
parse_binary(filename, libpython.start() as u64, libpython.size() as u64)?;
#[cfg(windows)]
parsed.symbols.extend(get_windows_python_symbols(
process.pid,
filename,
libpython.start() as u64,
)?);
libpython_binary = Some(parsed);
}
}
// On macOS, if no libpython was found in the maps, search the dyld info for a
// Python framework image instead.
#[cfg(target_os = "macos")]
{
if libpython_binary.is_none() {
use proc_maps::mac_maps::get_dyld_info;
let dyld_infos = get_dyld_info(process.pid)?;
for dyld in &dyld_infos {
let segname =
unsafe { std::ffi::CStr::from_ptr(dyld.segment.segname.as_ptr()) };
debug!(
"dyld: {:016x}-{:016x} {:10} {}",
dyld.segment.vmaddr,
dyld.segment.vmaddr + dyld.segment.vmsize,
segname.to_string_lossy(),
dyld.filename.display()
);
}
// The byte values below spell the NUL-terminated ASCII string "__DATA".
let python_dyld_data = dyld_infos.iter().find(|m| {
if let Some(filename) = m.filename.to_str() {
return is_python_framework(filename)
&& m.segment.segname[0..7] == [95, 95, 68, 65, 84, 65, 0];
}
false
});
if let Some(libpython) = python_dyld_data {
info!(
"Found libpython binary from dyld @ {}",
libpython.filename.display()
);
let mut binary = parse_binary(
&libpython.filename,
libpython.segment.vmaddr,
libpython.segment.vmsize,
)?;
// Override the parsed BSS range with the whole __DATA segment.
binary.bss_addr = libpython.segment.vmaddr;
binary.bss_size = libpython.segment.vmsize;
libpython_binary = Some(binary);
}
}
}
libpython_binary
};
// With a libpython binary available, a failure to parse the main binary is
// tolerated; without one it is an error.
let python_binary = match libpython_binary {
None => Some(python_binary.context("Failed to parse python binary")?),
_ => python_binary.ok(),
};
// A failure of the namespace check itself is treated as "not dockerized".
#[cfg(target_os = "linux")]
let dockerized = is_dockerized(process.pid).unwrap_or(false);
Ok(PythonProcessInfo {
python_binary,
libpython_binary,
maps: Box::new(maps),
python_filename,
#[cfg(target_os = "linux")]
dockerized,
})
}
/// Looks up the address recorded for `symbol`.
///
/// The main python binary is consulted first and the libpython binary second;
/// `None` is returned when neither one knows the symbol.
pub fn get_symbol(&self, symbol: &str) -> Option<&u64> {
    let from_python = self
        .python_binary
        .as_ref()
        .and_then(|binary| binary.symbols.get(symbol));
    if let Some(address) = from_python {
        info!("got symbol {} (0x{:016x}) from python binary", symbol, address);
        return Some(address);
    }

    let address = self.libpython_binary.as_ref()?.symbols.get(symbol)?;
    info!(
        "got symbol {} (0x{:016x}) from libpython binary",
        symbol, address
    );
    Some(address)
}
}
/// Determines the python version of the target process.
///
/// Strategies are tried in order until one succeeds:
/// 1. scan 128 bytes at the `Py_GetVersion.version` (or `version`) symbol,
/// 2. scan the BSS section of the main python binary,
/// 3. scan the BSS section of the libpython binary,
/// 4. parse `major.minor` out of a `python<major>.<minor>` executable file name
///    (the patch level is reported as 0 in that case).
///
/// # Errors
///
/// Fails if copying either BSS section from the process fails, or if none of the
/// strategies yields a version.
pub fn get_python_version<P>(python_info: &PythonProcessInfo, process: &P) -> Result<Version, Error>
where
    P: ProcessMemory,
{
    let version_symbol = python_info
        .get_symbol("Py_GetVersion.version")
        .or_else(|| python_info.get_symbol("version"));
    if let Some(&symbol_addr) = version_symbol {
        info!("Getting version from symbol address");
        // Failures here are not fatal: fall through to the BSS scans.
        let scanned = process
            .copy(symbol_addr as usize, 128)
            .ok()
            .and_then(|bytes| Version::scan_bytes(&bytes).ok());
        if let Some(version) = scanned {
            return Ok(version);
        }
    }

    if let Some(main_binary) = python_info.python_binary.as_ref() {
        info!("Getting version from python binary BSS");
        let section = process.copy(main_binary.bss_addr as usize, main_binary.bss_size as usize)?;
        let scanned = Version::scan_bytes(&section)
            .map_err(|err| info!("Failed to get version from BSS section: {}", err));
        if let Ok(version) = scanned {
            return Ok(version);
        }
    }

    if let Some(shared_lib) = python_info.libpython_binary.as_ref() {
        info!("Getting version from libpython BSS");
        let section = process.copy(shared_lib.bss_addr as usize, shared_lib.bss_size as usize)?;
        let scanned = Version::scan_bytes(&section)
            .map_err(|err| info!("Failed to get version from libpython BSS section: {}", err));
        if let Ok(version) = scanned {
            return Ok(version);
        }
    }

    info!(
        "Trying to get version from path: {}",
        python_info.python_filename.display()
    );
    // Only the first two dot-separated components after the "python" prefix are used.
    let from_file_name = Path::new(&python_info.python_filename)
        .file_name()
        .and_then(|name| name.to_str())
        .and_then(|name| name.strip_prefix("python"))
        .and_then(|suffix| {
            let mut components = suffix.split('.');
            let major = components.next()?.parse::<u64>().ok()?;
            let minor = components.next()?.parse::<u64>().ok()?;
            Some((major, minor))
        });

    match from_file_name {
        Some((major, minor)) => Ok(Version {
            major,
            minor,
            patch: 0,
            release_flags: "".to_owned(),
            build_metadata: None,
        }),
        None => Err(format_err!(
            "Failed to find python version from target process"
        )),
    }
}
pub fn get_interpreter_address<P>(
python_info: &PythonProcessInfo,
process: &P,
version: &Version,
) -> Result<usize, Error>
where
P: ProcessMemory,
{
match version {
Version {
major: 3,
minor: 13,
..
} => {
if let Some(&addr) = python_info.get_symbol("_PyRuntime") {
let debug_offsets: v3_13_0::_Py_DebugOffsets =
process.copy_struct(addr as usize)?;
let addr = process.copy_struct(
addr as usize + debug_offsets.runtime_state.interpreters_head as usize,
)?;
match check_interpreter_addresses(&[addr], &*python_info.maps, process, version) {
Ok(addr) => return Ok(addr),
Err(_) => {
warn!(
"Interpreter address from _PyRuntime symbol is invalid {:016x}",
addr
);
}
};
}
}
Version {
major: 3,
minor: 7..=12,
..
} => {
if let Some(&addr) = python_info.get_symbol("_PyRuntime") {
let addr = process
.copy_struct(addr as usize + pyruntime::get_interp_head_offset(version))?;
match check_interpreter_addresses(&[addr], &*python_info.maps, process, version) {
Ok(addr) => return Ok(addr),
Err(_) => {
warn!(
"Interpreter address from _PyRuntime symbol is invalid {:016x}",
addr
);
}
};
}
}
_ => {
if let Some(&addr) = python_info.get_symbol("interp_head") {
let addr = process.copy_struct(addr as usize)?;
match check_interpreter_addresses(&[addr], &*python_info.maps, process, version) {
Ok(addr) => return Ok(addr),
Err(_) => {
warn!(
"Interpreter address from interp_head symbol is invalid {:016x}",
addr
);
}
};
}
}
};
info!("Failed to find runtime address from symbols, scanning BSS section from main binary");
let err = if let Some(ref pb) = python_info.python_binary {
match get_interpreter_address_from_binary(pb, &*python_info.maps, process, version) {
Ok(addr) => return Ok(addr),
err => Some(err),
}
} else {
None
};
if let Some(ref lpb) = python_info.libpython_binary {
info!("Failed to get interpreter from binary BSS, scanning libpython BSS");
match get_interpreter_address_from_binary(lpb, &*python_info.maps, process, version) {
Ok(addr) => Ok(addr),
lib_err => err.unwrap_or(lib_err),
}
} else {
err.expect("Both python and libpython are invalid.")
}
}
fn get_interpreter_address_from_binary<P>(
binary: &BinaryInfo,
maps: &dyn ContainsAddr,
process: &P,
version: &Version,
) -> Result<usize, Error>
where
P: ProcessMemory,
{
if binary.pyruntime_addr != 0 {
let bss = process.copy(
binary.pyruntime_addr as usize,
binary.pyruntime_size as usize,
)?;
#[allow(clippy::cast_ptr_alignment)]
let addrs = unsafe {
slice::from_raw_parts(bss.as_ptr() as *const usize, bss.len() / size_of::<usize>())
};
if let Ok(addr) = check_interpreter_addresses(addrs, maps, process, version) {
return Ok(addr);
}
}
let bss = process.copy(binary.bss_addr as usize, binary.bss_size as usize)?;
#[allow(clippy::cast_ptr_alignment)]
let addrs = unsafe {
slice::from_raw_parts(bss.as_ptr() as *const usize, bss.len() / size_of::<usize>())
};
check_interpreter_addresses(addrs, maps, process, version)
}
fn check_interpreter_addresses<P>(
addrs: &[usize],
maps: &dyn ContainsAddr,
process: &P,
version: &Version,
) -> Result<usize, Error>
where
P: ProcessMemory,
{
fn check<I, P>(addrs: &[usize], maps: &dyn ContainsAddr, process: &P) -> Result<usize, Error>
where
I: InterpreterState,
P: ProcessMemory,
{
for &addr in addrs {
if maps.contains_addr(addr) {
let threadstate_ptr_ptr = I::threadstate_ptr_ptr(addr);
let maybe_threads = process
.copy_struct(threadstate_ptr_ptr as usize)
.context("Failed to copy PyThreadState head pointer");
let threads: *const I::ThreadState = match maybe_threads {
Ok(threads) => threads,
Err(_) => continue,
};
if maps.contains_addr(threads as usize) {
let thread = match process.copy_pointer(threads) {
Ok(thread) => thread,
Err(_) => continue,
};
if thread.interp() as usize == addr
&& get_stack_traces::<I, P>(addr, process, 0, None).is_ok()
{
return Ok(addr);
}
}
}
}
Err(format_err!(
"Failed to find a python interpreter in the .data section"
))
}
match version {
Version {
major: 2,
minor: 3..=7,
..
} => check::<v2_7_15::_is, P>(addrs, maps, process),
Version {
major: 3, minor: 3, ..
} => check::<v3_3_7::_is, P>(addrs, maps, process),
Version {
major: 3,
minor: 4..=5,
..
} => check::<v3_5_5::_is, P>(addrs, maps, process),
Version {
major: 3, minor: 6, ..
} => check::<v3_6_6::_is, P>(addrs, maps, process),
Version {
major: 3, minor: 7, ..
} => check::<v3_7_0::_is, P>(addrs, maps, process),
Version {
major: 3,
minor: 8,
patch: 0,
..
} => match version.release_flags.as_ref() {
"a1" | "a2" | "a3" => check::<v3_7_0::_is, P>(addrs, maps, process),
_ => check::<v3_8_0::_is, P>(addrs, maps, process),
},
Version {
major: 3, minor: 8, ..
} => check::<v3_8_0::_is, P>(addrs, maps, process),
Version {
major: 3, minor: 9, ..
} => check::<v3_9_5::_is, P>(addrs, maps, process),
Version {
major: 3,
minor: 10,
..
} => check::<v3_10_0::_is, P>(addrs, maps, process),
Version {
major: 3,
minor: 11,
..
} => check::<v3_11_0::_is, P>(addrs, maps, process),
Version {
major: 3,
minor: 12,
..
} => check::<v3_12_0::_is, P>(addrs, maps, process),
Version {
major: 3,
minor: 13,
..
} => check::<v3_13_0::_is, P>(addrs, maps, process),
_ => Err(format_err!("Unsupported version of Python: {}", version)),
}
}
/// Computes the address used to find out which thread currently holds the GIL.
///
/// * 3.12 / 3.13: the pointer-sized value stored in the interpreter's `ceval.gil`
///   field is read from the process and returned.
/// * 3.7 ..= 3.11: the `_PyRuntime` symbol address plus the version specific
///   `gilstate.tstate_current` offset.
/// * anything else: the address of the `_PyThreadState_Current` symbol.
///
/// When the required symbol or offset is unavailable, `0` is returned unless
/// `error_if_gil` decides this has to be an error.
///
/// # Errors
///
/// Fails if reading the `ceval.gil` field from the process fails, or if
/// `error_if_gil` reports an error.
pub fn get_threadstate_address<P>(
    interpreter_address: usize,
    python_info: &PythonProcessInfo,
    process: &P,
    version: &Version,
    config: &Config,
) -> Result<usize, Error>
where
    P: ProcessMemory,
{
    match (version.major, version.minor) {
        (3, 13) => {
            let gil_field = interpreter_address + std::mem::offset_of!(v3_13_0::_is, ceval.gil);
            Ok(process.copy_struct::<usize>(gil_field)?)
        }
        (3, 12) => {
            let gil_field = interpreter_address + std::mem::offset_of!(v3_12_0::_is, ceval.gil);
            Ok(process.copy_struct::<usize>(gil_field)?)
        }
        (3, 7..=11) => {
            let Some(&runtime) = python_info.get_symbol("_PyRuntime") else {
                error_if_gil(config, version, "failed to find _PyRuntime symbol")?;
                return Ok(0);
            };
            match pyruntime::get_tstate_current_offset(version) {
                Some(offset) => {
                    info!("Found _PyRuntime @ 0x{:016x}, getting gilstate.tstate_current from offset 0x{:x}",
                        runtime, offset);
                    Ok(runtime as usize + offset)
                }
                None => {
                    error_if_gil(
                        config,
                        version,
                        "unknown pyruntime.gilstate.tstate_current offset",
                    )?;
                    Ok(0)
                }
            }
        }
        _ => {
            if let Some(&current) = python_info.get_symbol("_PyThreadState_Current") {
                info!("Found _PyThreadState_Current @ 0x{:016x}", current);
                Ok(current as usize)
            } else {
                error_if_gil(
                    config,
                    version,
                    "failed to find _PyThreadState_Current symbol",
                )?;
                Ok(0)
            }
        }
    }
}
fn error_if_gil(config: &Config, version: &Version, msg: &str) -> Result<(), Error> {
lazy_static! {
static ref WARNED: std::sync::atomic::AtomicBool =
std::sync::atomic::AtomicBool::new(false);
}
if config.gil_only {
if !WARNED.load(std::sync::atomic::Ordering::Relaxed) {
eprintln!(
"Cannot detect GIL holding in version '{}' on the current platform (reason: {})",
version, msg
);
eprintln!("Please open an issue in https://github.com/benfred/py-spy with the Python version and your platform.");
WARNED.store(true, std::sync::atomic::Ordering::Relaxed);
}
Err(format_err!(
"Cannot detect GIL holding in version '{}' on the current platform (reason: {})",
version,
msg
))
} else {
warn!("Unable to detect GIL usage: {}", msg);
Ok(())
}
}
/// Answers whether an address belongs to a set of memory ranges.
///
/// Used as a trait object so that address validation does not depend on a concrete
/// memory map type.
pub trait ContainsAddr {
/// Returns `true` if `addr` is considered to be inside the ranges.
fn contains_addr(&self, addr: usize) -> bool;
}
/// Address lookup over the memory maps of a process.
impl ContainsAddr for Vec<MapRange> {
// On Windows the maps are not consulted: every address is reported as contained.
#[cfg(windows)]
fn contains_addr(&self, _addr: usize) -> bool {
true
}
// Everywhere else the check is delegated to `proc_maps::maps_contain_addr`.
#[cfg(not(windows))]
fn contains_addr(&self, addr: usize) -> bool {
proc_maps::maps_contain_addr(addr, self)
}
}
/// Reports whether process `pid` lives in a different mount namespace than we do.
///
/// The `/proc/<entry>/ns/mnt` link targets of the current process and of `pid`
/// are compared; differing targets yield `true`.
///
/// # Errors
///
/// Fails if either link cannot be read.
#[cfg(target_os = "linux")]
fn is_dockerized(pid: Pid) -> Result<bool, Error> {
    let mount_namespace =
        |proc_entry: &str| std::fs::read_link(format!("/proc/{}/ns/mnt", proc_entry));

    let ours = mount_namespace("self")?;
    let theirs = mount_namespace(&pid.to_string())?;
    Ok(ours != theirs)
}
/// Resolves the addresses of the python symbols of interest in a module loaded
/// into process `pid`.
///
/// `filename` is the module to load into the symbol loader and `offset` the
/// address the module is mapped at. Symbols that cannot be resolved are left out
/// of the returned map.
///
/// # Errors
///
/// Fails if the symbol loader cannot be created or the module cannot be loaded.
#[cfg(windows)]
pub fn get_windows_python_symbols(
    pid: Pid,
    filename: &Path,
    offset: u64,
) -> std::io::Result<HashMap<String, u64>> {
    use proc_maps::win_maps::SymbolLoader;

    let loader = SymbolLoader::new(pid)?;
    // Bound to a name so the loaded module stays alive while symbols are resolved.
    let _module = loader.load_module(filename)?;

    let resolved: HashMap<String, u64> = ["_PyThreadState_Current", "interp_head", "_PyRuntime"]
        .iter()
        .filter_map(|name| {
            let (base, addr) = loader.address_from_name(name).ok()?;
            // A non-zero base means `addr` is relative to that base: rebase it onto
            // the address the module is mapped at.
            let address = if base == 0 {
                addr
            } else {
                offset + addr - base
            };
            Some((String::from(*name), address))
        })
        .collect();
    Ok(resolved)
}
/// Returns `true` if `pathname` looks like a libpython shared library, e.g.
/// `/usr/lib/libpython3.8m.so` or `/tmp/_MEIOqzg01/libpython2.7.so.1.0`.
///
/// The pattern is not anchored at the end, so versioned suffixes such as `.so.1.0`
/// are accepted.
#[cfg(any(target_os = "linux", target_os = "freebsd"))]
pub fn is_python_lib(pathname: &str) -> bool {
    lazy_static! {
        // The dots are escaped so that they only match a literal '.', rather than
        // any character.
        static ref RE: Regex = Regex::new(r"/libpython\d\.\d\d?(m|d|u)?\.so").unwrap();
    }
    RE.is_match(pathname)
}
/// Returns `true` if `pathname` looks like a libpython library (a path ending in
/// `libpython<major>.<minor>[m|d|u].dylib` or `.so`) or like a Python framework
/// binary as recognised by `is_python_framework`.
#[cfg(target_os = "macos")]
pub fn is_python_lib(pathname: &str) -> bool {
    lazy_static! {
        // The dots are escaped so that they only match a literal '.', rather than
        // any character.
        static ref RE: Regex =
            Regex::new(r"/libpython\d\.\d\d?(m|d|u)?\.(dylib|so)$").unwrap();
    }
    RE.is_match(pathname) || is_python_framework(pathname)
}
/// Returns `true` if `pathname` ends in a python DLL name such as `\python37.dll`.
/// The match is case-insensitive.
#[cfg(windows)]
pub fn is_python_lib(pathname: &str) -> bool {
    lazy_static! {
        // The dot is escaped so that it only matches a literal '.', rather than
        // any character.
        static ref RE: Regex = RegexBuilder::new(r"\\python\d\d\d?(m|d|u)?\.dll$")
            .case_insensitive(true)
            .build()
            .unwrap();
    }
    RE.is_match(pathname)
}
#[cfg(target_os = "macos")]
/// Returns `true` if `pathname` names a Python framework binary: the path ends in
/// `/Python` and does not contain `Python.app` anywhere.
pub fn is_python_framework(pathname: &str) -> bool {
    // Paths containing "Python.app" are excluded even though they may also end
    // in "/Python".
    if pathname.contains("Python.app") {
        return false;
    }
    pathname.ends_with("/Python")
}
// Unit tests for the platform specific path predicates. Besides the original
// cases these cover two-digit minor versions (python 3.10+) and, on Windows,
// paths that must not be recognised.
#[cfg(test)]
mod tests {
    use super::*;

    #[cfg(target_os = "macos")]
    #[test]
    fn test_is_python_lib() {
        assert!(is_python_lib("~/Anaconda2/lib/libpython2.7.dylib"));

        // python lib configured with --with-pydebug (flag: d)
        assert!(is_python_lib("/lib/libpython3.4d.dylib"));

        // configured --with-pymalloc (flag: m)
        assert!(is_python_lib("/usr/local/lib/libpython3.8m.dylib"));

        // python2 configured with --with-wide-unicode (flag: u)
        assert!(is_python_lib("./libpython2.7u.dylib"));

        // two-digit minor versions
        assert!(is_python_lib("/usr/local/lib/libpython3.11.dylib"));

        // framework binaries are accepted as well
        assert!(is_python_lib(
            "/System/Library/Frameworks/Python.framework/Versions/2.7/Python"
        ));

        assert!(!is_python_lib("/libboost_python.dylib"));
        assert!(!is_python_lib("/lib/heapq.cpython-36m-darwin.dylib"));
    }

    #[cfg(any(target_os = "linux", target_os = "freebsd"))]
    #[test]
    fn test_is_python_lib() {
        // libpython bundled into a temporary directory
        assert!(is_python_lib("/tmp/_MEIOqzg01/libpython2.7.so.1.0"));

        // test debug/malloc/unicode flags
        assert!(is_python_lib("./libpython2.7.so"));
        assert!(is_python_lib("/usr/lib/libpython3.4d.so"));
        assert!(is_python_lib("/usr/local/lib/libpython3.8m.so"));
        assert!(is_python_lib("/usr/lib/libpython2.7u.so"));

        // two-digit minor versions
        assert!(is_python_lib(
            "/usr/lib/x86_64-linux-gnu/libpython3.10.so.1.0"
        ));
        assert!(is_python_lib("/usr/lib/libpython3.12.so"));

        // don't blindly match libraries containing "python"
        assert!(!is_python_lib("/usr/lib/libboost_python.so"));
        assert!(!is_python_lib(
            "/usr/lib/x86_64-linux-gnu/libboost_python-py27.so.1.58.0"
        ));
        assert!(!is_python_lib("/usr/lib/libboost_python-py35.so"));

        // extension modules are not libpython
        assert!(!is_python_lib(
            "/usr/lib/python3.10/lib-dynload/_ssl.cpython-310-x86_64-linux-gnu.so"
        ));
    }

    #[cfg(windows)]
    #[test]
    fn test_is_python_lib() {
        assert!(is_python_lib(
            "C:\\Users\\test\\AppData\\Local\\Programs\\Python\\Python37\\python37.dll"
        ));
        // the match is case-insensitive
        assert!(is_python_lib(
            "C:\\Users\\test\\AppData\\Local\\Programs\\Python\\Python37\\python37.DLL"
        ));
        // three-digit version numbers
        assert!(is_python_lib("C:\\Python310\\python310.dll"));

        // unrelated DLLs and the python executable itself must not match
        assert!(!is_python_lib("C:\\Windows\\System32\\kernel32.dll"));
        assert!(!is_python_lib("C:\\Python37\\python.exe"));
    }

    #[cfg(target_os = "macos")]
    #[test]
    fn test_python_frameworks() {
        // homebrew v2
        assert!(!is_python_framework("/usr/local/Cellar/python@2/2.7.15_1/Frameworks/Python.framework/Versions/2.7/Resources/Python.app/Contents/MacOS/Python"));
        assert!(is_python_framework(
            "/usr/local/Cellar/python@2/2.7.15_1/Frameworks/Python.framework/Versions/2.7/Python"
        ));

        // System python
        assert!(!is_python_framework("/System/Library/Frameworks/Python.framework/Versions/2.7/Resources/Python.app/Contents/MacOS/Python"));
        assert!(is_python_framework(
            "/System/Library/Frameworks/Python.framework/Versions/2.7/Python"
        ));

        // pyenv
        assert!(is_python_framework(
            "/Users/ben/.pyenv/versions/3.6.6/Python.framework/Versions/3.6/Python"
        ));
        assert!(!is_python_framework("/Users/ben/.pyenv/versions/3.6.6/Python.framework/Versions/3.6/Resources/Python.app/Contents/MacOS/Python"));

        // framework binary unpacked into a temporary directory
        assert!(is_python_framework(
            "/private/var/folders/3x/qy479lpd1fb2q88lc9g4d3kr0000gn/T/_MEI2Akvi8/Python"
        ));
    }
}