use std::borrow::Cow;
use std::fs;
use std::io::IoSliceMut;
use std::path::{Path, PathBuf};
use std::time::{Duration, Instant};
use anyhow::{Context, Result, anyhow, bail};
use gimli::{AttributeValue, EndianSlice, LittleEndian, Reader, Unit};
use goblin::elf::Elf;
use nix::sys::ptrace;
use nix::sys::ptrace::Options;
#[cfg(target_arch = "x86_64")]
use nix::sys::ptrace::regset::NT_PRSTATUS;
use nix::sys::uio::{RemoteIoVec, process_vm_readv};
use nix::sys::wait::{WaitPidFlag, WaitStatus, waitpid};
use nix::unistd::Pid;
/// The pair of per-thread jemalloc counters returned by a successful probe.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ThreadCounters {
/// Value read from the target's `tsd_s` `thread_allocated` field.
pub allocated_bytes: u64,
/// Value read from the target's `tsd_s` `thread_deallocated` field.
pub deallocated_bytes: u64,
}
/// Everything resolved once per target process and then reused for every
/// per-thread read: the location of the jemalloc TSD TLS symbol and the
/// offsets of the two counters inside it.
#[derive(Debug, Clone)]
pub struct JemallocProbe {
// TLS symbol and PT_TLS layout taken from the target's main executable.
symbol: TsdTlsSymbol,
// Byte offsets of the two counter members inside `tsd_s`.
offsets: CounterOffsets,
}
/// Reasons attaching to a target process can fail. Every variant carries the
/// underlying `anyhow::Error` with the human-readable detail.
#[derive(Debug)]
#[non_exhaustive]
pub enum AttachError {
/// `<proc_root>/<pid>` does not exist.
PidMissing(anyhow::Error),
/// Reading the `<proc_root>/<pid>/exe` symlink failed.
ReadlinkFailure(anyhow::Error),
/// Reading `<proc_root>/<pid>/maps` failed.
MapsReadFailure(anyhow::Error),
/// No jemalloc TSD TLS symbol was found in the executable or in any
/// executable file-backed mapping.
JemallocNotFound(anyhow::Error),
/// The symbol was found, but in a mapped ELF other than the main executable.
JemallocInDso(anyhow::Error),
/// The target ELF's `e_machine` differs from the architecture this probe
/// was compiled for.
ArchMismatch(anyhow::Error),
/// The symbol was found but no DWARF source yielded the field offsets.
DwarfParseFailure(anyhow::Error),
}
impl AttachError {
    /// Borrows the `anyhow::Error` payload, whichever variant this is.
    pub fn source(&self) -> &anyhow::Error {
        // Every variant wraps exactly one error, so a single irrefutable
        // or-pattern covers the whole enum; adding a variant breaks the build
        // here until it is listed.
        let (Self::PidMissing(inner)
        | Self::ReadlinkFailure(inner)
        | Self::MapsReadFailure(inner)
        | Self::JemallocNotFound(inner)
        | Self::JemallocInDso(inner)
        | Self::ArchMismatch(inner)
        | Self::DwarfParseFailure(inner)) = self;
        inner
    }

    /// Short, stable, kebab-case identifier for the variant.
    pub fn tag(&self) -> &'static str {
        match *self {
            Self::DwarfParseFailure(..) => "dwarf-parse-failure",
            Self::ArchMismatch(..) => "arch-mismatch",
            Self::JemallocInDso(..) => "jemalloc-in-dso",
            Self::JemallocNotFound(..) => "jemalloc-not-found",
            Self::MapsReadFailure(..) => "maps-read-failure",
            Self::ReadlinkFailure(..) => "readlink-failure",
            Self::PidMissing(..) => "pid-missing",
        }
    }
}
/// Renders as `[<tag>] <error chain>`; the payload is formatted with `{:#}`.
impl std::fmt::Display for AttachError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let tag = self.tag();
        let cause = self.source();
        f.write_fmt(format_args!("[{}] {:#}", tag, cause))
    }
}

impl std::error::Error for AttachError {}
/// Reasons a single per-thread probe can fail. Every variant carries the
/// underlying `anyhow::Error` with the human-readable detail.
#[derive(Debug)]
#[non_exhaustive]
pub enum ProbeError {
/// `ptrace::seize` on the target tid failed.
PtraceSeize(anyhow::Error),
/// `ptrace::interrupt` on the target tid failed.
PtraceInterrupt(anyhow::Error),
/// Waiting for the post-interrupt stop failed, timed out, or returned an
/// unexpected wait status.
Waitpid(anyhow::Error),
/// Reading the thread pointer from the register set failed.
GetRegset(anyhow::Error),
/// `process_vm_readv` failed or returned fewer bytes than requested.
ProcessVmReadv(anyhow::Error),
/// Computing the remote TLS address failed (overflow/underflow).
TlsArithmetic(anyhow::Error),
}
impl ProbeError {
    /// Borrows the `anyhow::Error` payload, whichever variant this is.
    pub fn source(&self) -> &anyhow::Error {
        // Every variant wraps exactly one error, so a single irrefutable
        // or-pattern covers the whole enum.
        let (Self::PtraceSeize(inner)
        | Self::PtraceInterrupt(inner)
        | Self::Waitpid(inner)
        | Self::GetRegset(inner)
        | Self::ProcessVmReadv(inner)
        | Self::TlsArithmetic(inner)) = self;
        inner
    }

    /// Short, stable, kebab-case identifier for the variant.
    pub fn tag(&self) -> &'static str {
        match *self {
            Self::TlsArithmetic(..) => "tls-arithmetic",
            Self::ProcessVmReadv(..) => "process-vm-readv",
            Self::GetRegset(..) => "get-regset",
            Self::Waitpid(..) => "waitpid",
            Self::PtraceInterrupt(..) => "ptrace-interrupt",
            Self::PtraceSeize(..) => "ptrace-seize",
        }
    }
}
/// Renders as `[<tag>] <error chain>`; the payload is formatted with `{:#}`.
impl std::fmt::Display for ProbeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let tag = self.tag();
        let cause = self.source();
        f.write_fmt(format_args!("[{}] {:#}", tag, cause))
    }
}

impl std::error::Error for ProbeError {}
/// Attaches to process `pid` using `/proc` as the procfs root.
///
/// Delegates entirely to `attach_jemalloc_at`, so every error comes from there.
// NOTE(review): `allow(dead_code)` on a `pub` item — presumably some build
// configurations never call this wrapper; confirm it is still needed.
#[allow(dead_code)]
pub fn attach_jemalloc(pid: i32) -> std::result::Result<JemallocProbe, AttachError> {
attach_jemalloc_at(Path::new("/proc"), pid)
}
/// Attaches to process `pid`, resolving procfs paths under `proc_root`.
///
/// # Errors
/// Returns `AttachError::PidMissing` when `<proc_root>/<pid>` does not exist;
/// otherwise propagates whatever `find_jemalloc_via_maps_at` reports.
pub fn attach_jemalloc_at(
    proc_root: &Path,
    pid: i32,
) -> std::result::Result<JemallocProbe, AttachError> {
    let pid_dir = proc_root.join(pid.to_string());
    if pid_dir.exists() {
        find_jemalloc_via_maps_at(proc_root, pid)
            .map(|(symbol, offsets)| JemallocProbe { symbol, offsets })
    } else {
        Err(AttachError::PidMissing(anyhow!(
            "{} does not exist",
            pid_dir.display(),
        )))
    }
}
// Upper bound on how long `wait_for_stop` polls for the post-interrupt stop
// before giving up with a `ProbeError::Waitpid`.
const WAIT_FOR_STOP_TIMEOUT: Duration = Duration::from_millis(250);
// Sleep between consecutive non-blocking `waitpid` polls in `wait_for_stop`.
const WAIT_FOR_STOP_POLL_INTERVAL: Duration = Duration::from_millis(1);
/// Reads the jemalloc counters of thread `tid` without a cached thread
/// pointer, discarding the thread pointer that the uncached path resolves.
///
/// # Errors
/// Propagates every `ProbeError` from `probe_thread_with_cache`.
pub fn probe_thread(
    probe: &JemallocProbe,
    tid: i32,
) -> std::result::Result<ThreadCounters, ProbeError> {
    let (counters, _thread_pointer) = probe_thread_with_cache(probe, tid, None)?;
    Ok(counters)
}
pub fn probe_thread_with_cache(
probe: &JemallocProbe,
tid: i32,
cached_tp: Option<u64>,
) -> std::result::Result<(ThreadCounters, u64), ProbeError> {
let pid = Pid::from_raw(tid);
let (thread_pointer, _detach) = match cached_tp {
Some(tp) => (tp, None),
None => {
ptrace::seize(pid, Options::empty()).map_err(|e| {
ProbeError::PtraceSeize(anyhow!("ptrace(PTRACE_SEIZE) on tid {tid}: {e}"))
})?;
let guard = ScopeDetach(tid);
ptrace::interrupt(pid).map_err(|e| {
ProbeError::PtraceInterrupt(anyhow!("ptrace(PTRACE_INTERRUPT) on tid {tid}: {e}"))
})?;
wait_for_stop(pid, tid)?;
let tp = arch::read_thread_pointer_ptrace(pid).map_err(|e| {
ProbeError::GetRegset(anyhow!(
"ptrace(PTRACE_GETREGSET, {}) on tid {tid}: {e}",
arch::REGSET_NAME,
))
})?;
(tp, Some(guard))
}
};
let counters =
read_counters_at_thread_pointer(thread_pointer, &probe.symbol, &probe.offsets, tid)?;
Ok((counters, thread_pointer))
}
/// Polls `waitpid` until the seized thread reports a stop, or the
/// `WAIT_FOR_STOP_TIMEOUT` budget runs out.
///
/// `pid` and `tid` name the same thread; `tid` is only used in messages.
///
/// # Errors
/// `ProbeError::Waitpid` when the deadline passes with the thread still
/// running, when `waitpid` reports any status other than a stop / ptrace
/// event, or when `waitpid` itself fails.
fn wait_for_stop(pid: Pid, tid: i32) -> std::result::Result<(), ProbeError> {
    // `__WALL` makes the wait match clone children (non-leader threads) too.
    // Kernels since 4.7 imply it for ptraced children; older ones answer
    // ECHILD for a non-leader tid without it.
    let flags = WaitPidFlag::WNOHANG | WaitPidFlag::__WALL;
    let deadline = Instant::now() + WAIT_FOR_STOP_TIMEOUT;
    loop {
        match waitpid(pid, Some(flags)) {
            Ok(WaitStatus::Stopped(_, _) | WaitStatus::PtraceEvent(_, _, _)) => return Ok(()),
            Ok(WaitStatus::StillAlive) => {
                // Non-blocking poll came back empty: check the budget, then
                // back off briefly before the next attempt.
                if Instant::now() >= deadline {
                    return Err(ProbeError::Waitpid(anyhow!(
                        "waitpid on tid {tid} did not observe the post-interrupt \
                         stop within {:?}; the target may have a conflicting \
                         tracer or the kernel delayed signal delivery beyond \
                         the budget",
                        WAIT_FOR_STOP_TIMEOUT,
                    )));
                }
                std::thread::sleep(WAIT_FOR_STOP_POLL_INTERVAL);
            }
            Ok(other) => {
                return Err(ProbeError::Waitpid(anyhow!(
                    "waitpid on tid {tid} returned unexpected status: {other:?}"
                )));
            }
            Err(e) => return Err(ProbeError::Waitpid(anyhow!("waitpid on tid {tid}: {e}"))),
        }
    }
}
/// Returns `true` when `name` is the bare `tsd_tls` or has the form
/// `<prefix>_tsd_tls` with a non-empty prefix.
fn is_jemalloc_tsd_tls_symbol(name: &str) -> bool {
    match name.strip_suffix("_tsd_tls") {
        // A bare "_tsd_tls" has nothing in front of the separator: reject.
        Some(prefix) => !prefix.is_empty(),
        None => name == "tsd_tls",
    }
}
// Name of the DWARF structure type whose members are searched for the counters.
const TSD_STRUCT_NAME: &str = "tsd_s";
// The member-name prefix is a macro rather than a `const` because `concat!`
// below only accepts literals.
macro_rules! tsd_mangle_prefix {
() => {
"cant_access_tsd_items_directly_use_a_getter_or_setter_"
};
}
// The same prefix as a plain constant.
#[allow(dead_code)]
const TSD_MANGLE_PREFIX: &str = tsd_mangle_prefix!();
// Full DWARF member names of the two counters: prefix + field name.
const ALLOCATED_FIELD: &str = concat!(tsd_mangle_prefix!(), "thread_allocated");
const DEALLOCATED_FIELD: &str = concat!(tsd_mangle_prefix!(), "thread_deallocated");
/// Architecture-specific constants and the ptrace thread-pointer read.
/// Exactly one definition of each item is compiled, selected by `target_arch`.
mod arch {
use super::*;
// Fail the build outright on any other architecture.
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
compile_error!(
"host_thread_probe supports only x86_64 and aarch64 targets; \
ptrace is same-arch and the TLS address math is arch-specific \
(Variant II on x86_64, Variant I on aarch64)"
);
// ELF `e_machine` value a target executable must carry to be probed.
#[cfg(target_arch = "x86_64")]
pub const EXPECTED_E_MACHINE: u16 = goblin::elf::header::EM_X86_64;
#[cfg(target_arch = "aarch64")]
pub const EXPECTED_E_MACHINE: u16 = goblin::elf::header::EM_AARCH64;
// Architecture name used in diagnostics.
#[cfg(target_arch = "x86_64")]
pub const ARCH_NAME: &str = "x86_64";
#[cfg(target_arch = "aarch64")]
pub const ARCH_NAME: &str = "aarch64";
// Name of the register set read below, used in error messages.
#[cfg(target_arch = "x86_64")]
pub const REGSET_NAME: &str = "NT_PRSTATUS";
#[cfg(target_arch = "aarch64")]
pub const REGSET_NAME: &str = "NT_ARM_TLS";
// Register-set id passed to PTRACE_GETREGSET on aarch64.
#[cfg(target_arch = "aarch64")]
pub const NT_ARM_TLS: libc::c_int = 0x401;
/// x86_64: the thread pointer is `fs_base` from the NT_PRSTATUS register set.
#[cfg(target_arch = "x86_64")]
pub fn read_thread_pointer_ptrace(pid: Pid) -> std::result::Result<u64, nix::errno::Errno> {
let regs = ptrace::getregset::<NT_PRSTATUS>(pid)?;
Ok(regs.fs_base)
}
/// aarch64: reads one `u64` from the NT_ARM_TLS register set via a raw
/// `PTRACE_GETREGSET` call and returns it as the thread pointer.
///
/// Returns the errno of a failed call, or `EIO` when fewer than eight bytes
/// were reported back.
#[cfg(target_arch = "aarch64")]
pub fn read_thread_pointer_ptrace(pid: Pid) -> std::result::Result<u64, nix::errno::Errno> {
let mut tpidr: u64 = 0;
// The iovec describes `tpidr` as the destination buffer.
let mut iov = libc::iovec {
iov_base: (&mut tpidr as *mut u64).cast::<libc::c_void>(),
iov_len: std::mem::size_of::<u64>(),
};
// SAFETY: `iov` points at `tpidr`, a live local of exactly `iov_len` bytes,
// and both outlive the call.
let res = unsafe {
libc::ptrace(
libc::PTRACE_GETREGSET,
pid.as_raw(),
NT_ARM_TLS as libc::c_long,
&mut iov as *mut libc::iovec,
)
};
if res == -1 {
return Err(nix::errno::Errno::last());
}
// Presumably the kernel writes the number of bytes it filled back into
// `iov_len` (see ptrace(2), PTRACE_GETREGSET); a short fill is an error.
if iov.iov_len < std::mem::size_of::<u64>() {
return Err(nix::errno::Errno::EIO);
}
Ok(tpidr)
}
}
/// Location of the jemalloc TSD TLS symbol plus the PT_TLS layout facts needed
/// to turn a thread pointer into the symbol's address.
#[derive(Debug, Clone)]
struct TsdTlsSymbol {
// Path of the ELF file the symbol was found in.
elf_path: PathBuf,
// The symbol's `st_value` as stored in the symbol table.
st_value: u64,
// PT_TLS `p_memsz` rounded up to the segment alignment.
tls_image_aligned_size: u64,
// PT_TLS `p_align`, with 0 normalised to 1.
p_align: u64,
// `e_machine` from the ELF header, checked against the probe's own arch.
e_machine: u16,
}
/// Offsets of the two counter members inside `tsd_s`, as resolved from DWARF
/// `DW_AT_data_member_location` values.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct CounterOffsets {
// Offset of the `thread_allocated` member.
thread_allocated: u64,
// Offset of the `thread_deallocated` member; `new` requires it to be the
// larger of the two.
thread_deallocated: u64,
}
impl CounterOffsets {
    /// Width in bytes of each counter as read from the target.
    const COUNTER_WIDTH: u64 = 8;

    /// Validates and stores a pair of member offsets.
    ///
    /// # Errors
    /// Fails when `thread_allocated` does not strictly precede
    /// `thread_deallocated`, or when `thread_deallocated` is so large that
    /// the end of its counter cannot be represented in a `u64`.
    fn new(thread_allocated: u64, thread_deallocated: u64) -> Result<Self> {
        if thread_allocated >= thread_deallocated {
            bail!(
                "unexpected tsd_s layout: thread_allocated ({thread_allocated}) \
                 must precede thread_deallocated ({thread_deallocated}) per \
                 jemalloc TSD_DATA_FAST ordering",
            );
        }
        // Offsets come from DWARF in a file on disk. Rejecting this here is
        // what makes the arithmetic in `combined_read_span` overflow-free.
        if thread_deallocated
            .checked_add(Self::COUNTER_WIDTH)
            .is_none()
        {
            bail!(
                "unexpected tsd_s layout: thread_deallocated offset \
                 ({thread_deallocated:#x}) leaves no room for an 8-byte counter",
            );
        }
        Ok(Self {
            thread_allocated,
            thread_deallocated,
        })
    }

    /// Number of bytes from the start of `thread_allocated` to the end of
    /// `thread_deallocated`: the size of a single read covering both.
    fn combined_read_span(&self) -> u64 {
        // Cannot overflow or underflow for values built through `new`.
        self.thread_deallocated + Self::COUNTER_WIDTH - self.thread_allocated
    }
}
/// Looks for the jemalloc TSD TLS symbol in `elf`, trying `.symtab` first and
/// `.dynsym` second, and bundles it with the PT_TLS layout.
///
/// # Errors
/// Fails when the PT_TLS layout cannot be extracted or when neither symbol
/// table contains a matching name.
fn find_tsd_tls(elf: &Elf<'_>, elf_path: &Path) -> Result<TsdTlsSymbol> {
    let e_machine = elf.header.e_machine;
    let (tls_image_aligned_size, p_align) = extract_pt_tls_layout(elf)?;

    let st_value = find_jemalloc_tsd_tls_in_table(&elf.syms, &elf.strtab)
        .or_else(|| find_jemalloc_tsd_tls_in_table(&elf.dynsyms, &elf.dynstrtab))
        .ok_or_else(|| {
            anyhow!(
                "no jemalloc TLS symbol (bare `tsd_tls` or any `<prefix>_tsd_tls`) \
                 found in .symtab or .dynsym of {}",
                elf_path.display(),
            )
        })?;

    Ok(TsdTlsSymbol {
        elf_path: elf_path.to_path_buf(),
        st_value,
        tls_image_aligned_size,
        p_align,
        e_machine,
    })
}
fn find_jemalloc_tsd_tls_in_table(
syms: &goblin::elf::Symtab<'_>,
strs: &goblin::strtab::Strtab<'_>,
) -> Option<u64> {
for sym in syms.iter() {
if let Some(name) = strs.get_at(sym.st_name)
&& is_jemalloc_tsd_tls_symbol(name)
{
return Some(sym.st_value);
}
}
None
}
fn round_up_pow2(value: u64, align: u64) -> Option<u64> {
let align = align.max(1);
value.checked_add(align - 1).map(|v| v & !(align - 1))
}
fn extract_pt_tls_layout(elf: &Elf<'_>) -> Result<(u64, u64)> {
let tls_hdr = elf
.program_headers
.iter()
.find(|ph| ph.p_type == goblin::elf::program_header::PT_TLS)
.ok_or_else(|| anyhow!("ELF has no PT_TLS segment — target does not use static TLS"))?;
debug_assert!(
tls_hdr.p_align == 0 || tls_hdr.p_align.is_power_of_two(),
"PT_TLS.p_align must be 0 or a power of two, got {}",
tls_hdr.p_align,
);
let align = tls_hdr.p_align.max(1);
let rounded = round_up_pow2(tls_hdr.p_memsz, align)
.ok_or_else(|| anyhow!("PT_TLS size arithmetic overflow"))?;
Ok((rounded, align))
}
/// Resolves the counter field offsets for the ELF at `elf_path`, looking in
/// the file itself first and in external debuginfo files second.
///
/// Order of attempts:
/// 1. If the ELF has a non-empty `.debug_info`, use its own DWARF.
/// 2. Otherwise build candidate paths from `.gnu_debuglink` and the build-id
///    note; each readable candidate must match the debuglink CRC (when a
///    debuglink exists) before its DWARF is used.
///
/// # Errors
/// Fails when the ELF cannot be read or parsed, when no candidate path could
/// be formed (with a message explaining whether the debuglink or build-id was
/// rejected or simply absent), when no candidate was readable and
/// CRC-matched, or when the chosen DWARF source does not yield the offsets.
#[allow(dead_code)]
fn resolve_field_offsets(elf_path: &Path) -> Result<CounterOffsets> {
let data = fs::read(elf_path)
.with_context(|| format!("re-read {} for DWARF inspection", elf_path.display()))?;
let elf = Elf::parse(&data).with_context(|| format!("parse ELF {}", elf_path.display()))?;
// Inline DWARF present: no need to look anywhere else.
if section_is_populated(&elf, &data, ".debug_info") {
return resolve_field_offsets_from_bytes(&data, elf_path);
}
let debuglink = read_gnu_debuglink(&elf, &data);
let build_id = read_build_id(&elf, &data);
let debuglink_name = debuglink.as_ref().map(|(n, _)| n.as_str());
let build_id_hex = build_id.as_deref();
let candidates = candidate_debuginfo_paths(elf_path, debuglink_name, build_id_hex);
// No candidates at all: work out why, most specific explanation first.
if candidates.is_empty() {
// A debuglink exists but its filename failed the safety gate.
if let Some(name) = debuglink_name
&& !debuglink_name_is_safe(name)
{
anyhow::bail!(
"{} has no populated .debug_info and its \
.gnu_debuglink filename `{}` was rejected as unsafe \
(carries path separators, NUL bytes, or `.`/`..` \
traversal forms). A well-formed `.gnu_debuglink` \
holds only a bare basename. Inspect the target ELF \
with `objdump --section .gnu_debuglink` to confirm \
the on-disk content is what your toolchain emitted; \
if it is, the toolchain is broken or the ELF was \
tampered with.",
elf_path.display(),
name,
);
}
// A build-id exists but its hex form failed the safety gate.
if let Some(hex) = build_id_hex
&& hex.len() >= 2
&& !build_id_hex_is_safe(hex)
{
anyhow::bail!(
"{} has no populated .debug_info and its \
NT_GNU_BUILD_ID hex `{}` was rejected as unsafe \
(must be even-length lowercase hex per \
`read_build_id`'s output format; uppercase, \
non-hex bytes, path separators, NUL bytes, or odd \
length all fail the gate). Inspect the target ELF \
with `readelf -n` to confirm the on-disk note is \
what your toolchain emitted; if it is, the \
toolchain is broken or the ELF was tampered with.",
elf_path.display(),
hex,
);
}
// Neither pointer produced a candidate and neither was rejected above.
anyhow::bail!(
"{} has no populated .debug_info and carries neither a \
.gnu_debuglink section nor an NT_GNU_BUILD_ID note — there \
is no pointer to external debuginfo. Rebuild the target \
with `-g`, ship a paired `.debug` file, or install the \
distro's -dbg / -debuginfo package.",
elf_path.display(),
);
}
// One human-readable entry per rejected candidate, for the final error.
let mut tried: Vec<String> = Vec::new();
for candidate in &candidates {
let debug_data = match fs::read(candidate) {
Ok(d) => d,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
tried.push(format!("{} (not found)", candidate.display()));
continue;
}
Err(e) => {
tried.push(format!("{}: {e}", candidate.display()));
continue;
}
};
// The CRC gate applies to every candidate whenever a debuglink exists,
// including candidates that were derived from the build-id.
if let Some((_, expected_crc)) = debuglink.as_ref() {
let actual = crc32fast::hash(&debug_data);
if actual != *expected_crc {
tried.push(format!(
"{} (CRC mismatch: expected {:#010x}, got {:#010x})",
candidate.display(),
expected_crc,
actual,
));
continue;
}
}
// The first candidate that is readable and passes the CRC gate decides
// the outcome, success or failure; later candidates are not tried.
return resolve_field_offsets_from_bytes(&debug_data, candidate);
}
anyhow::bail!(
"{} is stripped; searched for external debuginfo via \
debuglink={debuglink_name:?} build_id={build_id_hex:?} but \
no candidate was readable or CRC-matched. Tried: {}",
elf_path.display(),
tried.join("; "),
);
}
/// Resolves the counter field offsets from the DWARF sections of the ELF
/// image held in `data`. `source_path` is used only in error messages.
///
/// Two strategies, in order:
/// 1. Fast path: if `.debug_pubtypes` names `tsd_s`, jump to the unit it
///    points at. When that unit yields at least one of the two members, the
///    result (or a "missing" error for the other member) is returned.
/// 2. Full walk: visit every unit, taking the first offset seen for each
///    member, until both are known.
///
/// # Errors
/// Fails when the ELF or DWARF cannot be parsed, when either member is never
/// found, or when the offsets do not form a valid `CounterOffsets`.
fn resolve_field_offsets_from_bytes(data: &[u8], source_path: &Path) -> Result<CounterOffsets> {
    let elf = Elf::parse(data).with_context(|| format!("parse ELF {}", source_path.display()))?;
    // Section loader for gimli: the bytes of the first section header whose
    // name matches, or an empty slice when absent.
    let load_section = |id: gimli::SectionId| -> Result<Cow<'_, [u8]>> {
        let name = id.name();
        for sh in &elf.section_headers {
            if let Some(section_name) = elf.shdr_strtab.get_at(sh.sh_name)
                && section_name == name
            {
                let range = sh.file_range().unwrap_or(0..0);
                // Checked slicing, as in `find_section_slice`: a header
                // pointing past the end of the file gives an empty section
                // and not an index panic.
                return Ok(Cow::Borrowed(data.get(range).unwrap_or(&[])));
            }
        }
        Ok(Cow::Borrowed(&[]))
    };
    let dwarf_sections = gimli::DwarfSections::load(load_section)?;
    let dwarf = dwarf_sections.borrow(|bytes| EndianSlice::new(bytes, LittleEndian));

    // Fast path via .debug_pubtypes. Lookup failures here are not errors;
    // they just fall through to the full walk below.
    if let Some(pubtypes_data) = find_section_slice(&elf, data, ".debug_pubtypes") {
        let pubtypes = gimli::DebugPubTypes::new(pubtypes_data, LittleEndian);
        let mut items = pubtypes.items();
        while let Ok(Some(entry)) = items.next() {
            let name_bytes = entry.name().to_slice().ok();
            if name_bytes.as_ref().map(|s| s.as_ref()) == Some(TSD_STRUCT_NAME.as_bytes()) {
                let unit_offset = entry.unit_header_offset();
                if let Ok(header) = dwarf.debug_info.header_from_offset(unit_offset)
                    && let Ok(unit) = dwarf.unit(header)
                    && let Ok(Some((a, d))) = struct_member_offsets_in_unit(&dwarf, &unit)
                {
                    let allocated = a.ok_or_else(|| {
                        anyhow!(
                            ".debug_pubtypes fast path: found tsd_s but {} missing",
                            ALLOCATED_FIELD,
                        )
                    })?;
                    let deallocated = d.ok_or_else(|| {
                        anyhow!(
                            ".debug_pubtypes fast path: found tsd_s but {} missing",
                            DEALLOCATED_FIELD,
                        )
                    })?;
                    return CounterOffsets::new(allocated, deallocated);
                }
            }
        }
    }

    // Full walk: the first offset seen for each member wins.
    let mut allocated: Option<u64> = None;
    let mut deallocated: Option<u64> = None;
    let mut units = dwarf.units();
    while let Some(header) = units.next()? {
        let unit = dwarf.unit(header)?;
        if let Some((a, d)) = struct_member_offsets_in_unit(&dwarf, &unit)? {
            if let Some(v) = a {
                allocated.get_or_insert(v);
            }
            if let Some(v) = d {
                deallocated.get_or_insert(v);
            }
            if allocated.is_some() && deallocated.is_some() {
                break;
            }
        }
    }
    let allocated = allocated.ok_or_else(|| {
        anyhow!(
            "DWARF walk of {} did not find field '{}' in struct '{}'",
            source_path.display(),
            ALLOCATED_FIELD,
            TSD_STRUCT_NAME,
        )
    })?;
    let deallocated = deallocated.ok_or_else(|| {
        anyhow!(
            "DWARF walk of {} did not find field '{}' in struct '{}'",
            source_path.display(),
            DEALLOCATED_FIELD,
            TSD_STRUCT_NAME,
        )
    })?;
    CounterOffsets::new(allocated, deallocated)
}
fn resolve_field_offsets_from_dwp(
parent_data: &[u8],
dwp_data: &[u8],
source_path: &Path,
) -> Result<CounterOffsets> {
let parent_elf = Elf::parse(parent_data)
.with_context(|| format!("parse parent ELF for DWP {}", source_path.display()))?;
let dwp_elf =
Elf::parse(dwp_data).with_context(|| format!("parse DWP ELF {}", source_path.display()))?;
let load_parent = |id: gimli::SectionId| -> Result<Cow<'_, [u8]>> {
let name = id.name();
for sh in &parent_elf.section_headers {
if let Some(sn) = parent_elf.shdr_strtab.get_at(sh.sh_name)
&& sn == name
{
let range = sh.file_range().unwrap_or(0..0);
return Ok(Cow::Borrowed(&parent_data[range]));
}
}
Ok(Cow::Borrowed(&[]))
};
let parent_sections = gimli::DwarfSections::load(load_parent)?;
let parent = parent_sections.borrow(|bytes| EndianSlice::new(bytes, LittleEndian));
let load_dwp = |id: gimli::SectionId| -> Result<EndianSlice<'_, LittleEndian>> {
let dwo_name = format!("{}.dwo", id.name());
for sh in &dwp_elf.section_headers {
if let Some(sn) = dwp_elf.shdr_strtab.get_at(sh.sh_name)
&& (sn == dwo_name || sn == id.name())
{
let range = sh.file_range().unwrap_or(0..0);
let bytes = dwp_data.get(range).unwrap_or(&[]);
return Ok(EndianSlice::new(bytes, LittleEndian));
}
}
Ok(EndianSlice::new(&[], LittleEndian))
};
let empty = EndianSlice::new(&[], LittleEndian);
let dwp = gimli::DwarfPackage::load(load_dwp, empty)?;
let mut skel_units = parent.units();
while let Some(skel_header) = skel_units.next()? {
let skel_unit = parent.unit(skel_header)?;
let dwo_id = match skel_unit.dwo_id {
Some(id) => id,
None => continue,
};
let Some(split_dwarf) = dwp.find_cu(dwo_id, &parent)? else {
continue;
};
let mut split_units = split_dwarf.units();
while let Some(split_header) = split_units.next()? {
let split_unit = split_dwarf.unit(split_header)?;
if let Some((a, d)) = struct_member_offsets_in_unit(&split_dwarf, &split_unit)? {
let allocated =
a.ok_or_else(|| anyhow!("DWP: found tsd_s but {} missing", ALLOCATED_FIELD,))?;
let deallocated =
d.ok_or_else(
|| anyhow!("DWP: found tsd_s but {} missing", DEALLOCATED_FIELD,),
)?;
return CounterOffsets::new(allocated, deallocated);
}
}
}
anyhow::bail!(
"DWP walk of {} visited all skeleton units but did not find \
field '{}' in struct '{}'",
source_path.display(),
ALLOCATED_FIELD,
TSD_STRUCT_NAME,
)
}
fn section_is_populated(elf: &Elf, data: &[u8], name: &str) -> bool {
for sh in &elf.section_headers {
if let Some(n) = elf.shdr_strtab.get_at(sh.sh_name)
&& n == name
{
let range = sh.file_range().unwrap_or(0..0);
let len = data.get(range).map(<[u8]>::len).unwrap_or(0);
return len > 0;
}
}
false
}
/// Parses the `.gnu_debuglink` section into `(file name, CRC32)`.
///
/// Layout as read here: a NUL-terminated name, padding up to the next 4-byte
/// boundary, then a little-endian `u32` CRC. Returns `None` when the section
/// is missing, has no NUL, the name is not UTF-8, or the CRC is truncated.
fn read_gnu_debuglink(elf: &Elf, data: &[u8]) -> Option<(String, u32)> {
    let section = find_section_slice(elf, data, ".gnu_debuglink")?;
    let name_len = section.iter().position(|&b| b == 0)?;
    let name = std::str::from_utf8(&section[..name_len]).ok()?.to_string();
    // The CRC starts at the first 4-byte boundary after the terminating NUL.
    let crc_start = (name_len + 1).next_multiple_of(4);
    let crc_bytes: [u8; 4] = section.get(crc_start..crc_start + 4)?.try_into().ok()?;
    Some((name, u32::from_le_bytes(crc_bytes)))
}
fn read_build_id(elf: &Elf, data: &[u8]) -> Option<String> {
let bytes = find_section_slice(elf, data, ".note.gnu.build-id")?;
if bytes.len() < 12 {
return None;
}
let namesz = u32::from_le_bytes(bytes[0..4].try_into().ok()?) as usize;
let descsz = u32::from_le_bytes(bytes[4..8].try_into().ok()?) as usize;
let name_start = 12;
let desc_start = name_start + namesz.next_multiple_of(4);
let desc_end = desc_start + descsz;
if desc_end > bytes.len() {
return None;
}
let mut hex = String::with_capacity(descsz * 2);
for &b in &bytes[desc_start..desc_end] {
use std::fmt::Write;
let _ = write!(&mut hex, "{b:02x}");
}
Some(hex)
}
/// Accepts a debuglink file name only if it is a plain basename: non-empty,
/// not `.` or `..`, and free of `/` and NUL characters.
fn debuglink_name_is_safe(name: &str) -> bool {
    if matches!(name, "" | "." | "..") {
        return false;
    }
    !name.chars().any(|c| c == '/' || c == '\0')
}
/// Accepts a build-id string only if it is non-empty, of even length, and
/// made up solely of the characters `0-9` and `a-f`.
fn build_id_hex_is_safe(hex: &str) -> bool {
    let raw = hex.as_bytes();
    if raw.is_empty() || !raw.len().is_multiple_of(2) {
        return false;
    }
    raw.iter().all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f'))
}
fn candidate_debuginfo_paths(
elf_path: &Path,
debuglink_name: Option<&str>,
build_id_hex: Option<&str>,
) -> Vec<PathBuf> {
let mut out = Vec::new();
if let Some(hex) = build_id_hex
&& hex.len() >= 2
&& build_id_hex_is_safe(hex)
{
let (head, tail) = hex.split_at(2);
out.push(PathBuf::from(format!(
"/usr/lib/debug/.build-id/{head}/{tail}.debug"
)));
}
if let Some(name) = debuglink_name
&& debuglink_name_is_safe(name)
&& let Some(parent) = elf_path.parent()
{
out.push(parent.join(name));
out.push(parent.join(".debug").join(name));
if let Ok(rel) = parent.strip_prefix("/") {
out.push(PathBuf::from("/usr/lib/debug").join(rel).join(name));
}
}
out
}
fn find_section_slice<'a>(elf: &Elf, data: &'a [u8], name: &str) -> Option<&'a [u8]> {
for sh in &elf.section_headers {
if let Some(n) = elf.shdr_strtab.get_at(sh.sh_name)
&& n == name
{
let range = sh.file_range().unwrap_or(0..0);
return data.get(range);
}
}
None
}
/// Scans one DWARF unit for the offsets of the two counter members.
///
/// Returns `Ok(Some((allocated, deallocated)))` as soon as both offsets are
/// known, or with only one of them set when just one was found; `Ok(None)`
/// when the unit contains neither.
///
/// A member can be picked up in two ways:
/// * inside a `DW_TAG_structure_type` named `tsd_s`, from its direct
///   `DW_TAG_member` children;
/// * from any other `DW_TAG_member` the outer walk meets, whatever type
///   encloses it.
///
/// # Errors
/// Propagates DIE-walk errors and any error from `check_member_field`.
fn struct_member_offsets_in_unit<R: Reader>(
dwarf: &gimli::Dwarf<R>,
unit: &Unit<R>,
) -> Result<Option<(Option<u64>, Option<u64>)>> {
let mut allocated: Option<u64> = None;
let mut deallocated: Option<u64> = None;
let mut entries = unit.entries();
while let Some(entry) = entries.next_dfs()? {
// Copy what is needed out of `entry` up front; the cursor is advanced
// again inside the struct branch below.
let tag = entry.tag();
let name_attr = entry.attr_value(gimli::DW_AT_name);
let loc_attr = entry.attr_value(gimli::DW_AT_data_member_location);
if tag == gimli::DW_TAG_structure_type {
if let Some(name_val) = name_attr
&& let Ok(name_str) = dwarf.attr_string(unit, name_val)
{
let name_bytes = name_str.to_slice().ok();
if name_bytes.as_ref().map(|s| s.as_ref()) == Some(TSD_STRUCT_NAME.as_bytes()) {
// Found `tsd_s`: walk its subtree using the same cursor.
let struct_depth = entries.depth();
while let Some(child) = entries.next_dfs()? {
let child_tag = child.tag();
let child_name_attr = child.attr_value(gimli::DW_AT_name);
let child_loc_attr = child.attr_value(gimli::DW_AT_data_member_location);
let child_depth = entries.depth();
// Back at or above the struct's depth: the subtree is over.
// NOTE(review): this entry has already been consumed from the
// cursor and is not re-examined by the outer loop.
if child_depth <= struct_depth {
break;
}
// Only direct member children count; skip nested entries.
if child_depth != struct_depth + 1 || child_tag != gimli::DW_TAG_member {
continue;
}
check_member_field(
dwarf,
unit,
child_name_attr,
child_loc_attr,
&mut allocated,
&mut deallocated,
)?;
if allocated.is_some() && deallocated.is_some() {
return Ok(Some((allocated, deallocated)));
}
}
// The struct subtree ended with only one member found: report
// the partial result without scanning the rest of the unit.
if allocated.is_some() || deallocated.is_some() {
return Ok(Some((allocated, deallocated)));
}
continue;
}
}
continue;
}
// Any member outside the `tsd_s` branch is matched purely by name.
if tag == gimli::DW_TAG_member {
check_member_field(
dwarf,
unit,
name_attr,
loc_attr,
&mut allocated,
&mut deallocated,
)?;
if allocated.is_some() && deallocated.is_some() {
return Ok(Some((allocated, deallocated)));
}
continue;
}
}
// End of unit: report a partial result if there is one.
if allocated.is_some() || deallocated.is_some() {
return Ok(Some((allocated, deallocated)));
}
Ok(None)
}
/// Records the offset of a member if its name is one of the two counter
/// field names and the corresponding slot is still empty.
///
/// Members without a name, or with any other name, are ignored.
///
/// # Errors
/// Fails when the name string cannot be resolved, or when a matching
/// member's location attribute is in an unsupported form.
fn check_member_field<R: Reader>(
    dwarf: &gimli::Dwarf<R>,
    unit: &Unit<R>,
    name_attr: Option<gimli::AttributeValue<R>>,
    loc_attr: Option<gimli::AttributeValue<R>>,
    allocated: &mut Option<u64>,
    deallocated: &mut Option<u64>,
) -> Result<()> {
    let Some(name_value) = name_attr else {
        return Ok(());
    };
    let name_reader = dwarf.attr_string(unit, name_value)?;
    let name_bytes = name_reader.to_slice()?;
    let member_name: &[u8] = name_bytes.as_ref();

    // Pick the output slot this member belongs to, if any.
    let slot = if member_name == ALLOCATED_FIELD.as_bytes() {
        allocated
    } else if member_name == DEALLOCATED_FIELD.as_bytes() {
        deallocated
    } else {
        return Ok(());
    };

    // The location is decoded (and may fail) even if the slot is already set.
    let offset = member_offset(loc_attr)?;
    if slot.is_none() {
        *slot = offset;
    }
    Ok(())
}
/// Decodes a `DW_AT_data_member_location` attribute into a byte offset.
///
/// Returns `Ok(None)` when the attribute is absent. The unsigned constant
/// forms and non-negative `Sdata` are accepted.
///
/// # Errors
/// Any other form, including negative `Sdata`, is rejected.
fn member_offset<R: Reader>(attr: Option<AttributeValue<R>>) -> Result<Option<u64>> {
    let offset = match attr {
        None => return Ok(None),
        Some(AttributeValue::Udata(v) | AttributeValue::Data8(v)) => v,
        Some(AttributeValue::Data1(v)) => u64::from(v),
        Some(AttributeValue::Data2(v)) => u64::from(v),
        Some(AttributeValue::Data4(v)) => u64::from(v),
        Some(AttributeValue::Sdata(v)) if v >= 0 => v as u64,
        Some(other) => {
            return Err(anyhow!(
                "unexpected DW_AT_data_member_location form: {:?} — \
                 DWARF expression forms are not supported for field-offset resolution",
                other
            ));
        }
    };
    Ok(Some(offset))
}
// Size in bytes that `compute_tls_address_variant_i` rounds up to the TLS
// alignment to get the offset of the TLS image from the thread pointer.
#[cfg(target_arch = "aarch64")]
const TCB_SIZE_AARCH64: u64 = 16;
#[cfg(target_arch = "x86_64")]
fn compute_tls_address_variant_ii(
fs_base: u64,
tls_image_aligned_size: u64,
st_value: u64,
field_offset: u64,
) -> Result<u64> {
let image_base = fs_base.checked_sub(tls_image_aligned_size).ok_or_else(|| {
anyhow!(
"fs_base ({fs_base:#x}) is below the aligned TLS image size \
({tls_image_aligned_size:#x}); target likely has no static \
TLS initialized yet"
)
})?;
image_base
.checked_add(st_value)
.and_then(|v| v.checked_add(field_offset))
.ok_or_else(|| anyhow!("TLS address arithmetic overflow"))
}
/// aarch64 address computation: the TLS image sits above the thread pointer,
/// after `TCB_SIZE_AARCH64` rounded up to `p_align`, so the address is
/// `tpidr_el0 + round_up(TCB_SIZE_AARCH64, p_align) + st_value + field_offset`.
///
/// # Errors
/// Fails when the rounding or any of the additions overflows.
#[cfg(target_arch = "aarch64")]
fn compute_tls_address_variant_i(
    tpidr_el0: u64,
    p_align: u64,
    st_value: u64,
    field_offset: u64,
) -> Result<u64> {
    let Some(image_offset) = round_up_pow2(TCB_SIZE_AARCH64, p_align) else {
        bail!(
            "TLS image offset overflow: tcb={} align={p_align:#x}",
            TCB_SIZE_AARCH64,
        );
    };
    let mut addr = Some(tpidr_el0);
    for term in [image_offset, st_value, field_offset] {
        addr = addr.and_then(|a| a.checked_add(term));
    }
    match addr {
        Some(addr) => Ok(addr),
        None => bail!("TLS address arithmetic overflow"),
    }
}
/// Architecture-neutral entry point, x86_64 build: forwards to
/// `compute_tls_address_variant_ii`. `_p_align` is accepted only so both
/// architectures share one signature; it is unused here.
#[cfg(target_arch = "x86_64")]
fn compute_tls_address(
tp: u64,
tls_image_aligned_size: u64,
_p_align: u64,
st_value: u64,
field_offset: u64,
) -> Result<u64> {
compute_tls_address_variant_ii(tp, tls_image_aligned_size, st_value, field_offset)
}
/// Architecture-neutral entry point, aarch64 build: forwards to
/// `compute_tls_address_variant_i`. `_tls_image_aligned_size` is accepted only
/// so both architectures share one signature; it is unused here.
#[cfg(target_arch = "aarch64")]
fn compute_tls_address(
tp: u64,
_tls_image_aligned_size: u64,
p_align: u64,
st_value: u64,
field_offset: u64,
) -> Result<u64> {
compute_tls_address_variant_i(tp, p_align, st_value, field_offset)
}
/// Locates the jemalloc TSD TLS symbol and the counter field offsets for the
/// process `pid`, with procfs rooted at `proc_root`.
///
/// Steps, in order:
/// 1. Read the `exe` symlink and the `maps` file of the process.
/// 2. Map and parse the main executable (opened through the `exe` link). If
///    it carries the TLS symbol, check its architecture and then try DWARF
///    sources until one yields the offsets:
///    a. inline DWARF, only when both `.debug_info` and `.debug_pubtypes`
///       are populated;
///    b. `.dwp` packages, with the executable and then its `.debuginfo` /
///       `.debug` siblings as the skeleton parent;
///    c. external debuginfo files (sibling files plus debuglink / build-id
///       candidates, each tried under the process root and on the host);
///    d. inline `.debug_info` again, this time without requiring pubtypes.
/// 3. If the executable does not carry the symbol, scan every other
///    executable, file-backed mapping; a match there is reported as an error
///    because only the main executable is supported.
///
/// # Errors
/// * `ReadlinkFailure` / `MapsReadFailure` for step 1.
/// * `ArchMismatch` when the executable's `e_machine` is not the probe's.
/// * `DwarfParseFailure` when the symbol is in the executable but no DWARF
///   source worked.
/// * `JemallocInDso` when the symbol is found only in another mapped ELF.
/// * `JemallocNotFound` otherwise, with the last per-ELF lookup error appended.
fn find_jemalloc_via_maps_at(
proc_root: &Path,
pid: i32,
) -> std::result::Result<(TsdTlsSymbol, CounterOffsets), AttachError> {
let exe_link = proc_root.join(pid.to_string()).join("exe");
let exe_path = fs::read_link(&exe_link).map_err(|e| {
AttachError::ReadlinkFailure(anyhow::Error::from(e).context(format!(
"readlink {} (need it to gate static-TLS match)",
exe_link.display(),
)))
})?;
let maps_path = proc_root.join(pid.to_string()).join("maps");
let contents = fs::read_to_string(&maps_path).map_err(|e| {
AttachError::MapsReadFailure(
anyhow::Error::from(e).context(format!("read {}", maps_path.display())),
)
})?;
// Most recent symbol-lookup failure, kept for the final "not found" message.
let mut last_symbol_err: Option<anyhow::Error> = None;
// The executable is opened through the procfs link, not through `exe_path`.
// NOTE(review): mapping a file is `unsafe` because the file may change while
// mapped; nothing visible here guards against that.
let exe_mmap = std::fs::File::open(&exe_link).and_then(|f| unsafe { memmap2::Mmap::map(&f) });
// A failed open/map/parse is not an error: it just skips to the maps scan.
if let Ok(ref data) = exe_mmap
&& let Ok(elf) = Elf::parse(data)
{
match find_tsd_tls(&elf, &exe_path) {
Ok(symbol) => {
if symbol.e_machine != arch::EXPECTED_E_MACHINE {
return Err(AttachError::ArchMismatch(anyhow!(
"probe is {}-only; target ELF {} carries e_machine={:#x}. \
Use a probe binary matching the target's architecture \
(ptrace is same-arch).",
arch::ARCH_NAME,
symbol.elf_path.display(),
symbol.e_machine,
)));
}
// `<proc_root>/<pid>/root` prefix, used to resolve paths as the target
// process sees them.
let ns_root = proc_root.join(pid.to_string()).join("root");
let exe_rel = exe_path.strip_prefix("/").unwrap_or(&exe_path);
// (a) Inline DWARF, gated on a populated .debug_pubtypes.
if section_is_populated(&elf, data, ".debug_info")
&& section_is_populated(&elf, data, ".debug_pubtypes")
&& let Ok(offsets) = resolve_field_offsets_from_bytes(data, &exe_path)
{
return Ok((symbol, offsets));
}
// (b) DWARF packages: under the process root first, then the host path.
let dwp_candidates = [
ns_root.join(format!("{}.dwp", exe_rel.display())),
PathBuf::from(format!("{}.dwp", exe_path.display())),
];
// Alternative skeleton parents, tried when the executable itself
// does not work as the parent.
let debuginfo_parent_candidates = [
ns_root.join(format!("{}.debuginfo", exe_rel.display())),
ns_root.join(format!("{}.debug", exe_rel.display())),
];
for dwp_path in &dwp_candidates {
let dwp_mmap = match std::fs::File::open(dwp_path)
.and_then(|f| unsafe { memmap2::Mmap::map(&f) })
{
Ok(m) => m,
Err(e) => {
tracing::debug!(
pid, ?dwp_path, err = %e,
"ctprof probe: DWP not readable",
);
continue;
}
};
tracing::debug!(
pid,
?dwp_path,
bytes = dwp_mmap.len(),
"ctprof probe: trying DWP (mmap)",
);
// First attempt: the executable itself as the skeleton parent.
if let Ok(offsets) = resolve_field_offsets_from_dwp(data, &dwp_mmap, dwp_path) {
return Ok((symbol, offsets));
}
for dbg_parent_path in &debuginfo_parent_candidates {
let dbg_parent = match std::fs::File::open(dbg_parent_path)
.and_then(|f| unsafe { memmap2::Mmap::map(&f) })
{
Ok(m) => m,
Err(_) => continue,
};
tracing::debug!(
pid,
?dbg_parent_path,
bytes = dbg_parent.len(),
"ctprof probe: trying DWP with debuginfo parent",
);
if let Ok(offsets) =
resolve_field_offsets_from_dwp(&dbg_parent, &dwp_mmap, dwp_path)
{
return Ok((symbol, offsets));
}
}
tracing::debug!(
pid,
?dwp_path,
"ctprof probe: DWP found but no parent had skeleton units",
);
}
// (c) External debuginfo files. Each debuglink / build-id candidate is
// tried under the process root first and then at its host path.
let debuginfo_candidates: Vec<PathBuf> = {
let debuglink = read_gnu_debuglink(&elf, data);
let build_id = read_build_id(&elf, data);
let mut c = Vec::new();
c.push(ns_root.join(format!("{}.debuginfo", exe_rel.display())));
c.push(ns_root.join(format!("{}.debug", exe_rel.display())));
let host_candidates = candidate_debuginfo_paths(
&exe_path,
debuglink.as_ref().map(|(n, _)| n.as_str()),
build_id.as_deref(),
);
for hc in &host_candidates {
if let Ok(rel) = hc.strip_prefix("/") {
c.push(ns_root.join(rel));
}
c.push(hc.clone());
}
c
};
// NOTE(review): unlike `resolve_field_offsets`, the debuglink CRC is
// not checked before a candidate is used here.
for candidate in &debuginfo_candidates {
let dbg_mmap = std::fs::File::open(candidate)
.and_then(|f| unsafe { memmap2::Mmap::map(&f) });
if let Ok(ref dbg_data) = dbg_mmap {
tracing::debug!(
pid,
?candidate,
bytes = dbg_data.len(),
"ctprof probe: trying debuginfo (mmap)",
);
if let Ok(r) = resolve_field_offsets_from_bytes(dbg_data, candidate) {
return Ok((symbol, r));
}
}
}
// (d) Last resort: inline .debug_info without the pubtypes requirement.
if section_is_populated(&elf, data, ".debug_info")
&& let Ok(offsets) = resolve_field_offsets_from_bytes(data, &exe_path)
{
return Ok((symbol, offsets));
}
// Nothing worked: list every external path that was considered.
let mut tried: Vec<String> = Vec::new();
for p in &dwp_candidates {
tried.push(p.display().to_string());
}
for p in &debuginfo_candidates {
tried.push(p.display().to_string());
}
return Err(AttachError::DwarfParseFailure(anyhow!(
"jemalloc TSD symbol found in {} but no usable DWARF: \
inline .debug_info absent, no .dwp or external debuginfo \
resolved field offsets for struct '{}'. \
Rebuild with -g, supply a .dwp, or install the debuginfo \
package. Searched: {}",
exe_path.display(),
TSD_STRUCT_NAME,
tried.join(", "),
)));
}
Err(e) => {
last_symbol_err = Some(e);
}
}
}
// Maps scan: only reached when the executable did not yield the symbol.
// `seen` ensures each mapped file is read at most once.
let mut seen: std::collections::BTreeSet<PathBuf> = std::collections::BTreeSet::new();
for line in contents.lines() {
let Some(path) = parse_maps_elf_path(line) else {
continue;
};
// The executable was already handled above.
if path == exe_path {
continue; }
if !seen.insert(path.clone()) {
continue;
}
// Unreadable or non-ELF mappings are skipped silently.
let data = match fs::read(&path) {
Ok(d) => d,
Err(_) => continue,
};
let elf = match Elf::parse(&data) {
Ok(e) => e,
Err(_) => continue,
};
let _symbol = match find_tsd_tls(&elf, &path) {
Ok(s) => s,
Err(e) => {
last_symbol_err = Some(e);
continue;
}
};
// A match outside the main executable is reported, not used.
return Err(AttachError::JemallocInDso(anyhow!(
"jemalloc TLS symbol found in {} but static-TLS probe requires \
the match be in the main executable ({}); dynamic-TLS lookups \
in shared objects are not supported. Remediation: relink \
the target to embed jemalloc statically (e.g. build against \
tikv-jemallocator-sys rather than a system libjemalloc.so).",
path.display(),
exe_path.display(),
)));
}
let context = last_symbol_err
.map(|e| format!(" — last per-ELF error: {e}"))
.unwrap_or_default();
Err(AttachError::JemallocNotFound(anyhow!(
"jemalloc TLS symbol ({}) not found in any r-x mapping \
under {}. Either the target is not jemalloc-linked, or the \
symbol prefix is not in the recognized set.{}",
"tsd_tls / je_tsd_tls / _rjem_je_tsd_tls / *_tsd_tls",
maps_path.display(),
context,
)))
}
/// Extracts the file path from one line of a `/proc/<pid>/maps` listing, if
/// the mapping is executable and file-backed.
///
/// A line has five whitespace-separated fields (address range, permissions,
/// offset, device, inode) followed by an optional pathname. The pathname is
/// everything after the inode, so paths containing spaces are returned in
/// full. A trailing ` (deleted)` marker is stripped.
///
/// Returns `None` when a field is missing, the permissions lack `x`, or the
/// pathname is absent or does not start with `/` (e.g. `[vdso]`).
fn parse_maps_elf_path(line: &str) -> Option<PathBuf> {
    /// Splits off the next whitespace-delimited field, returning
    /// `(field, remainder)`, or `None` when nothing but whitespace is left.
    fn split_field(s: &str) -> Option<(&str, &str)> {
        let s = s.trim_start();
        if s.is_empty() {
            return None;
        }
        let end = s.find(char::is_whitespace).unwrap_or(s.len());
        Some(s.split_at(end))
    }

    let (_range, rest) = split_field(line)?;
    let (perms, rest) = split_field(rest)?;
    if !perms.contains('x') {
        return None;
    }
    let (_offset, rest) = split_field(rest)?;
    let (_dev, rest) = split_field(rest)?;
    let (_inode, rest) = split_field(rest)?;

    // The pathname is the whole remainder, not just its first token, so
    // embedded spaces survive.
    let path = rest.trim();
    let path = path.strip_suffix(" (deleted)").unwrap_or(path);
    if !path.starts_with('/') {
        return None;
    }
    Some(PathBuf::from(path))
}
/// Reads both counters of thread `tid` with one `process_vm_readv` call.
///
/// The remote address of `thread_allocated` is derived from
/// `thread_pointer`, the symbol's TLS layout and the field offset. The read
/// covers everything from there to the end of `thread_deallocated`, and the
/// two counters are decoded as little-endian `u64`s from that buffer.
///
/// # Errors
/// `ProbeError::TlsArithmetic` when the address cannot be computed;
/// `ProbeError::ProcessVmReadv` when the read fails or comes back short.
fn read_counters_at_thread_pointer(
    thread_pointer: u64,
    symbol: &TsdTlsSymbol,
    offsets: &CounterOffsets,
    tid: i32,
) -> std::result::Result<ThreadCounters, ProbeError> {
    let addr = match compute_tls_address(
        thread_pointer,
        symbol.tls_image_aligned_size,
        symbol.p_align,
        symbol.st_value,
        offsets.thread_allocated,
    ) {
        Ok(addr) => addr,
        Err(e) => return Err(ProbeError::TlsArithmetic(e)),
    };
    let span = offsets.combined_read_span();
    debug_assert!(
        addr % 8 == 0,
        "process_vm_readv remote base must be 8-byte aligned (jemalloc \
         tsd_s.thread_allocated is a u64); got addr={addr:#x}",
    );

    let wanted = span as usize;
    let mut buf = vec![0u8; wanted];
    let n = {
        let remote = [RemoteIoVec {
            base: addr as usize,
            len: wanted,
        }];
        let mut local = [IoSliceMut::new(&mut buf)];
        process_vm_readv(Pid::from_raw(tid), &mut local, &remote).map_err(|e| {
            ProbeError::ProcessVmReadv(anyhow!("process_vm_readv on tid {tid} at {addr:#x}: {e}"))
        })?
    };
    if n != wanted {
        return Err(ProbeError::ProcessVmReadv(anyhow!(
            "short process_vm_readv on tid {tid}: got {n} bytes, expected {span}"
        )));
    }

    // Decodes the little-endian u64 starting `at` bytes into the buffer.
    let counter_at = |at: usize| -> u64 {
        let mut raw = [0u8; 8];
        raw.copy_from_slice(&buf[at..at + 8]);
        u64::from_le_bytes(raw)
    };
    // `thread_allocated` sits at the start of the buffer; `thread_deallocated`
    // sits at the distance between the two member offsets.
    let dealloc_at = (offsets.thread_deallocated - offsets.thread_allocated) as usize;
    Ok(ThreadCounters {
        allocated_bytes: counter_at(0),
        deallocated_bytes: counter_at(dealloc_at),
    })
}
/// Guard holding a tid; dropping it issues `ptrace::detach` on that tid.
struct ScopeDetach(i32);
impl Drop for ScopeDetach {
fn drop(&mut self) {
let pid = Pid::from_raw(self.0);
// Best effort: a failed detach is ignored, since `drop` cannot report it.
let _ = ptrace::detach(pid, None);
}
}
// Unit tests live in a separate file and are compiled only for test builds.
#[cfg(test)]
#[path = "host_thread_probe_tests.rs"]
mod tests;