use log::debug;
use std::collections::{HashMap, HashSet};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex};
use std::thread::JoinHandle;
use std::time::Duration;
/// Memory usage figures gathered for a set of processes.
///
/// Every field is optional: `None` means that no sample supplied a value for
/// that category (the `Default` value has all three unset).
#[derive(Debug, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct ProcessPeakMem {
/// Bytes reported by the fdinfo regions that the driver's config lists as VRAM.
pub vram_bytes: Option<u64>,
/// Bytes reported by the fdinfo regions that the driver's config lists as
/// system memory.
pub sys_bytes: Option<u64>,
/// Resident set size in bytes, summed over the sampled processes.
/// `parse_fdinfo` never fills this in; it always leaves it `None`.
pub rss_bytes: Option<u64>,
}
/// Per-driver table saying which fdinfo memory regions count as VRAM and which
/// count as system memory.
///
/// The strings are the `<suffix>` part of `drm-total-<suffix>` /
/// `drm-memory-<suffix>` keys; the values of all listed suffixes are summed.
struct DriverConfig {
/// Region suffixes accumulated into `ProcessPeakMem::vram_bytes`.
vram_suffixes: &'static [&'static str],
/// Region suffixes accumulated into `ProcessPeakMem::sys_bytes`.
sys_suffixes: &'static [&'static str],
}
/// Looks up the memory-region classification for a DRM driver name.
///
/// `driver` is compared verbatim against the known driver names (`amdgpu`,
/// `msm`, `xe`). Returns `None` for any other driver, in which case no
/// VRAM/system-memory figures can be derived from its fdinfo.
fn driver_config(driver: &str) -> Option<DriverConfig> {
    // Tiny constructor so that every table row below fits on one line.
    fn regions(
        vram_suffixes: &'static [&'static str],
        sys_suffixes: &'static [&'static str],
    ) -> DriverConfig {
        DriverConfig {
            vram_suffixes,
            sys_suffixes,
        }
    }

    let known = match driver {
        "amdgpu" => regions(&["vram"], &["gtt"]),
        // msm exposes a single "memory" region and no VRAM region.
        "msm" => regions(&[], &["memory"]),
        "xe" => regions(&["vram0", "vram1"], &["gtt", "system"]),
        _ => return None,
    };
    Some(known)
}
/// Parses the value part of a `drm-total-*` / `drm-memory-*` fdinfo line into
/// a byte count.
///
/// Accepted forms (surrounding whitespace is ignored):
/// * a bare unsigned integer, taken as bytes;
/// * `<integer> <unit>` where `<unit>` is one of `B`, `KiB`, `MiB`, `GiB`.
///
/// Returns `None` for anything else, including values whose byte count does
/// not fit in a `u64` (previously such a value overflowed the multiplication).
fn parse_drm_mem_value(value: &str) -> Option<u64> {
    let value = value.trim();

    // A bare number carries no unit and is already a byte count.
    if let Ok(bytes) = value.parse::<u64>() {
        return Some(bytes);
    }

    // Split on the first whitespace character of any kind, then drop whatever
    // additional padding precedes the unit.
    let (num, unit) = value.split_once(char::is_whitespace)?;
    let num: u64 = num.parse().ok()?;
    let multiplier: u64 = match unit.trim_start() {
        "B" => 1,
        "KiB" => 1 << 10,
        "MiB" => 1 << 20,
        "GiB" => 1 << 30,
        _ => return None,
    };

    // The number comes straight from a kernel-provided file; refuse to wrap
    // (release) or panic (debug) on an absurdly large value.
    num.checked_mul(multiplier)
}
/// Identity of one DRM client as reported by an fdinfo file.
///
/// Used as a hash-map key so that the same client seen through several file
/// descriptors (or several processes) is only counted once.
#[derive(Debug, PartialEq, Eq, Hash)]
struct FdinfoKey {
/// Value of the `drm-driver:` line.
driver: String,
/// Value of the `drm-client-id:` line.
client_id: u64,
}
/// Parses the text of one `/proc/<pid>/fdinfo/<fd>` file.
///
/// Returns `None` unless the file contains both a `drm-driver:` line and a
/// parseable `drm-client-id:` line. Otherwise returns the client's identity
/// together with its memory usage:
///
/// * for every region suffix listed in the driver's [`DriverConfig`], the
///   `drm-total-<suffix>` value is used, falling back to
///   `drm-memory-<suffix>` when no total is present;
/// * the per-category values are summed, and a category stays `None` when
///   none of its regions was found (or the driver is unknown);
/// * `rss_bytes` is always `None`.
///
/// When a key is repeated, the first `drm-driver` / first parseable
/// `drm-client-id` wins, while for memory regions the last value wins.
///
/// As a side effect, the first time each driver name is seen, the region names
/// it exposes are written to the debug log.
fn parse_fdinfo(content: &str) -> Option<(FdinfoKey, ProcessPeakMem)> {
    /// Returns the keys of `fields` in sorted order, for stable log output.
    fn sorted_keys(fields: &HashMap<String, u64>) -> Vec<&str> {
        let mut names: Vec<&str> = fields.keys().map(String::as_str).collect();
        names.sort();
        names
    }

    let mut driver_name: Option<&str> = None;
    let mut id: Option<u64> = None;
    let mut total_fields: HashMap<String, u64> = HashMap::new();
    let mut memory_fields: HashMap<String, u64> = HashMap::new();

    for line in content.lines() {
        // Every line of interest has the shape `<key>:<value>`; none of the
        // key prefixes contains a colon, so the first colon ends the key.
        let Some((key, raw)) = line.split_once(':') else {
            continue;
        };
        match key {
            "drm-driver" => driver_name = driver_name.or(Some(raw.trim())),
            "drm-client-id" => {
                // Keep retrying until one occurrence parses; then stick to it.
                if id.is_none() {
                    id = raw.trim().parse().ok();
                }
            }
            _ => {
                let bucket = if let Some(region) = key.strip_prefix("drm-total-") {
                    Some((&mut total_fields, region))
                } else if let Some(region) = key.strip_prefix("drm-memory-") {
                    Some((&mut memory_fields, region))
                } else {
                    None
                };
                if let Some((fields, region)) = bucket {
                    if let Some(bytes) = parse_drm_mem_value(raw) {
                        fields.insert(region.trim().to_string(), bytes);
                    }
                }
            }
        }
    }

    let (driver, client_id) = (driver_name?, id?);

    // Adds up every listed region that is present; `None` if none is.
    let sum_regions = |suffixes: &[&str]| -> Option<u64> {
        suffixes
            .iter()
            .filter_map(|suffix| {
                total_fields
                    .get(*suffix)
                    .or_else(|| memory_fields.get(*suffix))
                    .copied()
            })
            .reduce(|acc, bytes| acc + bytes)
    };
    let (total_vram, total_sys) = match driver_config(driver) {
        Some(config) => (
            sum_regions(config.vram_suffixes),
            sum_regions(config.sys_suffixes),
        ),
        None => (None, None),
    };

    lazy_static! {
        static ref LOGGED_DRIVERS: Mutex<HashSet<String>> = Mutex::new(HashSet::new());
    }
    // `insert` reports whether the name was new, so each driver is logged once.
    let first_sighting = LOGGED_DRIVERS.lock().unwrap().insert(driver.to_owned());
    if first_sighting {
        let total_names = sorted_keys(&total_fields);
        let memory_names = sorted_keys(&memory_fields);
        log::debug!(
            "DRM driver '{}': drm-total-* fields: [{}], drm-memory-* fields: [{}]. \
             If this driver should report vram/sysmem, please report these field names to \
             https://gitlab.freedesktop.org/anholt/gpu-trace-perf/-/work_items.",
            driver,
            total_names.join(", "),
            memory_names.join(", "),
        );
        if total_names.is_empty() && memory_names.is_empty() {
            log::debug!("full output:\n{}", content);
        }
    }

    Some((
        FdinfoKey {
            driver: driver.to_string(),
            client_id,
        },
        ProcessPeakMem {
            vram_bytes: total_vram,
            sys_bytes: total_sys,
            rss_bytes: None,
        },
    ))
}
/// Extracts the set of process ids (tgids) from the text of a DRM debugfs
/// `clients` file.
///
/// Each data row has the shape
/// `<command> <tgid> <dev> <master y|n> <auth y|n> <uid> <magic> ...`.
/// The command is printed unquoted and may itself contain spaces (for example
/// `Isolated Web Co`), so the tgid is not reliably the second field. The tgid
/// is therefore taken to be the first number after the leading field that is
/// followed by a numeric `dev` column and two `y`/`n` flags. Rows too short or
/// too odd to match that shape fall back to "second field, if numeric".
///
/// The header row and any row without a usable tgid are skipped; duplicate
/// tgids collapse because the result is a set.
fn parse_dri_clients(input: &str) -> HashSet<u32> {
    /// Returns the tgid of one row, or `None` if the row has none.
    fn tgid_of_row(line: &str) -> Option<u32> {
        let fields: Vec<&str> = line.split_whitespace().collect();
        let is_flag = |field: &str| matches!(field, "y" | "n");

        // `skip(1)`: the command always occupies at least the first field.
        let anchored = fields.windows(4).skip(1).find_map(|window| {
            let tgid = window[0].parse::<u32>().ok()?;
            let shape_matches =
                window[1].parse::<u32>().is_ok() && is_flag(window[2]) && is_flag(window[3]);
            shape_matches.then_some(tgid)
        });

        anchored.or_else(|| fields.get(1)?.parse().ok())
    }

    input.lines().filter_map(tgid_of_row).collect()
}
/// Collects the process ids of every DRM client listed under
/// `/sys/kernel/debug/dri/*/clients`.
///
/// Best effort: if the debugfs directory cannot be listed the failure is
/// logged at debug level and an empty set is returned; individual `clients`
/// files that cannot be read are silently skipped.
fn dri_client_pids() -> HashSet<u32> {
    let devices = match std::fs::read_dir("/sys/kernel/debug/dri") {
        Ok(devices) => devices,
        Err(e) => {
            debug!("Failed to read /sys/kernel/debug/dri/*/clients: {e:#}");
            return HashSet::new();
        }
    };

    devices
        .flatten()
        .filter_map(|device| std::fs::read_to_string(device.path().join("clients")).ok())
        .flat_map(|listing| parse_dri_clients(&listing))
        .collect()
}
/// Finds the processes whose environment contains exactly
/// `GPU_TRACE_PERF_ID=<marker>`.
///
/// Only the pids in `candidate_pids` are inspected. When that set is empty,
/// every numeric entry of `/proc` is inspected instead (and an empty list is
/// returned if `/proc` itself cannot be listed). Processes whose
/// `/proc/<pid>/environ` cannot be read are skipped.
///
/// The order of the returned pids is unspecified.
fn pids_with_env_marker(marker: &str, candidate_pids: HashSet<u32>) -> Vec<u32> {
    let needle = format!("GPU_TRACE_PERF_ID={marker}").into_bytes();

    let to_inspect: HashSet<u32> = if !candidate_pids.is_empty() {
        candidate_pids
    } else {
        match std::fs::read_dir("/proc") {
            Ok(proc_dir) => proc_dir
                .flatten()
                .filter_map(|entry| entry.file_name().to_string_lossy().parse::<u32>().ok())
                .collect(),
            Err(_) => return Vec::new(),
        }
    };

    to_inspect
        .into_iter()
        .filter(|pid| {
            // environ is a sequence of NUL-separated `KEY=value` byte strings.
            std::fs::read(format!("/proc/{pid}/environ"))
                .map(|environ| {
                    environ
                        .split(|&byte| byte == 0)
                        .any(|variable| variable == needle.as_slice())
                })
                .unwrap_or(false)
        })
        .collect()
}
/// Returns `root_pid` followed by all of its descendants, breadth first.
///
/// Children are discovered through `/proc/<pid>/task/<tid>/children`. Any
/// directory or file that cannot be read is skipped, so a process that has
/// already exited simply contributes no children. `root_pid` is always the
/// first element, whether or not it exists.
fn descendant_pids(root_pid: u32) -> Vec<u32> {
    let mut found = vec![root_pid];
    let mut cursor = 0;

    // `found` doubles as the work queue: everything before `cursor` has been
    // expanded, everything from `cursor` onwards is still pending.
    while let Some(&parent) = found.get(cursor) {
        cursor += 1;

        let Ok(tasks) = std::fs::read_dir(format!("/proc/{parent}/task")) else {
            continue;
        };
        for task in tasks.flatten() {
            let Ok(children) = std::fs::read_to_string(task.path().join("children")) else {
                continue;
            };
            found.extend(
                children
                    .split_whitespace()
                    .filter_map(|child| child.parse::<u32>().ok()),
            );
        }
    }

    found
}
fn sample_fdinfo_memory(pids: &[u32]) -> Option<ProcessPeakMem> {
let mut clients: HashMap<FdinfoKey, ProcessPeakMem> = HashMap::new();
for &pid in pids {
let fdinfo_dir = format!("/proc/{pid}/fdinfo");
let entries = match std::fs::read_dir(&fdinfo_dir) {
Ok(e) => e,
Err(_) => continue,
};
for entry in entries.flatten() {
let content = match std::fs::read_to_string(entry.path()) {
Ok(c) => c,
Err(_) => continue,
};
if let Some((key, data)) = parse_fdinfo(&content) {
clients.entry(key).or_insert(data);
}
}
}
if clients.is_empty() {
return None;
}
let mut total = ProcessPeakMem::default();
for client in clients.values() {
if let Some(size) = client.vram_bytes {
*total.vram_bytes.get_or_insert(0) += size;
}
if let Some(size) = client.sys_bytes {
*total.sys_bytes.get_or_insert(0) += size;
}
}
Some(total)
}
/// Sums the resident set size, in bytes, of the given processes.
///
/// The figure is taken from the `VmRSS:` line of `/proc/<pid>/status`, which
/// the kernel reports in kB. Unlike a page count multiplied by a hard-coded
/// 4096, this stays correct on kernels built with 16 KiB or 64 KiB pages.
///
/// Processes whose status file cannot be read, or which have no `VmRSS:`
/// line, are skipped. Returns `None` when no process contributed a value.
fn sum_rss_bytes(pids: &[u32]) -> Option<u64> {
    /// Extracts the `VmRSS:` value of a status dump, converted to bytes.
    fn vm_rss_bytes(status: &str) -> Option<u64> {
        let value = status
            .lines()
            .find_map(|line| line.strip_prefix("VmRSS:"))?;
        // The value looks like `    1234 kB`; only the number is needed.
        let kib: u64 = value.split_whitespace().next()?.parse().ok()?;
        kib.checked_mul(1024)
    }

    let mut total: Option<u64> = None;
    for &pid in pids {
        let Ok(status) = std::fs::read_to_string(format!("/proc/{pid}/status")) else {
            continue;
        };
        if let Some(bytes) = vm_rss_bytes(&status) {
            let sum = total.get_or_insert(0);
            *sum = sum.saturating_add(bytes);
        }
    }
    total
}
/// Samples the DRM memory usage of every process that currently appears as a
/// DRM client in debugfs.
///
/// Returns `None` when no DRM client process could be discovered, or when none
/// of the discovered processes exposes a parseable DRM fdinfo entry.
pub fn sample_system_gpu_mem() -> Option<ProcessPeakMem> {
    let clients = dri_client_pids();
    if clients.is_empty() {
        None
    } else {
        let pids = clients.into_iter().collect::<Vec<u32>>();
        sample_fdinfo_memory(&pids)
    }
}
/// Handle to a background thread that periodically samples the memory usage
/// of a process tree and remembers the highest values seen.
///
/// Created with `ProcessWatcher::watch`; consumed by `ProcessWatcher::stop`,
/// which returns the recorded peaks.
pub struct ProcessWatcher {
/// Set to `true` to ask the sampling thread to finish.
stop: Arc<AtomicBool>,
/// The sampling thread; its return value is the peak usage it observed.
thread: JoinHandle<ProcessPeakMem>,
}
impl ProcessWatcher {
    /// Time the sampling thread waits between two consecutive samples.
    const POLL_INTERVAL: Duration = Duration::from_millis(100);

    /// Starts a background thread that tracks the peak memory usage of `pid`
    /// and related processes.
    ///
    /// On every iteration the thread samples:
    /// * `pid` and all of its descendants, plus
    /// * every DRM client process whose environment contains
    ///   `GPU_TRACE_PERF_ID=<env_marker>`,
    ///
    /// and keeps, per category, the maximum value observed so far. At least
    /// one sample is always taken, and one more is taken after a stop request
    /// that arrives during the wait.
    pub fn watch(pid: u32, env_marker: String) -> Self {
        let stop = Arc::new(AtomicBool::new(false));
        let thread = {
            let stop = Arc::clone(&stop);
            std::thread::spawn(move || {
                let mut peak = ProcessPeakMem::default();
                loop {
                    let mut pids: HashSet<u32> = descendant_pids(pid).into_iter().collect();
                    pids.extend(pids_with_env_marker(&env_marker, dri_client_pids()));
                    let pids: Vec<u32> = pids.into_iter().collect();

                    // `Option` orders `None` below every `Some`, so `max`
                    // keeps the old peak when a sample has no value and adopts
                    // the sample when there is no peak yet.
                    if let Some(sample) = sample_fdinfo_memory(&pids) {
                        peak.vram_bytes = peak.vram_bytes.max(sample.vram_bytes);
                        peak.sys_bytes = peak.sys_bytes.max(sample.sys_bytes);
                    }
                    peak.rss_bytes = peak.rss_bytes.max(sum_rss_bytes(&pids));

                    if stop.load(Ordering::Relaxed) {
                        break;
                    }
                    // Parking instead of sleeping lets `stop` cut the wait
                    // short. A spurious wake-up merely causes an early sample.
                    std::thread::park_timeout(Self::POLL_INTERVAL);
                }
                peak
            })
        };
        Self { stop, thread }
    }

    /// Asks the sampling thread to finish, waits for it, and returns the peak
    /// usage it recorded.
    ///
    /// Returns the default value (all fields `None`) if the sampling thread
    /// panicked.
    pub fn stop(self) -> ProcessPeakMem {
        self.stop.store(true, Ordering::Relaxed);
        // Wake the sampler right away rather than waiting out its interval.
        self.thread.thread().unpark();
        self.thread.join().unwrap_or_default()
    }
}
// Unit tests for the pure parsing helpers. The fdinfo and debugfs fixtures are
// embedded as raw strings.
#[cfg(test)]
mod tests {
use super::*;
// Bare numbers are bytes; the KiB/MiB/GiB units scale by powers of 1024;
// surrounding whitespace is ignored; non-numeric input is rejected.
#[test]
fn test_parse_drm_mem_value() {
assert_eq!(parse_drm_mem_value("0"), Some(0));
assert_eq!(parse_drm_mem_value("12 KiB"), Some(12 * 1024));
assert_eq!(parse_drm_mem_value("2 MiB"), Some(2 * 1024 * 1024));
assert_eq!(parse_drm_mem_value(" 1 GiB "), Some(1024 * 1024 * 1024));
assert_eq!(parse_drm_mem_value("abc"), None);
}
// msm: the only configured region is "memory", which counts as system memory,
// so `vram_bytes` stays `None`.
#[test]
fn test_freedreno_fdinfo() {
let input = r#"
pos: 0
flags: 02400002
mnt_id: 40
ino: 866
drm-driver: msm
drm-client-id: 73
drm-engine-gpu: 1211513957 ns
drm-cycles-gpu: 249015848
drm-maxfreq-gpu: 1250000000 Hz
drm-total-memory: 24992 KiB
drm-shared-memory: 12312 KiB
drm-resident-memory: 24992 KiB
drm-purgeable-memory: 12 KiB
"#;
assert_eq!(
parse_fdinfo(input),
Some((
FdinfoKey {
driver: "msm".to_string(),
client_id: 73,
},
ProcessPeakMem {
vram_bytes: None,
sys_bytes: Some(24992 * 1024),
rss_bytes: None,
}
))
);
}
// amdgpu with both `drm-total-*` and `drm-memory-*` lines: the totals take
// precedence over the `drm-memory-*` values.
#[test]
fn test_amdgpu_fdinfo() {
let input = r#"
pos: 0
flags: 02104002
mnt_id: 475
ino: 492
drm-driver: amdgpu
drm-client-id: 69407
drm-pdev: 0000:c3:00.0
pasid: 32776
drm-total-cpu: 0
drm-shared-cpu: 0
drm-resident-cpu: 0
drm-purgeable-cpu: 0
drm-total-gtt: 44740 KiB
drm-shared-gtt: 0
drm-resident-gtt: 1607168 KiB
drm-purgeable-gtt: 768 KiB
drm-total-vram: 1649588 KiB
drm-shared-vram: 624928 KiB
drm-resident-vram: 87928 KiB
drm-purgeable-vram: 768 KiB
drm-total-gds: 0
drm-shared-gds: 0
drm-resident-gds: 0
drm-purgeable-gds: 0
drm-total-gws: 0
drm-shared-gws: 0
drm-resident-gws: 0
drm-purgeable-gws: 0
drm-total-oa: 0
drm-shared-oa: 0
drm-resident-oa: 0
drm-purgeable-oa: 0
drm-total-doorbell: 0
drm-shared-doorbell: 0
drm-resident-doorbell: 0
drm-purgeable-doorbell: 0
drm-total-mmioremap: 0
drm-shared-mmioremap: 0
drm-resident-mmioremap: 0
drm-purgeable-mmioremap: 0
drm-memory-vram: 87928 KiB
drm-memory-gtt: 1607168 KiB
drm-memory-cpu: 0 KiB
amd-evicted-vram: 0 KiB
amd-requested-vram: 1649588 KiB
amd-requested-gtt: 44740 KiB
drm-engine-gfx: 10254754355966 ns
drm-engine-compute: 160593253870 ns
"#;
assert_eq!(
parse_fdinfo(input),
Some((
FdinfoKey {
driver: "amdgpu".to_string(),
client_id: 69407,
},
ProcessPeakMem {
vram_bytes: Some(1649588 * 1024),
sys_bytes: Some(44740 * 1024),
rss_bytes: None,
}
))
);
}
// amdgpu output with only `drm-memory-*` lines (no totals) and with the
// driver / client-id lines repeated: the fallback values are used and the
// first client id (2) wins over the later one (260).
#[test]
fn test_amdgpu_raven_fdinfo() {
let input = r#"
pos: 0
flags: 02100002
mnt_id: 21
ino: 156
drm-driver: amdgpu
drm-client-id: 2
drm-pdev: 0000:04:00.0
pasid: 32768
drm-driver: amdgpu
drm-pdev: 0000:04:00.0
drm-client-id: 260
drm-memory-vram: 17556 KiB
drm-memory-gtt: 12800 KiB
drm-memory-cpu: 0 KiB
amd-memory-visible-vram: 17556 KiB
amd-evicted-vram: 0 KiB
amd-evicted-visible-vram: 0 KiB
amd-requested-vram: 17556 KiB
amd-requested-visible-vram: 2048 KiB
amd-requested-gtt: 12800 KiB
drm-engine-gfx: 60112021 ns
"#;
assert_eq!(
parse_fdinfo(input),
Some((
FdinfoKey {
driver: "amdgpu".to_string(),
client_id: 2,
},
ProcessPeakMem {
vram_bytes: Some(17556 * 1024),
sys_bytes: Some(12800 * 1024),
rss_bytes: None,
}
))
);
}
// The header row is skipped (its second field is not a number) and processes
// listed several times appear once in the resulting set.
#[test]
fn test_parse_dri_clients() {
let input = r#"
command tgid dev master a uid magic name id
systemd-logind 2534 0 y y 0 0 <unset> 69407
gnome-shell 3278876 128 n n 1000 0 <unset> 69408
Xwayland 3279443 128 n n 1000 0 <unset> 69425
gnome-shell 3278876 0 n n 1000 0 <unset> 69426
mutter-x11-fram 3279640 128 n n 1000 0 <unset> 69432
firefox 3292369 128 n n 1000 0 <unset> 69448
firefox 3292369 128 n n 1000 0 <unset> 69450
}"#;
assert_eq!(
HashSet::from([2534u32, 3278876, 3279443, 3279640, 3292369]),
parse_dri_clients(input)
);
}
}