/// Poll window for INV-GPUTRAIN-003, in seconds (the unit test in this file pins it to 5).
///
/// NOTE(review): no code visible in this file reads this constant; presumably the caller
/// that polls `nvidia-smi` consumes it — confirm.
pub const AC_GPUTRAIN_003_NVIDIA_SMI_POLL_WINDOW_SECONDS: u32 = 5;
/// Minimum used-memory figure, in MiB, that a compute-app row matching the training pid
/// must report for `verdict_from_residency` to return `Pass`.
pub const AC_GPUTRAIN_003_MIN_USED_MEMORY_MIB: u64 = 1;
/// One row of the compute-app listing, as produced by `parse_nvidia_smi_compute_apps`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct NvidiaSmiComputeApp {
    /// Process id taken from the first field of the row.
    pub pid: u32,
    /// Used memory taken from the second field of the row (MiB, per the field name).
    pub used_memory_mib: u64,
}
/// Outcome of the INV-GPUTRAIN-003 residency check.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Gputrain003Verdict {
    /// The training pid was listed with at least the minimum used memory.
    Pass,
    /// No listed row satisfied the residency condition.
    Fail,
}
/// Parses a compute-app listing into one [`NvidiaSmiComputeApp`] per non-empty line.
///
/// Every non-empty line must have the shape `<pid>, <used_memory_mib>`: the two fields are
/// split at the first occurrence of a comma followed by a single space, then parsed with
/// `str::parse` as `u32` and `u64` respectively. Empty lines are skipped, so empty input
/// yields an empty vector.
///
/// NOTE(review): the shape presumably matches
/// `nvidia-smi --query-compute-apps=pid,used_memory --format=csv,noheader,nounits` —
/// confirm against the caller that captures the output.
///
/// # Errors
///
/// Returns `Err(())` as soon as a non-empty line lacks the `", "` separator or either
/// field fails to parse; rows parsed before that point are discarded.
#[allow(clippy::result_unit_err)]
pub fn parse_nvidia_smi_compute_apps(output: &str) -> Result<Vec<NvidiaSmiComputeApp>, ()> {
    output
        .lines()
        .filter(|row| !row.is_empty())
        .map(|row| -> Result<NvidiaSmiComputeApp, ()> {
            let (pid_field, mem_field) = row.split_once(", ").ok_or(())?;
            let pid = pid_field.parse::<u32>().map_err(|_| ())?;
            let used_memory_mib = mem_field.parse::<u64>().map_err(|_| ())?;
            Ok(NvidiaSmiComputeApp { pid, used_memory_mib })
        })
        // Collecting into `Result` stops at the first failing row, like the early `?` return.
        .collect()
}
/// Decides whether `training_pid` is resident according to the compute-app rows in `apps`.
///
/// Returns [`Gputrain003Verdict::Pass`] when at least one row has a `pid` equal to
/// `training_pid` and a `used_memory_mib` of at least
/// [`AC_GPUTRAIN_003_MIN_USED_MEMORY_MIB`]; otherwise (including for an empty slice)
/// returns [`Gputrain003Verdict::Fail`].
#[must_use]
pub fn verdict_from_residency(
    training_pid: u32,
    apps: &[NvidiaSmiComputeApp],
) -> Gputrain003Verdict {
    let is_resident = apps.iter().any(|entry| {
        entry.pid == training_pid
            && entry.used_memory_mib >= AC_GPUTRAIN_003_MIN_USED_MEMORY_MIB
    });

    if is_resident {
        Gputrain003Verdict::Pass
    } else {
        Gputrain003Verdict::Fail
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end falsification test for the residency check: drives the parser and the
    /// verdict function through the happy path, every Fail condition, malformed input,
    /// numeric boundaries, and finally pins the two spec constants.
    #[test]
    fn falsify_gputrain_003_residency_proof_logic() {
        let training_pid = 12345;

        // --- Verdicts on well-formed single-row output -------------------------------
        let output_happy = "12345, 5000\n";
        let apps = parse_nvidia_smi_compute_apps(output_happy).expect("canonical line parses");
        assert_eq!(apps, vec![NvidiaSmiComputeApp { pid: 12345, used_memory_mib: 5000 }],);
        assert_eq!(
            verdict_from_residency(training_pid, &apps),
            Gputrain003Verdict::Pass,
            "matching pid with non-zero mem must Pass",
        );

        let output_zero = "12345, 0\n";
        let apps = parse_nvidia_smi_compute_apps(output_zero).expect("zero-mem line parses");
        assert_eq!(
            verdict_from_residency(training_pid, &apps),
            Gputrain003Verdict::Fail,
            "matching pid with 0 MiB must Fail (GPU allocated nothing)",
        );

        let output_other = "99999, 5000\n";
        let apps = parse_nvidia_smi_compute_apps(output_other).expect("other-pid line parses");
        assert_eq!(
            verdict_from_residency(training_pid, &apps),
            Gputrain003Verdict::Fail,
            "non-matching pid must Fail even if it holds lots of memory",
        );

        // --- Empty output ------------------------------------------------------------
        let apps_empty: Vec<NvidiaSmiComputeApp> =
            parse_nvidia_smi_compute_apps("").expect("empty input parses as zero-length slice");
        assert!(apps_empty.is_empty());
        assert_eq!(
            verdict_from_residency(training_pid, &apps_empty),
            Gputrain003Verdict::Fail,
            "empty compute-app list must Fail",
        );

        // --- Multi-row output: the position of our row must not matter ---------------
        let output_multi_ours_first = "12345, 2000\n99999, 1500\n";
        let apps =
            parse_nvidia_smi_compute_apps(output_multi_ours_first).expect("two-line output parses");
        assert_eq!(apps.len(), 2);
        assert_eq!(
            verdict_from_residency(training_pid, &apps),
            Gputrain003Verdict::Pass,
            "multi-process output with our pid first must Pass",
        );

        let output_multi_ours_last = "99999, 1500\n12345, 2000\n";
        let apps =
            parse_nvidia_smi_compute_apps(output_multi_ours_last).expect("two-line output parses");
        assert_eq!(
            verdict_from_residency(training_pid, &apps),
            Gputrain003Verdict::Pass,
            "multi-process output with our pid last must Pass \
             (loop must not short-circuit on non-matching rows)",
        );

        // --- Malformed rows must be rejected by the parser ---------------------------
        let no_comma = "12345 5000\n";
        assert_eq!(parse_nvidia_smi_compute_apps(no_comma), Err(()));

        let wrong_separator = "12345,5000\n";
        assert_eq!(parse_nvidia_smi_compute_apps(wrong_separator), Err(()));

        // Two spaces after the comma: `split_once(", ")` leaves a leading space on the
        // memory field, which the `u64` parse rejects. The spaces are written as `\x20`
        // escapes so whitespace-normalising tools cannot collapse this input into the
        // canonical single-space line, which parses successfully and would make the
        // assertion below fail.
        let extra_whitespace = "12345,\x20\x205000\n";
        assert_eq!(parse_nvidia_smi_compute_apps(extra_whitespace), Err(()));

        let non_digit_pid = "abc, 5000\n";
        assert_eq!(parse_nvidia_smi_compute_apps(non_digit_pid), Err(()));

        let non_digit_mem = "12345, xyz\n";
        assert_eq!(parse_nvidia_smi_compute_apps(non_digit_mem), Err(()));

        let missing_field = "12345,\n";
        assert_eq!(parse_nvidia_smi_compute_apps(missing_field), Err(()));

        assert_eq!(
            verdict_from_residency(training_pid, &[]),
            Gputrain003Verdict::Fail,
            "conservative Fail when parse errored and caller passed empty slice",
        );

        // --- Numeric boundaries ------------------------------------------------------
        let max_pid_max_mem =
            vec![NvidiaSmiComputeApp { pid: u32::MAX, used_memory_mib: u64::MAX }];
        assert_eq!(
            verdict_from_residency(u32::MAX, &max_pid_max_mem),
            Gputrain003Verdict::Pass,
            "u32::MAX pid + u64::MAX mem must Pass",
        );

        let max_pid_zero_mem = vec![NvidiaSmiComputeApp { pid: u32::MAX, used_memory_mib: 0 }];
        assert_eq!(
            verdict_from_residency(u32::MAX, &max_pid_zero_mem),
            Gputrain003Verdict::Fail,
            "u32::MAX pid + 0 MiB must Fail (zero-mem rule is exceptionless)",
        );

        // --- Spec constants are pinned so an accidental edit trips this test ---------
        assert_eq!(
            AC_GPUTRAIN_003_NVIDIA_SMI_POLL_WINDOW_SECONDS, 5,
            "INV-GPUTRAIN-003 poll window is 5 seconds \
             (spec §14.4 / gpu-training-backend-v1 INV-GPUTRAIN-003)",
        );
        assert_eq!(
            AC_GPUTRAIN_003_MIN_USED_MEMORY_MIB, 1,
            "INV-GPUTRAIN-003 min-mem floor is 1 MiB \
             (spec §14.4 / gpu-training-backend-v1 INV-GPUTRAIN-003)",
        );
    }
}