use std::fmt;
use super::calibration::DeviceCalibration;
/// Description of a single GPU device together with its calibration figures.
///
/// `PartialEq` is derived, so two values compare equal only when every field,
/// including `calibration`, compares equal.
#[derive(Clone, Debug, PartialEq)]
pub struct GpuDeviceInfo {
/// Device ordinal; printed directly after the word `device` by the `Display` impl.
/// NOTE(review): presumably the driver/runtime device index — confirm against the code that fills this in.
pub ordinal: usize,
/// Device name; printed inside single quotes by the `Display` impl.
pub name: String,
/// Major compute-capability number; the first digit of the `sm_XY` tag in the `Display` output.
pub compute_capability_major: i32,
/// Minor compute-capability number; the second digit of the `sm_XY` tag in the `Display` output.
pub compute_capability_minor: i32,
/// Number of SMs; printed as `<n> SMs` by the `Display` impl.
pub sm_count: i32,
/// Total device memory in bytes; feeds `score()` and `dispatch_memory_budget_bytes()`.
pub total_memory_bytes: usize,
/// Calibration record whose `fp64_gflops`, `h2d_gb_s` and `d2h_gb_s` fields back
/// `peak_fp64_gflops()` and `pcie_gb_per_s()`.
pub calibration: DeviceCalibration,
}
// `Eq` is asserted by hand instead of being derived: the `calibration` field
// carries `f64` values (see `peak_fp64_gflops`), and `f64` does not implement `Eq`.
// NOTE(review): this marker promises `x == x` for every value, which does not hold
// if any calibration figure is NaN — confirm that calibration never stores NaN.
impl Eq for GpuDeviceInfo {}
impl GpuDeviceInfo {
    /// Returns the `fp64_gflops` figure stored in this device's calibration.
    #[inline]
    pub fn peak_fp64_gflops(&self) -> f64 {
        self.calibration.fp64_gflops
    }

    /// Returns the smaller of the calibration's `h2d_gb_s` and `d2h_gb_s` figures,
    /// i.e. the slower of the two transfer directions.
    #[inline]
    pub fn pcie_gb_per_s(&self) -> f64 {
        self.calibration.h2d_gb_s.min(self.calibration.d2h_gb_s)
    }

    /// Returns a ranking score: the FP64 figure plus 4.0 for every GiB of total memory.
    ///
    /// Higher is better; only the relative order of scores is meaningful.
    pub fn score(&self) -> f64 {
        const BYTES_PER_GIB: f64 = 1_073_741_824.0;
        const SCORE_PER_GIB: f64 = 4.0;

        let memory_gib = self.total_memory_bytes as f64 / BYTES_PER_GIB;
        self.peak_fp64_gflops() + memory_gib * SCORE_PER_GIB
    }

    /// Returns how many bytes of device memory a dispatch may use.
    ///
    /// * Devices with at most 1 GiB (twice the 512 MiB reserve) get half of their memory.
    /// * Larger devices get 70% of their memory, but never more than the total
    ///   minus the 512 MiB reserve.
    ///
    /// The 70% figure is `floor(7 * total / 10)` and is computed without forming
    /// `7 * total`, so the result stays exact even when that product would not fit
    /// in `usize` (for example on 32-bit targets).
    pub fn dispatch_memory_budget_bytes(&self) -> usize {
        const RESERVE_BYTES: usize = 512 * 1024 * 1024;

        let total = self.total_memory_bytes;
        if total <= RESERVE_BYTES.saturating_mul(2) {
            return total / 2;
        }

        // With total = 10*q + r: floor(7*total/10) = 7*q + floor(7*r/10).
        // Neither term can overflow, unlike `total * 7`.
        let seventy_percent = total / 10 * 7 + total % 10 * 7 / 10;

        // `total` exceeds twice the reserve here, so the subtraction cannot underflow.
        seventy_percent.min(total - RESERVE_BYTES)
    }
}
impl fmt::Display for GpuDeviceInfo {
    /// Writes a one-line summary of the form
    /// `device <ordinal> '<name>' sm_<major><minor> <sm_count> SMs <memory> GiB`,
    /// where the memory size is shown in GiB with one decimal place.
    /// The calibration figures are not part of the output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            ordinal,
            name,
            compute_capability_major: cc_major,
            compute_capability_minor: cc_minor,
            sm_count,
            total_memory_bytes,
            ..
        } = self;

        // Bytes -> GiB (2^30 bytes) for display only.
        let memory_gib = *total_memory_bytes as f64 / 1_073_741_824.0;

        write!(
            f,
            "device {} '{}' sm_{}{} {} SMs {:.1} GiB",
            ordinal, name, cc_major, cc_minor, sm_count, memory_gib,
        )
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Bytes per GiB, used to size fixture memory.
    const GIB: usize = 1024 * 1024 * 1024;

    /// Builds a fixture device named "test" at ordinal 0 with the given
    /// compute capability, SM count, memory size (in GiB) and FP64 figure.
    /// Transfer figures are fixed at 24.0 (h2d) and 23.0 (d2h).
    fn device(
        cc_major: i32,
        cc_minor: i32,
        sm_count: i32,
        mem_gib: usize,
        fp64_gflops: f64,
    ) -> GpuDeviceInfo {
        GpuDeviceInfo {
            ordinal: 0,
            name: String::from("test"),
            compute_capability_major: cc_major,
            compute_capability_minor: cc_minor,
            sm_count,
            total_memory_bytes: mem_gib * GIB,
            calibration: DeviceCalibration {
                fp64_gflops,
                h2d_gb_s: 24.0,
                d2h_gb_s: 23.0,
            },
        }
    }

    /// The peak figure follows the calibration value, not the hardware generation.
    #[test]
    fn measured_throughput_drives_peak() {
        let slow_peak = device(7, 5, 40, 16, 250.0).peak_fp64_gflops();
        let fast_peak = device(9, 0, 132, 80, 9000.0).peak_fp64_gflops();
        assert!(fast_peak > slow_peak * 30.0);
    }

    /// A device with more throughput and more memory must outrank a smaller one.
    #[test]
    fn score_prefers_higher_throughput_and_memory() {
        let small_score = device(7, 5, 40, 8, 200.0).score();
        let large_score = device(9, 0, 132, 80, 8000.0).score();
        assert!(large_score > small_score);
    }

    /// With asymmetric transfer figures the slower direction is reported.
    #[test]
    fn pcie_uses_min_of_directions() {
        let asymmetric = GpuDeviceInfo {
            name: String::from("asym"),
            calibration: DeviceCalibration {
                fp64_gflops: 6000.0,
                h2d_gb_s: 23.0,
                d2h_gb_s: 25.5,
            },
            ..device(8, 0, 80, 40, 6000.0)
        };
        let delta = asymmetric.pcie_gb_per_s() - 23.0;
        assert!(delta.abs() < 1e-9);
    }
}