use std::fmt;
use std::time::Instant;
/// Direction of a data transfer, named after the endpoint on each side.
///
/// The `Display` implementation renders every variant as an arrow label such as
/// `Host -> Device`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TransferDirection {
    /// Host to device; displayed as `Host -> Device`.
    HostToDevice,
    /// Device to host; displayed as `Device -> Host`.
    DeviceToHost,
    /// Device to device; displayed as `Device -> Device`.
    DeviceToDevice,
    /// Host to host; displayed as `Host -> Host`.
    HostToHost,
}

impl fmt::Display for TransferDirection {
    /// Writes the arrow label of this direction.
    ///
    /// The label is emitted verbatim; width and alignment flags of the formatter
    /// are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match *self {
            Self::HostToDevice => "Host -> Device",
            Self::DeviceToHost => "Device -> Host",
            Self::DeviceToDevice => "Device -> Device",
            Self::HostToHost => "Host -> Host",
        };
        f.write_str(label)
    }
}
/// A single recorded transfer: its direction, size, duration and derived throughput.
///
/// All fields are public, so a value can be built by hand; `BandwidthMeasurement::new`
/// is the constructor that derives `bandwidth_gbps` and `timestamp` for the caller.
#[derive(Debug, Clone)]
pub struct BandwidthMeasurement {
/// Direction of the transfer.
pub direction: TransferDirection,
/// Number of bytes transferred.
pub bytes: usize,
/// Elapsed time of the transfer, in milliseconds.
pub elapsed_ms: f64,
/// Throughput as computed by `new`: `bytes / seconds / 1e9`, i.e. units of 10^9 bytes
/// per second. `new` stores `0.0` when `elapsed_ms` is not strictly positive.
pub bandwidth_gbps: f64,
/// When constructed through `new`, the `Instant::now()` value captured at construction.
pub timestamp: Instant,
}
impl BandwidthMeasurement {
    /// Builds a measurement from a byte count and an elapsed time in milliseconds.
    ///
    /// The bandwidth is derived as `bytes / seconds / 1e9`. When `elapsed_ms` is not
    /// strictly positive (zero, negative or NaN) no division is attempted and the
    /// bandwidth is stored as `0.0`. `timestamp` is taken from `Instant::now()` at
    /// the time of the call.
    pub fn new(direction: TransferDirection, bytes: usize, elapsed_ms: f64) -> Self {
        let bandwidth_gbps = if elapsed_ms > 0.0 {
            let seconds = elapsed_ms * 1e-3;
            let bytes_per_second = (bytes as f64) / seconds;
            bytes_per_second / 1e9
        } else {
            0.0
        };
        let timestamp = Instant::now();

        Self {
            direction,
            bytes,
            elapsed_ms,
            bandwidth_gbps,
            timestamp,
        }
    }

    /// Returns the stored bandwidth scaled by 1000 (GB/s expressed as MB/s).
    #[inline]
    pub fn bandwidth_mbps(&self) -> f64 {
        let gbps = self.bandwidth_gbps;
        gbps * 1000.0
    }

    /// Returns the elapsed time converted from milliseconds to microseconds.
    #[inline]
    pub fn latency_us(&self) -> f64 {
        let millis = self.elapsed_ms;
        millis * 1000.0
    }
}
impl fmt::Display for BandwidthMeasurement {
    /// Formats the measurement on one line:
    /// `<direction>: <bytes> bytes in <elapsed, 3 decimals> ms (<bandwidth, 2 decimals> GB/s)`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Exhaustive destructuring: adding a field forces a decision about whether
        // it belongs in the rendered line.
        let Self {
            direction,
            bytes,
            elapsed_ms,
            bandwidth_gbps,
            timestamp: _,
        } = self;

        write!(
            f,
            "{}: {} bytes in {:.3} ms ({:.2} GB/s)",
            direction, bytes, elapsed_ms, bandwidth_gbps
        )
    }
}
/// Aggregate statistics for all recorded measurements of one transfer direction.
///
/// The field descriptions below state how `BandwidthProfiler::compute_direction_summary`
/// populates them; the fields are public and can also be filled in by hand.
#[derive(Debug, Clone)]
pub struct DirectionSummary {
/// Direction these statistics apply to.
pub direction: TransferDirection,
/// Number of measurements recorded for this direction.
pub transfer_count: usize,
/// Sum of `bytes` over those measurements.
pub total_bytes: usize,
/// Arithmetic mean of the per-measurement `bandwidth_gbps` values.
pub avg_bandwidth_gbps: f64,
/// Smallest per-measurement `bandwidth_gbps` value.
pub min_bandwidth_gbps: f64,
/// Largest per-measurement `bandwidth_gbps` value (the fold starts at `0.0`, so never below it).
pub max_bandwidth_gbps: f64,
/// Elapsed time, in microseconds, of the measurement with the fewest bytes.
pub latency_overhead_us: f64,
}
impl fmt::Display for DirectionSummary {
    /// Formats the per-direction statistics on a single line: transfer count, total
    /// bytes, average/min/max bandwidth (2 decimals) and latency overhead (1 decimal).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            direction,
            transfer_count,
            total_bytes,
            avg_bandwidth_gbps,
            min_bandwidth_gbps,
            max_bandwidth_gbps,
            latency_overhead_us,
        } = self;

        write!(
            f,
            "{}: {} transfers, {} bytes total, avg {:.2} GB/s (min {:.2}, max {:.2}), \
             overhead ~{:.1} us",
            direction,
            transfer_count,
            total_bytes,
            avg_bandwidth_gbps,
            min_bandwidth_gbps,
            max_bandwidth_gbps,
            latency_overhead_us
        )
    }
}
/// Aggregate statistics over every measurement held by a `BandwidthProfiler`.
///
/// The field descriptions below state how `BandwidthProfiler::summary` populates them.
/// For a profiler with no measurements every numeric field is zero and `per_direction`
/// is empty.
#[derive(Debug, Clone)]
pub struct BandwidthSummary {
/// Number of recorded measurements.
pub total_transfers: usize,
/// Sum of `bytes` over all measurements.
pub total_bytes: usize,
/// Sum of `elapsed_ms` over all measurements.
pub total_time_ms: f64,
/// Arithmetic mean of the per-measurement `bandwidth_gbps` values
/// (not `total_bytes` divided by `total_time_ms`).
pub avg_bandwidth_gbps: f64,
/// Largest per-measurement `bandwidth_gbps` value (the fold starts at `0.0`, so never below it).
pub peak_bandwidth_gbps: f64,
/// One entry per direction that has at least one measurement, in the order
/// `HostToDevice`, `DeviceToHost`, `DeviceToDevice`, `HostToHost`.
pub per_direction: Vec<DirectionSummary>,
}
impl fmt::Display for BandwidthSummary {
    /// Writes a multi-line report: a header line, the overall totals, the average and
    /// peak bandwidth, then one line per entry of `per_direction`.
    ///
    /// Every line, including the last, is terminated by a newline.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            total_transfers,
            total_bytes,
            total_time_ms,
            avg_bandwidth_gbps,
            peak_bandwidth_gbps,
            per_direction,
        } = self;

        writeln!(f, "=== Bandwidth Summary ===")?;
        writeln!(
            f,
            "Total: {} transfers, {} bytes, {:.3} ms",
            total_transfers, total_bytes, total_time_ms
        )?;
        writeln!(
            f,
            "Avg: {:.2} GB/s, Peak: {:.2} GB/s",
            avg_bandwidth_gbps, peak_bandwidth_gbps
        )?;

        // Stops at, and returns, the first write error; `Ok(())` when the list is empty.
        per_direction
            .iter()
            .try_for_each(|ds| writeln!(f, " {ds}"))
    }
}
/// Collects `BandwidthMeasurement` values and produces aggregate summaries from them.
#[derive(Debug, Clone)]
pub struct BandwidthProfiler {
// Recorded measurements, in the order they were passed to `record`.
measurements: Vec<BandwidthMeasurement>,
/// Warm-up iteration count (`new` sets 3). Stored only: no method of this type
/// visible in this file reads it — presumably consumed by the benchmark driver.
pub warmup_iterations: u32,
/// Benchmark iteration count (`new` sets 10). Stored only: no method of this type
/// visible in this file reads it — presumably consumed by the benchmark driver.
pub benchmark_iterations: u32,
}
impl Default for BandwidthProfiler {
fn default() -> Self {
Self::new()
}
}
impl BandwidthProfiler {
    /// Creates an empty profiler with 3 warm-up iterations and 10 benchmark iterations.
    pub fn new() -> Self {
        Self::with_iterations(3, 10)
    }

    /// Creates an empty profiler that stores the given iteration counts.
    ///
    /// The counts are only stored; none of the methods in this `impl` reads them.
    pub fn with_iterations(warmup: u32, benchmark: u32) -> Self {
        let measurements = Vec::new();
        Self {
            measurements,
            warmup_iterations: warmup,
            benchmark_iterations: benchmark,
        }
    }

    /// Appends `measurement` to the recorded history.
    pub fn record(&mut self, measurement: BandwidthMeasurement) {
        self.measurements.push(measurement);
    }

    /// Number of measurements recorded so far.
    #[inline]
    pub fn measurement_count(&self) -> usize {
        self.measurements.len()
    }

    /// All recorded measurements, in insertion order.
    #[inline]
    pub fn measurements(&self) -> &[BandwidthMeasurement] {
        self.measurements.as_slice()
    }

    /// Discards every recorded measurement; the iteration counts are left untouched.
    pub fn clear(&mut self) {
        self.measurements.clear();
    }

    /// Aggregates every recorded measurement into a `BandwidthSummary`.
    ///
    /// With no measurements, all numeric fields are zero and `per_direction` is
    /// empty. Otherwise the average is the arithmetic mean of the per-measurement
    /// bandwidths, the peak is their maximum (never below `0.0`), and
    /// `per_direction` holds one entry for each direction that has data.
    pub fn summary(&self) -> BandwidthSummary {
        let recorded = self.measurements.as_slice();
        let total_transfers = recorded.len();

        // Guard first: the mean below would otherwise divide by zero.
        if total_transfers == 0 {
            return BandwidthSummary {
                total_transfers: 0,
                total_bytes: 0,
                total_time_ms: 0.0,
                avg_bandwidth_gbps: 0.0,
                peak_bandwidth_gbps: 0.0,
                per_direction: Vec::new(),
            };
        }

        let total_bytes = recorded.iter().map(|m| m.bytes).sum::<usize>();
        let total_time_ms = recorded.iter().map(|m| m.elapsed_ms).sum::<f64>();
        let bandwidth_total = recorded.iter().map(|m| m.bandwidth_gbps).sum::<f64>();

        let mut peak_bandwidth_gbps = 0.0_f64;
        for m in recorded {
            peak_bandwidth_gbps = peak_bandwidth_gbps.max(m.bandwidth_gbps);
        }

        // Fixed reporting order; directions without any measurement are skipped.
        let mut per_direction = Vec::new();
        for dir in [
            TransferDirection::HostToDevice,
            TransferDirection::DeviceToHost,
            TransferDirection::DeviceToDevice,
            TransferDirection::HostToHost,
        ] {
            if let Some(direction_summary) = self.compute_direction_summary(dir) {
                per_direction.push(direction_summary);
            }
        }

        BandwidthSummary {
            total_transfers,
            total_bytes,
            total_time_ms,
            avg_bandwidth_gbps: bandwidth_total / total_transfers as f64,
            peak_bandwidth_gbps,
            per_direction,
        }
    }

    /// Statistics for a single direction, or `None` when nothing was recorded for it.
    pub fn summary_by_direction(&self, dir: TransferDirection) -> Option<DirectionSummary> {
        self.compute_direction_summary(dir)
    }

    /// Computes the statistics of all measurements whose direction equals `dir`.
    ///
    /// Returns `None` when no measurement matches. `latency_overhead_us` is the
    /// elapsed time (in microseconds) of the matching measurement with the fewest
    /// bytes; on ties the earliest recorded one is used.
    fn compute_direction_summary(&self, dir: TransferDirection) -> Option<DirectionSummary> {
        let matching: Vec<&BandwidthMeasurement> = self
            .measurements
            .iter()
            .filter(|m| m.direction == dir)
            .collect();

        // `min_by_key` yields `None` exactly when `matching` is empty, so `?`
        // doubles as the "no data for this direction" early exit.
        let smallest = matching.iter().min_by_key(|m| m.bytes)?;
        let latency_overhead_us = smallest.latency_us();

        let transfer_count = matching.len();
        let total_bytes = matching.iter().map(|m| m.bytes).sum::<usize>();
        let bandwidth_total = matching.iter().map(|m| m.bandwidth_gbps).sum::<f64>();

        let mut min_bandwidth_gbps = f64::INFINITY;
        let mut max_bandwidth_gbps = 0.0_f64;
        for m in &matching {
            min_bandwidth_gbps = min_bandwidth_gbps.min(m.bandwidth_gbps);
            max_bandwidth_gbps = max_bandwidth_gbps.max(m.bandwidth_gbps);
        }

        Some(DirectionSummary {
            direction: dir,
            transfer_count,
            total_bytes,
            avg_bandwidth_gbps: bandwidth_total / transfer_count as f64,
            min_bandwidth_gbps,
            max_bandwidth_gbps,
            latency_overhead_us,
        })
    }
}
/// Parameters describing a bandwidth benchmark sweep.
///
/// Within this file the configuration is only constructed and inspected
/// (`total_transfers`); the code that runs a benchmark from it is not visible here.
#[derive(Debug, Clone)]
pub struct BandwidthBenchmarkConfig {
/// Transfer sizes to sweep (presumably byte counts — confirm against the benchmark driver).
pub sizes: Vec<usize>,
/// Transfer directions to sweep.
pub directions: Vec<TransferDirection>,
/// Warm-up iteration count; not included in `total_transfers`.
pub warmup_iterations: u32,
/// Benchmark iteration count; multiplied into `total_transfers`.
pub benchmark_iterations: u32,
/// Pinned-memory flag (default `true`). Not read anywhere in this file —
/// NOTE(review): presumably selects page-locked host buffers; confirm with the driver.
pub use_pinned_memory: bool,
}
impl Default for BandwidthBenchmarkConfig {
    /// Default sweep: ten sizes growing by a factor of four from `1 << 10` to
    /// `256 << 20`, all four directions, 3 warm-up and 10 benchmark iterations,
    /// and `use_pinned_memory` enabled.
    fn default() -> Self {
        const KIB: usize = 1 << 10;
        const MIB: usize = 1 << 20;

        let sizes = vec![
            KIB,
            4 * KIB,
            16 * KIB,
            64 * KIB,
            256 * KIB,
            MIB,
            4 * MIB,
            16 * MIB,
            64 * MIB,
            256 * MIB,
        ];
        let directions = Vec::from([
            TransferDirection::HostToDevice,
            TransferDirection::DeviceToHost,
            TransferDirection::DeviceToDevice,
            TransferDirection::HostToHost,
        ]);

        Self {
            sizes,
            directions,
            warmup_iterations: 3,
            benchmark_iterations: 10,
            use_pinned_memory: true,
        }
    }
}
impl BandwidthBenchmarkConfig {
    /// Default configuration with the size list replaced by `sizes`.
    pub fn with_sizes(sizes: Vec<usize>) -> Self {
        let defaults = Self::default();
        Self { sizes, ..defaults }
    }

    /// Default configuration restricted to the single given `direction`.
    pub fn for_direction(direction: TransferDirection) -> Self {
        let defaults = Self::default();
        Self {
            directions: vec![direction],
            ..defaults
        }
    }

    /// Overwrites both iteration counts in place.
    pub fn set_iterations(&mut self, warmup: u32, benchmark: u32) {
        self.benchmark_iterations = benchmark;
        self.warmup_iterations = warmup;
    }

    /// Number of measured transfers the configuration describes:
    /// `sizes × directions × benchmark_iterations`. Warm-up iterations are not counted.
    pub fn total_transfers(&self) -> usize {
        let combinations = self.sizes.len() * self.directions.len();
        combinations * self.benchmark_iterations as usize
    }
}
/// Estimates how long a transfer takes, in milliseconds.
///
/// The estimate is a fixed latency plus the time needed to move `bytes` at the
/// given bandwidth: `latency_us / 1000 + bytes / (bandwidth_gbps * 1e6)`, where one
/// GB/s equals `1e6` bytes per millisecond.
///
/// Returns `f64::INFINITY` when `bandwidth_gbps` is zero or negative. A NaN
/// bandwidth is not caught by that guard and yields NaN.
pub fn estimate_transfer_time(bytes: usize, bandwidth_gbps: f64, latency_us: f64) -> f64 {
    const US_PER_MS: f64 = 1000.0;
    const BYTES_PER_MS_PER_GBPS: f64 = 1e6;

    if bandwidth_gbps <= 0.0 {
        f64::INFINITY
    } else {
        let fixed_ms = latency_us / US_PER_MS;
        let streaming_ms = bytes as f64 / (bandwidth_gbps * BYTES_PER_MS_PER_GBPS);
        fixed_ms + streaming_ms
    }
}
/// Theoretical one-directional PCIe bandwidth in GB/s for a generation and lane count.
///
/// Computed as `rate (GT/s) × lanes × encoding efficiency / 8`, where the efficiency
/// depends on the generation's encoding:
///
/// * Gen 1–2: 8b/10b line code → `0.8`
/// * Gen 3–5: 128b/130b line code → `128 / 130`
/// * Gen 6: PAM4 signalling with 1b/1b FLIT mode, where 242 of every 256 FLIT bytes
///   remain after CRC and FEC → `242 / 256`
///
/// Returns `0.0` when `lanes` is zero or `pcie_gen` is not in `1..=6`.
pub fn theoretical_peak_bandwidth(pcie_gen: u32, lanes: u32) -> f64 {
    const EFF_8B10B: f64 = 0.8;
    const EFF_128B130B: f64 = 128.0 / 130.0;
    // Gen 6 dropped 128b/130b; applying it there overstates x16 as ~126 GB/s
    // instead of 121 GB/s.
    const EFF_FLIT_242_256: f64 = 242.0 / 256.0;

    if lanes == 0 {
        return 0.0;
    }

    let (rate_gtps, encoding_efficiency): (f64, f64) = match pcie_gen {
        1 => (2.5, EFF_8B10B),
        2 => (5.0, EFF_8B10B),
        3 => (8.0, EFF_128B130B),
        4 => (16.0, EFF_128B130B),
        5 => (32.0, EFF_128B130B),
        6 => (64.0, EFF_FLIT_242_256),
        _ => return 0.0,
    };

    rate_gtps * f64::from(lanes) * encoding_efficiency / 8.0
}
/// Fraction of the peak bandwidth that was achieved, limited to `[0.0, 1.0]`.
///
/// Returns `0.0` when `peak_gbps` is zero or negative. Ratios above one (measured
/// faster than peak) are reported as `1.0` and negative ratios as `0.0`. A NaN
/// input is not caught by the guard and results in NaN.
pub fn bandwidth_utilization(measured_gbps: f64, peak_gbps: f64) -> f64 {
    if peak_gbps <= 0.0 {
        0.0
    } else {
        let ratio = measured_gbps / peak_gbps;
        ratio.clamp(0.0, 1.0)
    }
}
/// Renders a byte count with the largest binary unit that does not exceed it.
///
/// Units are powers of 1024 labelled `KB`, `MB` and `GB`; scaled values are shown
/// with two decimals. Counts below 1024 are printed as a plain integer followed
/// by `B`.
pub fn format_bytes(bytes: usize) -> String {
    const KB: usize = 1 << 10;
    const MB: usize = 1 << 20;
    const GB: usize = 1 << 30;

    // Arms are tried top to bottom, so the largest matching unit wins.
    match bytes {
        n if n >= GB => format!("{:.2} GB", n as f64 / GB as f64),
        n if n >= MB => format!("{:.2} MB", n as f64 / MB as f64),
        n if n >= KB => format!("{:.2} KB", n as f64 / KB as f64),
        _ => format!("{bytes} B"),
    }
}
/// Renders a bandwidth given in GB/s as text with two decimals.
///
/// Values of at least `1.0` are shown in `GB/s`; everything else (including zero,
/// negative values and NaN) is multiplied by 1000 and shown in `MB/s`.
pub fn describe_bandwidth(gbps: f64) -> String {
    if gbps >= 1.0 {
        return format!("{gbps:.2} GB/s");
    }

    let mbps = gbps * 1000.0;
    format!("{:.2} MB/s", mbps)
}
// Unit tests for the measurement type, the profiler, the benchmark configuration
// and the free helper functions of this module.
#[cfg(test)]
mod tests {
use super::*;
// --- BandwidthMeasurement ---
// 1e9 bytes in 1000 ms (one second) is exactly 1 GB/s.
#[test]
fn measurement_new_computes_bandwidth() {
let m = BandwidthMeasurement::new(TransferDirection::HostToDevice, 1_000_000_000, 1000.0);
assert!((m.bandwidth_gbps - 1.0).abs() < 1e-6);
}
// A zero elapsed time must not divide by zero; the bandwidth is reported as 0.
#[test]
fn measurement_zero_elapsed_gives_zero_bandwidth() {
let m = BandwidthMeasurement::new(TransferDirection::HostToDevice, 1024, 0.0);
assert!((m.bandwidth_gbps - 0.0).abs() < f64::EPSILON);
}
// A negative elapsed time is treated like zero: bandwidth 0.
#[test]
fn measurement_negative_elapsed_gives_zero_bandwidth() {
let m = BandwidthMeasurement::new(TransferDirection::DeviceToHost, 1024, -1.0);
assert!((m.bandwidth_gbps - 0.0).abs() < f64::EPSILON);
}
// 1 GB/s is reported as 1000 by `bandwidth_mbps`.
#[test]
fn measurement_bandwidth_mbps() {
let m = BandwidthMeasurement::new(TransferDirection::DeviceToDevice, 1_000_000_000, 1000.0);
assert!((m.bandwidth_mbps() - 1000.0).abs() < 1e-3);
}
// 2.5 ms is 2500 microseconds.
#[test]
fn measurement_latency_us() {
let m = BandwidthMeasurement::new(TransferDirection::HostToHost, 1024, 2.5);
assert!((m.latency_us() - 2500.0).abs() < 1e-6);
}
// The Display output contains the direction label, the raw byte count, the elapsed
// time with three decimals and the GB/s unit.
#[test]
fn measurement_display() {
let m = BandwidthMeasurement::new(TransferDirection::HostToDevice, 1048576, 0.5);
let s = format!("{m}");
assert!(s.contains("Host -> Device"));
assert!(s.contains("1048576"));
assert!(s.contains("0.500 ms"));
assert!(s.contains("GB/s"));
}
// --- BandwidthProfiler ---
// An empty profiler yields an all-zero summary without per-direction entries.
#[test]
fn profiler_empty_summary() {
let profiler = BandwidthProfiler::new();
let s = profiler.summary();
assert_eq!(s.total_transfers, 0);
assert_eq!(s.total_bytes, 0);
assert!((s.avg_bandwidth_gbps - 0.0).abs() < f64::EPSILON);
assert!(s.per_direction.is_empty());
}
// Three measurements over two directions: totals are summed and only the two
// directions that have data appear in `per_direction`.
#[test]
fn profiler_record_and_summary() {
let mut profiler = BandwidthProfiler::new();
profiler.record(BandwidthMeasurement::new(
TransferDirection::HostToDevice,
1 << 20,
0.5,
));
profiler.record(BandwidthMeasurement::new(
TransferDirection::HostToDevice,
2 << 20,
1.0,
));
profiler.record(BandwidthMeasurement::new(
TransferDirection::DeviceToHost,
512 << 10,
0.25,
));
let s = profiler.summary();
assert_eq!(s.total_transfers, 3);
assert_eq!(s.total_bytes, (1 << 20) + (2 << 20) + (512 << 10));
assert!((s.total_time_ms - 1.75).abs() < 1e-9);
assert!(s.peak_bandwidth_gbps > 0.0);
assert_eq!(s.per_direction.len(), 2); }
// `summary_by_direction` is `Some` only for directions with recorded data.
#[test]
fn profiler_summary_by_direction() {
let mut profiler = BandwidthProfiler::new();
profiler.record(BandwidthMeasurement::new(
TransferDirection::HostToDevice,
1 << 20,
0.5,
));
profiler.record(BandwidthMeasurement::new(
TransferDirection::DeviceToHost,
1 << 20,
0.6,
));
assert!(
profiler
.summary_by_direction(TransferDirection::HostToDevice)
.is_some()
);
assert!(
profiler
.summary_by_direction(TransferDirection::DeviceToHost)
.is_some()
);
assert!(
profiler
.summary_by_direction(TransferDirection::DeviceToDevice)
.is_none()
);
}
// Two measurements with equal duration and different sizes: the average is the
// mean of both bandwidths, the smaller transfer is the minimum, the larger the maximum.
#[test]
fn profiler_direction_summary_stats() {
let mut profiler = BandwidthProfiler::new();
let m1 = BandwidthMeasurement::new(TransferDirection::HostToDevice, 1_000_000, 1.0);
let m2 = BandwidthMeasurement::new(TransferDirection::HostToDevice, 2_000_000, 1.0);
let bw1 = m1.bandwidth_gbps;
let bw2 = m2.bandwidth_gbps;
profiler.record(m1);
profiler.record(m2);
let ds = profiler
.summary_by_direction(TransferDirection::HostToDevice)
.expect("should have HtoD summary");
assert_eq!(ds.transfer_count, 2);
assert_eq!(ds.total_bytes, 3_000_000);
assert!((ds.avg_bandwidth_gbps - (bw1 + bw2) / 2.0).abs() < 1e-9);
assert!((ds.min_bandwidth_gbps - bw1).abs() < 1e-9);
assert!((ds.max_bandwidth_gbps - bw2).abs() < 1e-9);
}
// `with_iterations` stores the counts and starts without measurements.
#[test]
fn profiler_with_iterations() {
let p = BandwidthProfiler::with_iterations(5, 20);
assert_eq!(p.warmup_iterations, 5);
assert_eq!(p.benchmark_iterations, 20);
assert_eq!(p.measurement_count(), 0);
}
// `clear` removes every recorded measurement.
#[test]
fn profiler_clear() {
let mut p = BandwidthProfiler::new();
p.record(BandwidthMeasurement::new(
TransferDirection::HostToDevice,
1024,
0.1,
));
assert_eq!(p.measurement_count(), 1);
p.clear();
assert_eq!(p.measurement_count(), 0);
}
// --- estimate_transfer_time ---
// 1e9 bytes at 10 GB/s take 100 ms; 5 us of latency add 0.005 ms.
#[test]
fn estimate_transfer_time_basic() {
let t = estimate_transfer_time(1_000_000_000, 10.0, 5.0);
assert!((t - 100.005).abs() < 1e-6);
}
// Zero bandwidth means the transfer never completes: infinite time.
#[test]
fn estimate_transfer_time_zero_bandwidth() {
let t = estimate_transfer_time(1024, 0.0, 5.0);
assert!(t.is_infinite());
}
// --- theoretical_peak_bandwidth ---
// Gen 3: 8 GT/s x 16 lanes x 128/130 / 8 is about 15.754 GB/s.
#[test]
fn theoretical_peak_bandwidth_pcie3_x16() {
let bw = theoretical_peak_bandwidth(3, 16);
assert!((bw - 15.754).abs() < 0.01);
}
// Gen 4 doubles the Gen 3 rate.
#[test]
fn theoretical_peak_bandwidth_pcie4_x16() {
let bw = theoretical_peak_bandwidth(4, 16);
assert!((bw - 31.508).abs() < 0.01);
}
// Gen 5 doubles the Gen 4 rate.
#[test]
fn theoretical_peak_bandwidth_pcie5_x16() {
let bw = theoretical_peak_bandwidth(5, 16);
assert!((bw - 63.015).abs() < 0.02);
}
// Generations outside the supported 1..=6 range yield 0.
#[test]
fn theoretical_peak_bandwidth_invalid_gen() {
assert!((theoretical_peak_bandwidth(0, 16) - 0.0).abs() < f64::EPSILON);
assert!((theoretical_peak_bandwidth(7, 16) - 0.0).abs() < f64::EPSILON);
}
// Zero lanes yield 0 regardless of the generation.
#[test]
fn theoretical_peak_bandwidth_zero_lanes() {
assert!((theoretical_peak_bandwidth(3, 0) - 0.0).abs() < f64::EPSILON);
}
// --- bandwidth_utilization ---
// 12 of 16 GB/s is 75 % utilization.
#[test]
fn bandwidth_utilization_basic() {
let u = bandwidth_utilization(12.0, 16.0);
assert!((u - 0.75).abs() < 1e-9);
}
// Measured values above the peak are clamped to 1.0.
#[test]
fn bandwidth_utilization_clamps_above_one() {
let u = bandwidth_utilization(20.0, 16.0);
assert!((u - 1.0).abs() < f64::EPSILON);
}
// A zero peak yields 0 instead of dividing by zero.
#[test]
fn bandwidth_utilization_zero_peak() {
let u = bandwidth_utilization(10.0, 0.0);
assert!((u - 0.0).abs() < f64::EPSILON);
}
// --- BandwidthBenchmarkConfig ---
// The default sweep has ten sizes from 1 << 10 to 256 << 20, four directions,
// 3 warm-up and 10 benchmark iterations, and pinned memory enabled.
#[test]
fn benchmark_config_default_sizes() {
let cfg = BandwidthBenchmarkConfig::default();
assert_eq!(cfg.sizes.len(), 10);
assert_eq!(cfg.sizes[0], 1 << 10); assert_eq!(cfg.sizes[9], 256 << 20); assert_eq!(cfg.directions.len(), 4);
assert_eq!(cfg.warmup_iterations, 3);
assert_eq!(cfg.benchmark_iterations, 10);
assert!(cfg.use_pinned_memory);
}
// 10 sizes x 4 directions x 10 benchmark iterations = 400 transfers.
#[test]
fn benchmark_config_total_transfers() {
let cfg = BandwidthBenchmarkConfig::default();
assert_eq!(cfg.total_transfers(), 400);
}
// `with_sizes` replaces only the size list; the four default directions remain.
#[test]
fn benchmark_config_with_sizes() {
let cfg = BandwidthBenchmarkConfig::with_sizes(vec![1024, 2048]);
assert_eq!(cfg.sizes.len(), 2);
assert_eq!(cfg.directions.len(), 4); }
// `for_direction` restricts the sweep to the single requested direction.
#[test]
fn benchmark_config_for_direction() {
let cfg = BandwidthBenchmarkConfig::for_direction(TransferDirection::DeviceToDevice);
assert_eq!(cfg.directions.len(), 1);
assert_eq!(cfg.directions[0], TransferDirection::DeviceToDevice);
}
// --- Display and formatting helpers ---
// The summary report contains its header and the GB/s unit.
#[test]
fn summary_display_format() {
let mut profiler = BandwidthProfiler::new();
profiler.record(BandwidthMeasurement::new(
TransferDirection::HostToDevice,
1 << 20,
0.5,
));
let summary = profiler.summary();
let display = format!("{summary}");
assert!(display.contains("Bandwidth Summary"));
assert!(display.contains("GB/s"));
}
// Every direction renders as its exact arrow label.
#[test]
fn direction_display() {
assert_eq!(
format!("{}", TransferDirection::HostToDevice),
"Host -> Device"
);
assert_eq!(
format!("{}", TransferDirection::DeviceToHost),
"Device -> Host"
);
assert_eq!(
format!("{}", TransferDirection::DeviceToDevice),
"Device -> Device"
);
assert_eq!(format!("{}", TransferDirection::HostToHost), "Host -> Host");
}
// One value per unit range: B, KB, MB and GB (powers of 1024).
#[test]
fn format_bytes_ranges() {
assert_eq!(format_bytes(500), "500 B");
assert_eq!(format_bytes(1024), "1.00 KB");
assert_eq!(format_bytes(1 << 20), "1.00 MB");
assert_eq!(format_bytes(1 << 30), "1.00 GB");
}
// Values of at least 1 GB/s are shown in GB/s, smaller ones in MB/s.
#[test]
fn describe_bandwidth_formatting() {
assert_eq!(describe_bandwidth(2.5), "2.50 GB/s");
assert_eq!(describe_bandwidth(0.5), "500.00 MB/s");
}
// Gen 1: 2.5 GT/s x 16 lanes x 0.8 / 8 = 4 GB/s.
#[test]
fn theoretical_peak_bandwidth_pcie1_x16() {
let bw = theoretical_peak_bandwidth(1, 16);
assert!((bw - 4.0).abs() < 1e-6);
}
// Gen 2: 5 GT/s x 16 lanes x 0.8 / 8 = 8 GB/s.
#[test]
fn theoretical_peak_bandwidth_pcie2_x16() {
let bw = theoretical_peak_bandwidth(2, 16);
assert!((bw - 8.0).abs() < 1e-6);
}
}