1use std::fmt;
38use std::time::Instant;
39
/// Source and destination endpoints of a memory transfer.
///
/// Each variant is named `<Source>To<Destination>`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum TransferDirection {
    /// The host is the source, the device is the destination.
    HostToDevice,
    /// The device is the source, the host is the destination.
    DeviceToHost,
    /// Both endpoints are on the device side.
    DeviceToDevice,
    /// Both endpoints are on the host side.
    HostToHost,
}
56
57impl fmt::Display for TransferDirection {
58 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
59 match self {
60 Self::HostToDevice => write!(f, "Host -> Device"),
61 Self::DeviceToHost => write!(f, "Device -> Host"),
62 Self::DeviceToDevice => write!(f, "Device -> Device"),
63 Self::HostToHost => write!(f, "Host -> Host"),
64 }
65 }
66}
67
68#[derive(Debug, Clone)]
77pub struct BandwidthMeasurement {
78 pub direction: TransferDirection,
80 pub bytes: usize,
82 pub elapsed_ms: f64,
84 pub bandwidth_gbps: f64,
86 pub timestamp: Instant,
88}
89
90impl BandwidthMeasurement {
91 pub fn new(direction: TransferDirection, bytes: usize, elapsed_ms: f64) -> Self {
97 let bandwidth_gbps = if elapsed_ms > 0.0 {
98 (bytes as f64) / (elapsed_ms * 1e-3) / 1e9
101 } else {
102 0.0
103 };
104
105 Self {
106 direction,
107 bytes,
108 elapsed_ms,
109 bandwidth_gbps,
110 timestamp: Instant::now(),
111 }
112 }
113
114 #[inline]
116 pub fn bandwidth_mbps(&self) -> f64 {
117 self.bandwidth_gbps * 1000.0
118 }
119
120 #[inline]
122 pub fn latency_us(&self) -> f64 {
123 self.elapsed_ms * 1000.0
124 }
125}
126
127impl fmt::Display for BandwidthMeasurement {
128 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
129 write!(
130 f,
131 "{}: {} bytes in {:.3} ms ({:.2} GB/s)",
132 self.direction, self.bytes, self.elapsed_ms, self.bandwidth_gbps
133 )
134 }
135}
136
137#[derive(Debug, Clone)]
143pub struct DirectionSummary {
144 pub direction: TransferDirection,
146 pub transfer_count: usize,
148 pub total_bytes: usize,
150 pub avg_bandwidth_gbps: f64,
152 pub min_bandwidth_gbps: f64,
154 pub max_bandwidth_gbps: f64,
156 pub latency_overhead_us: f64,
161}
162
163impl fmt::Display for DirectionSummary {
164 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
165 write!(
166 f,
167 "{}: {} transfers, {} bytes total, avg {:.2} GB/s (min {:.2}, max {:.2}), \
168 overhead ~{:.1} us",
169 self.direction,
170 self.transfer_count,
171 self.total_bytes,
172 self.avg_bandwidth_gbps,
173 self.min_bandwidth_gbps,
174 self.max_bandwidth_gbps,
175 self.latency_overhead_us
176 )
177 }
178}
179
180#[derive(Debug, Clone)]
186pub struct BandwidthSummary {
187 pub total_transfers: usize,
189 pub total_bytes: usize,
191 pub total_time_ms: f64,
193 pub avg_bandwidth_gbps: f64,
195 pub peak_bandwidth_gbps: f64,
197 pub per_direction: Vec<DirectionSummary>,
199}
200
201impl fmt::Display for BandwidthSummary {
202 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
203 writeln!(f, "=== Bandwidth Summary ===")?;
204 writeln!(
205 f,
206 "Total: {} transfers, {} bytes, {:.3} ms",
207 self.total_transfers, self.total_bytes, self.total_time_ms
208 )?;
209 writeln!(
210 f,
211 "Avg: {:.2} GB/s, Peak: {:.2} GB/s",
212 self.avg_bandwidth_gbps, self.peak_bandwidth_gbps
213 )?;
214 for ds in &self.per_direction {
215 writeln!(f, " {ds}")?;
216 }
217 Ok(())
218 }
219}
220
221#[derive(Debug, Clone)]
231pub struct BandwidthProfiler {
232 measurements: Vec<BandwidthMeasurement>,
234 pub warmup_iterations: u32,
236 pub benchmark_iterations: u32,
238}
239
240impl Default for BandwidthProfiler {
241 fn default() -> Self {
242 Self::new()
243 }
244}
245
246impl BandwidthProfiler {
247 pub fn new() -> Self {
250 Self {
251 measurements: Vec::new(),
252 warmup_iterations: 3,
253 benchmark_iterations: 10,
254 }
255 }
256
257 pub fn with_iterations(warmup: u32, benchmark: u32) -> Self {
259 Self {
260 measurements: Vec::new(),
261 warmup_iterations: warmup,
262 benchmark_iterations: benchmark,
263 }
264 }
265
266 pub fn record(&mut self, measurement: BandwidthMeasurement) {
268 self.measurements.push(measurement);
269 }
270
271 #[inline]
273 pub fn measurement_count(&self) -> usize {
274 self.measurements.len()
275 }
276
277 #[inline]
279 pub fn measurements(&self) -> &[BandwidthMeasurement] {
280 &self.measurements
281 }
282
283 pub fn clear(&mut self) {
285 self.measurements.clear();
286 }
287
288 pub fn summary(&self) -> BandwidthSummary {
293 if self.measurements.is_empty() {
294 return BandwidthSummary {
295 total_transfers: 0,
296 total_bytes: 0,
297 total_time_ms: 0.0,
298 avg_bandwidth_gbps: 0.0,
299 peak_bandwidth_gbps: 0.0,
300 per_direction: Vec::new(),
301 };
302 }
303
304 let total_transfers = self.measurements.len();
305 let total_bytes: usize = self.measurements.iter().map(|m| m.bytes).sum();
306 let total_time_ms: f64 = self.measurements.iter().map(|m| m.elapsed_ms).sum();
307
308 let bw_sum: f64 = self.measurements.iter().map(|m| m.bandwidth_gbps).sum();
309 let avg_bandwidth_gbps = bw_sum / total_transfers as f64;
310
311 let peak_bandwidth_gbps = self
312 .measurements
313 .iter()
314 .map(|m| m.bandwidth_gbps)
315 .fold(0.0_f64, f64::max);
316
317 let directions = [
319 TransferDirection::HostToDevice,
320 TransferDirection::DeviceToHost,
321 TransferDirection::DeviceToDevice,
322 TransferDirection::HostToHost,
323 ];
324
325 let per_direction: Vec<DirectionSummary> = directions
326 .iter()
327 .filter_map(|&dir| self.compute_direction_summary(dir))
328 .collect();
329
330 BandwidthSummary {
331 total_transfers,
332 total_bytes,
333 total_time_ms,
334 avg_bandwidth_gbps,
335 peak_bandwidth_gbps,
336 per_direction,
337 }
338 }
339
340 pub fn summary_by_direction(&self, dir: TransferDirection) -> Option<DirectionSummary> {
344 self.compute_direction_summary(dir)
345 }
346
347 fn compute_direction_summary(&self, dir: TransferDirection) -> Option<DirectionSummary> {
349 let filtered: Vec<&BandwidthMeasurement> = self
350 .measurements
351 .iter()
352 .filter(|m| m.direction == dir)
353 .collect();
354
355 if filtered.is_empty() {
356 return None;
357 }
358
359 let transfer_count = filtered.len();
360 let total_bytes: usize = filtered.iter().map(|m| m.bytes).sum();
361
362 let bw_sum: f64 = filtered.iter().map(|m| m.bandwidth_gbps).sum();
363 let avg_bandwidth_gbps = bw_sum / transfer_count as f64;
364
365 let min_bandwidth_gbps = filtered
366 .iter()
367 .map(|m| m.bandwidth_gbps)
368 .fold(f64::INFINITY, f64::min);
369
370 let max_bandwidth_gbps = filtered
371 .iter()
372 .map(|m| m.bandwidth_gbps)
373 .fold(0.0_f64, f64::max);
374
375 let latency_overhead_us = filtered
379 .iter()
380 .min_by_key(|m| m.bytes)
381 .map(|m| m.latency_us())
382 .unwrap_or(0.0);
383
384 Some(DirectionSummary {
385 direction: dir,
386 transfer_count,
387 total_bytes,
388 avg_bandwidth_gbps,
389 min_bandwidth_gbps,
390 max_bandwidth_gbps,
391 latency_overhead_us,
392 })
393 }
394}
395
396#[derive(Debug, Clone)]
406pub struct BandwidthBenchmarkConfig {
407 pub sizes: Vec<usize>,
409 pub directions: Vec<TransferDirection>,
411 pub warmup_iterations: u32,
413 pub benchmark_iterations: u32,
415 pub use_pinned_memory: bool,
417}
418
419impl Default for BandwidthBenchmarkConfig {
420 fn default() -> Self {
421 Self {
422 sizes: vec![
423 1 << 10, 4 << 10, 16 << 10, 64 << 10, 256 << 10, 1 << 20, 4 << 20, 16 << 20, 64 << 20, 256 << 20, ],
434 directions: vec![
435 TransferDirection::HostToDevice,
436 TransferDirection::DeviceToHost,
437 TransferDirection::DeviceToDevice,
438 TransferDirection::HostToHost,
439 ],
440 warmup_iterations: 3,
441 benchmark_iterations: 10,
442 use_pinned_memory: true,
443 }
444 }
445}
446
447impl BandwidthBenchmarkConfig {
448 pub fn with_sizes(sizes: Vec<usize>) -> Self {
450 Self {
451 sizes,
452 ..Self::default()
453 }
454 }
455
456 pub fn for_direction(direction: TransferDirection) -> Self {
458 Self {
459 directions: vec![direction],
460 ..Self::default()
461 }
462 }
463
464 pub fn set_iterations(&mut self, warmup: u32, benchmark: u32) {
466 self.warmup_iterations = warmup;
467 self.benchmark_iterations = benchmark;
468 }
469
470 pub fn total_transfers(&self) -> usize {
474 self.sizes.len() * self.directions.len() * self.benchmark_iterations as usize
475 }
476}
477
/// Estimates how long a transfer takes, in milliseconds.
///
/// The model is `latency + bytes / bandwidth`:
///
/// * `bytes` – payload size in bytes.
/// * `bandwidth_gbps` – sustained bandwidth in GB/s (1 GB = 1e9 bytes).
/// * `latency_us` – fixed per-transfer latency in microseconds.
///
/// Returns `f64::INFINITY` when `bandwidth_gbps` is zero or negative.
pub fn estimate_transfer_time(bytes: usize, bandwidth_gbps: f64, latency_us: f64) -> f64 {
    if bandwidth_gbps <= 0.0 {
        f64::INFINITY
    } else {
        // 1 GB/s moves 1e6 bytes per millisecond.
        let bytes_per_ms = bandwidth_gbps * 1e6;
        let fixed_ms = latency_us / 1000.0;
        let payload_ms = bytes as f64 / bytes_per_ms;
        fixed_ms + payload_ms
    }
}
508
/// Theoretical one-directional PCIe bandwidth in GB/s.
///
/// Computed as `rate (GT/s) × lanes × encoding efficiency / 8 bits`:
///
/// | Gen | Rate (GT/s) | Encoding                         | Efficiency |
/// |-----|-------------|----------------------------------|------------|
/// | 1   | 2.5         | 8b/10b                           | 0.8        |
/// | 2   | 5.0         | 8b/10b                           | 0.8        |
/// | 3   | 8.0         | 128b/130b                        | 128/130    |
/// | 4   | 16.0        | 128b/130b                        | 128/130    |
/// | 5   | 32.0        | 128b/130b                        | 128/130    |
/// | 6   | 64.0        | PAM4, 1b/1b with 256-byte FLITs  | 242/256    |
///
/// * `pcie_gen` – PCIe generation, 1 through 6.
/// * `lanes` – link width (for example 1, 4, 8 or 16); the value is not
///   validated beyond being non-zero.
///
/// Returns `0.0` for an unknown generation or for zero lanes.
pub fn theoretical_peak_bandwidth(pcie_gen: u32, lanes: u32) -> f64 {
    if lanes == 0 {
        return 0.0;
    }

    let (rate_gtps, encoding_efficiency): (f64, f64) = match pcie_gen {
        1 => (2.5, 0.8),
        2 => (5.0, 0.8),
        3 => (8.0, 128.0 / 130.0),
        4 => (16.0, 128.0 / 130.0),
        5 => (32.0, 128.0 / 130.0),
        // Gen 6 dropped 128b/130b line coding. In FLIT mode, 242 of every
        // 256 bytes carry TLP/DLLP payload; the rest is CRC and FEC.
        6 => (64.0, 242.0 / 256.0),
        _ => return 0.0,
    };

    // Divide by 8 to convert gigabits per second to gigabytes per second.
    rate_gtps * lanes as f64 * encoding_efficiency / 8.0
}
557
/// Fraction of the peak bandwidth that was achieved, clamped to `[0.0, 1.0]`.
///
/// * `measured_gbps` – observed bandwidth.
/// * `peak_gbps` – reference peak bandwidth in the same unit.
///
/// Returns `0.0` when `peak_gbps` is zero or negative.
pub fn bandwidth_utilization(measured_gbps: f64, peak_gbps: f64) -> f64 {
    if peak_gbps <= 0.0 {
        0.0
    } else {
        let ratio = measured_gbps / peak_gbps;
        ratio.clamp(0.0, 1.0)
    }
}
575
/// Formats a byte count using binary multiples.
///
/// Values below 1024 print as an exact integer with a `B` suffix. Larger
/// values are divided by 2^10, 2^20 or 2^30, printed with two decimals, and
/// labelled `KB`, `MB` or `GB` respectively.
pub fn format_bytes(bytes: usize) -> String {
    const KB: usize = 1 << 10;
    const MB: usize = 1 << 20;
    const GB: usize = 1 << 30;

    // Arms are ordered from the largest unit down so the first match wins.
    match bytes {
        n if n >= GB => format!("{:.2} GB", n as f64 / GB as f64),
        n if n >= MB => format!("{:.2} MB", n as f64 / MB as f64),
        n if n >= KB => format!("{:.2} KB", n as f64 / KB as f64),
        _ => format!("{bytes} B"),
    }
}
592
/// Formats a bandwidth given in GB/s with a unit that suits its magnitude.
///
/// Values of at least 1.0 are shown in GB/s; everything else is multiplied
/// by 1000 and shown in MB/s. Both forms use two decimals.
pub fn describe_bandwidth(gbps: f64) -> String {
    if gbps >= 1.0 {
        return format!("{gbps:.2} GB/s");
    }
    let mbps = gbps * 1000.0;
    format!("{:.2} MB/s", mbps)
}
603
#[cfg(test)]
mod tests {
    use super::TransferDirection::{DeviceToDevice, DeviceToHost, HostToDevice, HostToHost};
    use super::*;

    /// Asserts that `actual` is strictly within `tol` of `expected`.
    fn assert_close(actual: f64, expected: f64, tol: f64) {
        assert!(
            (actual - expected).abs() < tol,
            "expected {expected} (tolerance {tol}), got {actual}"
        );
    }

    /// Shorthand for `BandwidthMeasurement::new`.
    fn measure(dir: TransferDirection, bytes: usize, elapsed_ms: f64) -> BandwidthMeasurement {
        BandwidthMeasurement::new(dir, bytes, elapsed_ms)
    }

    // ---- BandwidthMeasurement ----

    #[test]
    fn measurement_new_computes_bandwidth() {
        // 1e9 bytes in one second is exactly 1 GB/s.
        let sample = measure(HostToDevice, 1_000_000_000, 1000.0);
        assert_close(sample.bandwidth_gbps, 1.0, 1e-6);
    }

    #[test]
    fn measurement_zero_elapsed_gives_zero_bandwidth() {
        let sample = measure(HostToDevice, 1024, 0.0);
        assert_close(sample.bandwidth_gbps, 0.0, f64::EPSILON);
    }

    #[test]
    fn measurement_negative_elapsed_gives_zero_bandwidth() {
        let sample = measure(DeviceToHost, 1024, -1.0);
        assert_close(sample.bandwidth_gbps, 0.0, f64::EPSILON);
    }

    #[test]
    fn measurement_bandwidth_mbps() {
        let sample = measure(DeviceToDevice, 1_000_000_000, 1000.0);
        assert_close(sample.bandwidth_mbps(), 1000.0, 1e-3);
    }

    #[test]
    fn measurement_latency_us() {
        let sample = measure(HostToHost, 1024, 2.5);
        assert_close(sample.latency_us(), 2500.0, 1e-6);
    }

    #[test]
    fn measurement_display() {
        let rendered = measure(HostToDevice, 1048576, 0.5).to_string();
        for needle in ["Host -> Device", "1048576", "0.500 ms", "GB/s"] {
            assert!(
                rendered.contains(needle),
                "{rendered:?} is missing {needle:?}"
            );
        }
    }

    // ---- BandwidthProfiler ----

    #[test]
    fn profiler_empty_summary() {
        let summary = BandwidthProfiler::new().summary();
        assert_eq!(summary.total_transfers, 0);
        assert_eq!(summary.total_bytes, 0);
        assert_close(summary.avg_bandwidth_gbps, 0.0, f64::EPSILON);
        assert!(summary.per_direction.is_empty());
    }

    #[test]
    fn profiler_record_and_summary() {
        let mut profiler = BandwidthProfiler::new();
        profiler.record(measure(HostToDevice, 1 << 20, 0.5));
        profiler.record(measure(HostToDevice, 2 << 20, 1.0));
        profiler.record(measure(DeviceToHost, 512 << 10, 0.25));

        let summary = profiler.summary();
        assert_eq!(summary.total_transfers, 3);
        assert_eq!(summary.total_bytes, (1 << 20) + (2 << 20) + (512 << 10));
        assert_close(summary.total_time_ms, 1.75, 1e-9);
        assert!(summary.peak_bandwidth_gbps > 0.0);
        // Only the two directions that were recorded show up.
        assert_eq!(summary.per_direction.len(), 2);
    }

    #[test]
    fn profiler_summary_by_direction() {
        let mut profiler = BandwidthProfiler::new();
        profiler.record(measure(HostToDevice, 1 << 20, 0.5));
        profiler.record(measure(DeviceToHost, 1 << 20, 0.6));

        assert!(profiler.summary_by_direction(HostToDevice).is_some());
        assert!(profiler.summary_by_direction(DeviceToHost).is_some());
        assert!(profiler.summary_by_direction(DeviceToDevice).is_none());
    }

    #[test]
    fn profiler_direction_summary_stats() {
        let slower = measure(HostToDevice, 1_000_000, 1.0);
        let faster = measure(HostToDevice, 2_000_000, 1.0);
        let (bw_slow, bw_fast) = (slower.bandwidth_gbps, faster.bandwidth_gbps);

        let mut profiler = BandwidthProfiler::new();
        profiler.record(slower);
        profiler.record(faster);

        let stats = profiler
            .summary_by_direction(HostToDevice)
            .expect("should have HtoD summary");

        assert_eq!(stats.transfer_count, 2);
        assert_eq!(stats.total_bytes, 3_000_000);
        assert_close(stats.avg_bandwidth_gbps, (bw_slow + bw_fast) / 2.0, 1e-9);
        assert_close(stats.min_bandwidth_gbps, bw_slow, 1e-9);
        assert_close(stats.max_bandwidth_gbps, bw_fast, 1e-9);
    }

    #[test]
    fn profiler_with_iterations() {
        let profiler = BandwidthProfiler::with_iterations(5, 20);
        assert_eq!(profiler.warmup_iterations, 5);
        assert_eq!(profiler.benchmark_iterations, 20);
        assert_eq!(profiler.measurement_count(), 0);
    }

    #[test]
    fn profiler_clear() {
        let mut profiler = BandwidthProfiler::new();
        profiler.record(measure(HostToDevice, 1024, 0.1));
        assert_eq!(profiler.measurement_count(), 1);
        profiler.clear();
        assert_eq!(profiler.measurement_count(), 0);
    }

    // ---- free functions ----

    #[test]
    fn estimate_transfer_time_basic() {
        // 1e9 bytes at 10 GB/s is 100 ms, plus 5 us (0.005 ms) of latency.
        let estimate = estimate_transfer_time(1_000_000_000, 10.0, 5.0);
        assert_close(estimate, 100.005, 1e-6);
    }

    #[test]
    fn estimate_transfer_time_zero_bandwidth() {
        assert!(estimate_transfer_time(1024, 0.0, 5.0).is_infinite());
    }

    #[test]
    fn theoretical_peak_bandwidth_pcie3_x16() {
        assert_close(theoretical_peak_bandwidth(3, 16), 15.754, 0.01);
    }

    #[test]
    fn theoretical_peak_bandwidth_pcie4_x16() {
        assert_close(theoretical_peak_bandwidth(4, 16), 31.508, 0.01);
    }

    #[test]
    fn theoretical_peak_bandwidth_pcie5_x16() {
        assert_close(theoretical_peak_bandwidth(5, 16), 63.015, 0.02);
    }

    #[test]
    fn theoretical_peak_bandwidth_invalid_gen() {
        assert_close(theoretical_peak_bandwidth(0, 16), 0.0, f64::EPSILON);
        assert_close(theoretical_peak_bandwidth(7, 16), 0.0, f64::EPSILON);
    }

    #[test]
    fn theoretical_peak_bandwidth_zero_lanes() {
        assert_close(theoretical_peak_bandwidth(3, 0), 0.0, f64::EPSILON);
    }

    #[test]
    fn bandwidth_utilization_basic() {
        assert_close(bandwidth_utilization(12.0, 16.0), 0.75, 1e-9);
    }

    #[test]
    fn bandwidth_utilization_clamps_above_one() {
        assert_close(bandwidth_utilization(20.0, 16.0), 1.0, f64::EPSILON);
    }

    #[test]
    fn bandwidth_utilization_zero_peak() {
        assert_close(bandwidth_utilization(10.0, 0.0), 0.0, f64::EPSILON);
    }

    // ---- BandwidthBenchmarkConfig ----

    #[test]
    fn benchmark_config_default_sizes() {
        let config = BandwidthBenchmarkConfig::default();
        assert_eq!(config.sizes.len(), 10);
        assert_eq!(config.sizes[0], 1 << 10);
        assert_eq!(config.sizes[9], 256 << 20);
        assert_eq!(config.directions.len(), 4);
        assert_eq!(config.warmup_iterations, 3);
        assert_eq!(config.benchmark_iterations, 10);
        assert!(config.use_pinned_memory);
    }

    #[test]
    fn benchmark_config_total_transfers() {
        // 10 sizes x 4 directions x 10 iterations.
        let config = BandwidthBenchmarkConfig::default();
        assert_eq!(config.total_transfers(), 400);
    }

    #[test]
    fn benchmark_config_with_sizes() {
        let config = BandwidthBenchmarkConfig::with_sizes(vec![1024, 2048]);
        assert_eq!(config.sizes.len(), 2);
        // Everything except the sizes comes from the default configuration.
        assert_eq!(config.directions.len(), 4);
    }

    #[test]
    fn benchmark_config_for_direction() {
        let config = BandwidthBenchmarkConfig::for_direction(DeviceToDevice);
        assert_eq!(config.directions.len(), 1);
        assert_eq!(config.directions[0], DeviceToDevice);
    }

    // ---- formatting ----

    #[test]
    fn summary_display_format() {
        let mut profiler = BandwidthProfiler::new();
        profiler.record(measure(HostToDevice, 1 << 20, 0.5));

        let rendered = profiler.summary().to_string();
        assert!(rendered.contains("Bandwidth Summary"));
        assert!(rendered.contains("GB/s"));
    }

    #[test]
    fn direction_display() {
        assert_eq!(HostToDevice.to_string(), "Host -> Device");
        assert_eq!(DeviceToHost.to_string(), "Device -> Host");
        assert_eq!(DeviceToDevice.to_string(), "Device -> Device");
        assert_eq!(HostToHost.to_string(), "Host -> Host");
    }

    #[test]
    fn format_bytes_ranges() {
        assert_eq!(format_bytes(500), "500 B");
        assert_eq!(format_bytes(1024), "1.00 KB");
        assert_eq!(format_bytes(1 << 20), "1.00 MB");
        assert_eq!(format_bytes(1 << 30), "1.00 GB");
    }

    #[test]
    fn describe_bandwidth_formatting() {
        assert_eq!(describe_bandwidth(2.5), "2.50 GB/s");
        assert_eq!(describe_bandwidth(0.5), "500.00 MB/s");
    }

    // ---- 8b/10b generations ----

    #[test]
    fn theoretical_peak_bandwidth_pcie1_x16() {
        // 2.5 GT/s x 16 lanes x 0.8 / 8 = 4 GB/s.
        assert_close(theoretical_peak_bandwidth(1, 16), 4.0, 1e-6);
    }

    #[test]
    fn theoretical_peak_bandwidth_pcie2_x16() {
        // 5 GT/s x 16 lanes x 0.8 / 8 = 8 GB/s.
        assert_close(theoretical_peak_bandwidth(2, 16), 8.0, 1e-6);
    }
}