runmat_accelerate/
telemetry.rs1use std::sync::{
2 atomic::{AtomicU64, Ordering},
3 Mutex,
4};
5
6use runmat_accelerate_api::{
7 KernelAttrTelemetry, KernelLaunchTelemetry, ProviderDispatchStats, ProviderTelemetry,
8};
9
10const MAX_KERNEL_LAUNCH_EVENTS: usize = 64;
11
/// Thread-shared telemetry for an acceleration provider: lock-free counters
/// plus a mutex-guarded, bounded log of recent kernel-launch events. All
/// mutators take `&self`, so a single instance can be updated concurrently.
#[derive(Default)]
pub struct AccelTelemetry {
    // Dispatch count and accumulated wall-clock nanoseconds for fused
    // elementwise kernels (see record_fused_elementwise).
    fused_elementwise_count: AtomicU64,
    fused_elementwise_wall_ns: AtomicU64,
    // Dispatch count and accumulated wall-clock nanoseconds for fused
    // reduction kernels (see record_fused_reduction).
    fused_reduction_count: AtomicU64,
    fused_reduction_wall_ns: AtomicU64,
    // Dispatch count and accumulated wall-clock nanoseconds for matmul.
    matmul_count: AtomicU64,
    matmul_wall_ns: AtomicU64,
    // Cumulative byte totals reported via record_upload_bytes /
    // record_download_bytes — presumably host<->device transfer volume;
    // confirm against callers.
    upload_bytes: AtomicU64,
    download_bytes: AtomicU64,
    // Most recent launch events, capped at MAX_KERNEL_LAUNCH_EVENTS with the
    // oldest entries evicted first (see record_kernel_launch).
    kernel_launches: Mutex<Vec<KernelLaunchTelemetry>>,
}
24
25impl AccelTelemetry {
26 pub fn new() -> Self {
27 Self::default()
28 }
29
30 pub fn record_upload_bytes(&self, bytes: u64) {
31 if bytes > 0 {
32 self.upload_bytes.fetch_add(bytes, Ordering::Relaxed);
33 }
34 }
35
36 pub fn record_download_bytes(&self, bytes: u64) {
37 if bytes > 0 {
38 self.download_bytes.fetch_add(bytes, Ordering::Relaxed);
39 }
40 }
41
42 pub fn record_fused_elementwise(&self, wall_ns: u64) {
43 self.fused_elementwise_count.fetch_add(1, Ordering::Relaxed);
44 if wall_ns > 0 {
45 self.fused_elementwise_wall_ns
46 .fetch_add(wall_ns, Ordering::Relaxed);
47 }
48 }
49
50 pub fn record_fused_reduction(&self, wall_ns: u64) {
51 self.fused_reduction_count.fetch_add(1, Ordering::Relaxed);
52 if wall_ns > 0 {
53 self.fused_reduction_wall_ns
54 .fetch_add(wall_ns, Ordering::Relaxed);
55 }
56 }
57
58 pub fn record_matmul(&self, wall_ns: u64) {
59 self.matmul_count.fetch_add(1, Ordering::Relaxed);
60 if wall_ns > 0 {
61 self.matmul_wall_ns.fetch_add(wall_ns, Ordering::Relaxed);
62 }
63 }
64
65 pub fn reset(&self) {
66 self.fused_elementwise_count.store(0, Ordering::Relaxed);
67 self.fused_elementwise_wall_ns.store(0, Ordering::Relaxed);
68 self.fused_reduction_count.store(0, Ordering::Relaxed);
69 self.fused_reduction_wall_ns.store(0, Ordering::Relaxed);
70 self.matmul_count.store(0, Ordering::Relaxed);
71 self.matmul_wall_ns.store(0, Ordering::Relaxed);
72 self.upload_bytes.store(0, Ordering::Relaxed);
73 self.download_bytes.store(0, Ordering::Relaxed);
74 if let Ok(mut guard) = self.kernel_launches.lock() {
75 guard.clear();
76 }
77 }
78
79 pub fn snapshot(
80 &self,
81 fusion_cache_hits: u64,
82 fusion_cache_misses: u64,
83 bind_group_cache_hits: u64,
84 bind_group_cache_misses: u64,
85 bind_group_cache_by_layout: Option<Vec<runmat_accelerate_api::BindGroupLayoutTelemetry>>,
86 ) -> ProviderTelemetry {
87 let kernel_launches = self
88 .kernel_launches
89 .lock()
90 .map(|events| events.clone())
91 .unwrap_or_default();
92 ProviderTelemetry {
93 fused_elementwise: ProviderDispatchStats {
94 count: self.fused_elementwise_count.load(Ordering::Relaxed),
95 total_wall_time_ns: self.fused_elementwise_wall_ns.load(Ordering::Relaxed),
96 },
97 fused_reduction: ProviderDispatchStats {
98 count: self.fused_reduction_count.load(Ordering::Relaxed),
99 total_wall_time_ns: self.fused_reduction_wall_ns.load(Ordering::Relaxed),
100 },
101 matmul: ProviderDispatchStats {
102 count: self.matmul_count.load(Ordering::Relaxed),
103 total_wall_time_ns: self.matmul_wall_ns.load(Ordering::Relaxed),
104 },
105 upload_bytes: self.upload_bytes.load(Ordering::Relaxed),
106 download_bytes: self.download_bytes.load(Ordering::Relaxed),
107 fusion_cache_hits,
108 fusion_cache_misses,
109 bind_group_cache_hits,
110 bind_group_cache_misses,
111 bind_group_cache_by_layout,
112 kernel_launches,
113 }
114 }
115}
116
/// Converts a `Duration` to whole nanoseconds, clamping to `u64::MAX` rather
/// than truncating (a `Duration` can hold up to ~584 years of nanoseconds,
/// which overflows `u64` as a `u128` count).
fn saturating_duration_ns(duration: std::time::Duration) -> u64 {
    u64::try_from(duration.as_nanos()).unwrap_or(u64::MAX)
}
120
121impl AccelTelemetry {
122 pub fn record_fused_elementwise_duration(&self, duration: std::time::Duration) {
123 self.record_fused_elementwise(saturating_duration_ns(duration));
124 }
125
126 pub fn record_fused_reduction_duration(&self, duration: std::time::Duration) {
127 self.record_fused_reduction(saturating_duration_ns(duration));
128 }
129
130 pub fn record_matmul_duration(&self, duration: std::time::Duration) {
131 self.record_matmul(saturating_duration_ns(duration));
132 }
133
134 pub fn record_kernel_launch(
135 &self,
136 kernel: &'static str,
137 precision: Option<&str>,
138 shape: &[(&str, u64)],
139 tuning: &[(&str, u64)],
140 ) {
141 let event = KernelLaunchTelemetry {
142 kernel: kernel.to_string(),
143 precision: precision.map(|p| p.to_string()),
144 shape: Self::pairs_to_attrs(shape),
145 tuning: Self::pairs_to_attrs(tuning),
146 };
147 if let Ok(mut guard) = self.kernel_launches.lock() {
148 if guard.len() >= MAX_KERNEL_LAUNCH_EVENTS {
149 let drop = guard.len() + 1 - MAX_KERNEL_LAUNCH_EVENTS;
150 guard.drain(0..drop);
151 }
152 guard.push(event);
153 }
154 }
155
156 fn pairs_to_attrs(pairs: &[(&str, u64)]) -> Vec<KernelAttrTelemetry> {
157 pairs
158 .iter()
159 .map(|(k, v)| KernelAttrTelemetry {
160 key: (*k).to_string(),
161 value: *v,
162 })
163 .collect()
164 }
165}