//! Provider-side accelerator telemetry for `runmat_accelerate` (telemetry.rs).

1use std::sync::{
2    atomic::{AtomicU64, Ordering},
3    Mutex,
4};
5
6use runmat_accelerate_api::{
7    KernelAttrTelemetry, KernelLaunchTelemetry, ProviderDispatchStats, ProviderTelemetry,
8};
9
/// Upper bound on buffered kernel-launch events; `record_kernel_launch`
/// evicts the oldest entries once this cap is reached.
const MAX_KERNEL_LAUNCH_EVENTS: usize = 64;
11
/// Accumulated accelerator telemetry: per-dispatch counters and wall times,
/// transfer byte totals, and a bounded log of recent kernel launches.
///
/// Counters are `AtomicU64`s updated with `Ordering::Relaxed`, so the struct
/// can be shared across threads behind a plain `&` reference; only the
/// kernel-launch log needs the `Mutex`.
#[derive(Default)]
pub struct AccelTelemetry {
    // Dispatch count and accumulated wall time (ns) for fused elementwise ops.
    fused_elementwise_count: AtomicU64,
    fused_elementwise_wall_ns: AtomicU64,
    // Dispatch count and accumulated wall time (ns) for fused reductions.
    fused_reduction_count: AtomicU64,
    fused_reduction_wall_ns: AtomicU64,
    // Dispatch count and accumulated wall time (ns) for matmul.
    matmul_count: AtomicU64,
    matmul_wall_ns: AtomicU64,
    // Byte totals fed in via record_upload_bytes / record_download_bytes.
    upload_bytes: AtomicU64,
    download_bytes: AtomicU64,
    // Recent launch events, capped at MAX_KERNEL_LAUNCH_EVENTS (oldest evicted).
    kernel_launches: Mutex<Vec<KernelLaunchTelemetry>>,
}
24
25impl AccelTelemetry {
26    pub fn new() -> Self {
27        Self::default()
28    }
29
30    pub fn record_upload_bytes(&self, bytes: u64) {
31        if bytes > 0 {
32            self.upload_bytes.fetch_add(bytes, Ordering::Relaxed);
33        }
34    }
35
36    pub fn record_download_bytes(&self, bytes: u64) {
37        if bytes > 0 {
38            self.download_bytes.fetch_add(bytes, Ordering::Relaxed);
39        }
40    }
41
42    pub fn record_fused_elementwise(&self, wall_ns: u64) {
43        self.fused_elementwise_count.fetch_add(1, Ordering::Relaxed);
44        if wall_ns > 0 {
45            self.fused_elementwise_wall_ns
46                .fetch_add(wall_ns, Ordering::Relaxed);
47        }
48    }
49
50    pub fn record_fused_reduction(&self, wall_ns: u64) {
51        self.fused_reduction_count.fetch_add(1, Ordering::Relaxed);
52        if wall_ns > 0 {
53            self.fused_reduction_wall_ns
54                .fetch_add(wall_ns, Ordering::Relaxed);
55        }
56    }
57
58    pub fn record_matmul(&self, wall_ns: u64) {
59        self.matmul_count.fetch_add(1, Ordering::Relaxed);
60        if wall_ns > 0 {
61            self.matmul_wall_ns.fetch_add(wall_ns, Ordering::Relaxed);
62        }
63    }
64
65    pub fn reset(&self) {
66        self.fused_elementwise_count.store(0, Ordering::Relaxed);
67        self.fused_elementwise_wall_ns.store(0, Ordering::Relaxed);
68        self.fused_reduction_count.store(0, Ordering::Relaxed);
69        self.fused_reduction_wall_ns.store(0, Ordering::Relaxed);
70        self.matmul_count.store(0, Ordering::Relaxed);
71        self.matmul_wall_ns.store(0, Ordering::Relaxed);
72        self.upload_bytes.store(0, Ordering::Relaxed);
73        self.download_bytes.store(0, Ordering::Relaxed);
74        if let Ok(mut guard) = self.kernel_launches.lock() {
75            guard.clear();
76        }
77    }
78
79    pub fn snapshot(
80        &self,
81        fusion_cache_hits: u64,
82        fusion_cache_misses: u64,
83        bind_group_cache_hits: u64,
84        bind_group_cache_misses: u64,
85        bind_group_cache_by_layout: Option<Vec<runmat_accelerate_api::BindGroupLayoutTelemetry>>,
86    ) -> ProviderTelemetry {
87        let kernel_launches = self
88            .kernel_launches
89            .lock()
90            .map(|events| events.clone())
91            .unwrap_or_default();
92        ProviderTelemetry {
93            fused_elementwise: ProviderDispatchStats {
94                count: self.fused_elementwise_count.load(Ordering::Relaxed),
95                total_wall_time_ns: self.fused_elementwise_wall_ns.load(Ordering::Relaxed),
96            },
97            fused_reduction: ProviderDispatchStats {
98                count: self.fused_reduction_count.load(Ordering::Relaxed),
99                total_wall_time_ns: self.fused_reduction_wall_ns.load(Ordering::Relaxed),
100            },
101            matmul: ProviderDispatchStats {
102                count: self.matmul_count.load(Ordering::Relaxed),
103                total_wall_time_ns: self.matmul_wall_ns.load(Ordering::Relaxed),
104            },
105            upload_bytes: self.upload_bytes.load(Ordering::Relaxed),
106            download_bytes: self.download_bytes.load(Ordering::Relaxed),
107            fusion_cache_hits,
108            fusion_cache_misses,
109            bind_group_cache_hits,
110            bind_group_cache_misses,
111            bind_group_cache_by_layout,
112            kernel_launches,
113        }
114    }
115}
116
/// Convert a `Duration` to whole nanoseconds, saturating at `u64::MAX`
/// rather than truncating (durations beyond ~584 years clamp).
fn saturating_duration_ns(duration: std::time::Duration) -> u64 {
    u64::try_from(duration.as_nanos()).unwrap_or(u64::MAX)
}
120
121impl AccelTelemetry {
122    pub fn record_fused_elementwise_duration(&self, duration: std::time::Duration) {
123        self.record_fused_elementwise(saturating_duration_ns(duration));
124    }
125
126    pub fn record_fused_reduction_duration(&self, duration: std::time::Duration) {
127        self.record_fused_reduction(saturating_duration_ns(duration));
128    }
129
130    pub fn record_matmul_duration(&self, duration: std::time::Duration) {
131        self.record_matmul(saturating_duration_ns(duration));
132    }
133
134    pub fn record_kernel_launch(
135        &self,
136        kernel: &'static str,
137        precision: Option<&str>,
138        shape: &[(&str, u64)],
139        tuning: &[(&str, u64)],
140    ) {
141        let event = KernelLaunchTelemetry {
142            kernel: kernel.to_string(),
143            precision: precision.map(|p| p.to_string()),
144            shape: Self::pairs_to_attrs(shape),
145            tuning: Self::pairs_to_attrs(tuning),
146        };
147        if let Ok(mut guard) = self.kernel_launches.lock() {
148            if guard.len() >= MAX_KERNEL_LAUNCH_EVENTS {
149                let drop = guard.len() + 1 - MAX_KERNEL_LAUNCH_EVENTS;
150                guard.drain(0..drop);
151            }
152            guard.push(event);
153        }
154    }
155
156    fn pairs_to_attrs(pairs: &[(&str, u64)]) -> Vec<KernelAttrTelemetry> {
157        pairs
158            .iter()
159            .map(|(k, v)| KernelAttrTelemetry {
160                key: (*k).to_string(),
161                value: *v,
162            })
163            .collect()
164    }
165}