Skip to main content

trueno/blis/
profiler.rs

1#![allow(missing_docs)]
2//! BLIS Profiler Integration
3//!
4//! Performance tracking for BLIS operations at multiple granularity levels.
5//! Supports Kaizen (continuous improvement) methodology.
6//!
7//! # Philosophy
8//!
9//! Kaizen (改善) means "continuous improvement." By tracking performance metrics
10//! at each level of the BLIS hierarchy, we can identify bottlenecks and measure
11//! the impact of optimizations.
12//!
13//! # Profiling Levels
14//!
15//! - **Macro**: L3 cache blocking level (NC x KC tiles)
16//! - **Midi**: L2 cache blocking level (MC x KC tiles)
17//! - **Micro**: Microkernel level (MR x NR tiles)
18//! - **Pack**: Data packing operations
19//!
20//! # Usage
21//!
22//! ```
23//! use trueno::blis::profiler::{BlisProfiler, BlisProfileLevel};
24//!
25//! let mut profiler = BlisProfiler::enabled();
26//! profiler.record(BlisProfileLevel::Micro, 1000, 384);
27//! println!("{}", profiler.summary());
28//! ```
29
30// ============================================================================
31// Kaizen (Continuous Improvement) - Performance Tracking
32// ============================================================================
33
/// Running totals used to track GEMM throughput over time (Kaizen metrics).
///
/// Every counter saturates at its maximum value instead of overflowing, so
/// recording can neither panic (debug builds) nor silently wrap around
/// (release builds), no matter how large the inputs are.
#[derive(Debug, Clone, Default)]
pub struct KaizenMetrics {
    /// Total number of floating-point operations recorded so far.
    pub flops: u64,
    /// Total measured time in nanoseconds.
    pub time_ns: u64,
    /// Number of calls to [`KaizenMetrics::record`] since creation or the last reset.
    pub samples: usize,
}

impl KaizenMetrics {
    /// Record one GEMM operation of shape `m x n x k` that took `duration`.
    ///
    /// The operation is counted as `2 * m * n * k` FLOPs. The product, the
    /// running totals, and the sample counter all saturate rather than
    /// overflow; a duration longer than `u64::MAX` nanoseconds is clamped to
    /// `u64::MAX`.
    pub fn record(&mut self, m: usize, n: usize, k: usize, duration: std::time::Duration) {
        // usize -> u64 is lossless on every supported target (usize is at most 64 bits).
        let op_flops = 2u64
            .saturating_mul(m as u64)
            .saturating_mul(n as u64)
            .saturating_mul(k as u64);
        // `as_nanos` returns u128; clamp first so the narrowing cast cannot truncate.
        let op_ns = duration.as_nanos().min(u128::from(u64::MAX)) as u64;

        self.flops = self.flops.saturating_add(op_flops);
        self.time_ns = self.time_ns.saturating_add(op_ns);
        self.samples = self.samples.saturating_add(1);
    }

    /// Achieved throughput in GFLOP/s.
    ///
    /// FLOPs per nanosecond is numerically equal to GFLOP/s, so no scaling is
    /// needed. Returns `0.0` when no time has been recorded.
    pub fn gflops(&self) -> f64 {
        if self.time_ns == 0 {
            0.0
        } else {
            self.flops as f64 / self.time_ns as f64
        }
    }

    /// Clear all counters back to their default (zero) values.
    pub fn reset(&mut self) {
        *self = Self::default();
    }
}
66
67// ============================================================================
68// BLIS Profiler Integration
69// ============================================================================
70
/// Granularity at which a BLIS timing sample was taken.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum BlisProfileLevel {
    /// L3 blocking level, operating on NC x KC tiles.
    Macro,
    /// L2 blocking level, operating on MC x KC tiles.
    Midi,
    /// Microkernel level, operating on MR x NR tiles.
    Micro,
    /// Data packing operations.
    Pack,
}
83
/// Accumulated timing and FLOP totals for a single profiling level.
///
/// All counters saturate at `u64::MAX` instead of overflowing, so recording
/// can neither panic (debug builds) nor wrap around (release builds).
#[derive(Debug, Clone, Default)]
pub struct BlisLevelStats {
    /// Sum of all recorded durations, in nanoseconds.
    pub total_ns: u64,
    /// Number of recorded invocations.
    pub count: u64,
    /// Sum of all recorded FLOP counts.
    pub flops: u64,
}

impl BlisLevelStats {
    /// Add one sample: `duration_ns` nanoseconds spent performing `flops`
    /// floating-point operations.
    pub fn record(&mut self, duration_ns: u64, flops: u64) {
        self.total_ns = self.total_ns.saturating_add(duration_ns);
        self.count = self.count.saturating_add(1);
        self.flops = self.flops.saturating_add(flops);
    }

    /// Mean duration per invocation in microseconds.
    ///
    /// Returns `0.0` when nothing has been recorded.
    pub fn avg_us(&self) -> f64 {
        if self.count == 0 {
            return 0.0;
        }
        let avg_ns = self.total_ns as f64 / self.count as f64;
        avg_ns / 1000.0
    }

    /// Throughput in GFLOP/s.
    ///
    /// FLOPs per nanosecond is numerically equal to GFLOP/s. Returns `0.0`
    /// when no time has been recorded.
    pub fn gflops(&self) -> f64 {
        if self.total_ns == 0 {
            0.0
        } else {
            self.flops as f64 / self.total_ns as f64
        }
    }
}
119
120/// BLIS-aware profiler
121#[derive(Debug, Clone, Default)]
122pub struct BlisProfiler {
123    /// Per-level statistics
124    pub macro_stats: BlisLevelStats,
125    pub midi_stats: BlisLevelStats,
126    pub micro_stats: BlisLevelStats,
127    pub pack_stats: BlisLevelStats,
128    /// Whether profiling is enabled
129    pub enabled: bool,
130}
131
132impl BlisProfiler {
133    /// Create a new profiler (disabled by default)
134    pub fn new() -> Self {
135        Self::default()
136    }
137
138    /// Create an enabled profiler
139    pub fn enabled() -> Self {
140        Self { enabled: true, ..Self::default() }
141    }
142
143    /// Record timing for a level
144    pub fn record(&mut self, level: BlisProfileLevel, duration_ns: u64, flops: u64) {
145        if !self.enabled {
146            return;
147        }
148        match level {
149            BlisProfileLevel::Macro => self.macro_stats.record(duration_ns, flops),
150            BlisProfileLevel::Midi => self.midi_stats.record(duration_ns, flops),
151            BlisProfileLevel::Micro => self.micro_stats.record(duration_ns, flops),
152            BlisProfileLevel::Pack => self.pack_stats.record(duration_ns, 0),
153        }
154    }
155
156    /// Record AVX-512 BLIS macro-level timing.
157    /// Contract: avx512-blis-v1.yaml (C-AVX512-PROF-001)
158    pub fn record_avx512_blis(
159        &mut self,
160        m: usize,
161        n: usize,
162        k: usize,
163        duration: std::time::Duration,
164    ) {
165        if !self.enabled {
166            return;
167        }
168        let flops = 2 * m as u64 * n as u64 * k as u64;
169        let duration_ns = duration.as_nanos() as u64;
170        self.macro_stats.record(duration_ns, flops);
171    }
172
173    /// Get total GFLOP/s
174    pub fn total_gflops(&self) -> f64 {
175        let total_ns = self.macro_stats.total_ns;
176        let total_flops = self.macro_stats.flops;
177        if total_ns == 0 {
178            return 0.0;
179        }
180        total_flops as f64 / total_ns as f64
181    }
182
183    /// Generate summary report
184    pub fn summary(&self) -> String {
185        let mut s = String::new();
186        s.push_str("BLIS Profiler Summary\n");
187        s.push_str("=====================\n");
188        s.push_str(&format!(
189            "Macro: {:.1}us avg, {:.1} GFLOP/s, {} calls\n",
190            self.macro_stats.avg_us(),
191            self.macro_stats.gflops(),
192            self.macro_stats.count
193        ));
194        s.push_str(&format!(
195            "Midi:  {:.1}us avg, {:.1} GFLOP/s, {} calls\n",
196            self.midi_stats.avg_us(),
197            self.midi_stats.gflops(),
198            self.midi_stats.count
199        ));
200        s.push_str(&format!(
201            "Micro: {:.1}us avg, {:.1} GFLOP/s, {} calls\n",
202            self.micro_stats.avg_us(),
203            self.micro_stats.gflops(),
204            self.micro_stats.count
205        ));
206        s.push_str(&format!(
207            "Pack:  {:.1}us avg, {} calls\n",
208            self.pack_stats.avg_us(),
209            self.pack_stats.count
210        ));
211        s.push_str(&format!("Total: {:.1} GFLOP/s\n", self.total_gflops()));
212        s
213    }
214
215    /// Reset all statistics
216    pub fn reset(&mut self) {
217        self.macro_stats = BlisLevelStats::default();
218        self.midi_stats = BlisLevelStats::default();
219        self.micro_stats = BlisLevelStats::default();
220        self.pack_stats = BlisLevelStats::default();
221    }
222}
223
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Tolerance shared by every floating-point comparison in this module.
    const EPS: f64 = 1e-10;

    /// True when `actual` lies within `EPS` of `expected`.
    fn close(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < EPS
    }

    // ---- KaizenMetrics -----------------------------------------------------

    #[test]
    fn test_kaizen_metrics_default() {
        let metrics = KaizenMetrics::default();
        assert_eq!(metrics.flops, 0);
        assert_eq!(metrics.time_ns, 0);
        assert_eq!(metrics.samples, 0);
    }

    #[test]
    fn test_kaizen_metrics_record() {
        let mut metrics = KaizenMetrics::default();
        metrics.record(2, 3, 4, Duration::from_nanos(100));
        // 2 FLOPs per multiply-add: 2 * 2 * 3 * 4
        assert_eq!(metrics.flops, 48);
        assert_eq!(metrics.time_ns, 100);
        assert_eq!(metrics.samples, 1);
    }

    #[test]
    fn test_kaizen_metrics_gflops() {
        let metrics = KaizenMetrics {
            flops: 1_000_000_000,
            time_ns: 1_000_000_000,
            ..Default::default()
        };
        assert!(close(metrics.gflops(), 1.0));
    }

    #[test]
    fn test_kaizen_metrics_gflops_zero_time() {
        let metrics = KaizenMetrics::default();
        assert!(close(metrics.gflops(), 0.0));
    }

    #[test]
    fn test_kaizen_metrics_reset() {
        let mut metrics = KaizenMetrics::default();
        metrics.record(2, 3, 4, Duration::from_nanos(100));
        metrics.reset();
        assert_eq!(metrics.flops, 0);
        assert_eq!(metrics.time_ns, 0);
        assert_eq!(metrics.samples, 0);
    }

    // ---- BlisLevelStats ----------------------------------------------------

    #[test]
    fn test_blis_level_stats_default() {
        let stats = BlisLevelStats::default();
        assert_eq!(stats.total_ns, 0);
        assert_eq!(stats.count, 0);
        assert_eq!(stats.flops, 0);
    }

    #[test]
    fn test_blis_level_stats_record() {
        let mut stats = BlisLevelStats::default();
        stats.record(1000, 500);
        assert_eq!(stats.total_ns, 1000);
        assert_eq!(stats.count, 1);
        assert_eq!(stats.flops, 500);
    }

    #[test]
    fn test_blis_level_stats_avg_us() {
        let mut stats = BlisLevelStats::default();
        for &ns in [2000u64, 4000].iter() {
            stats.record(ns, 0);
        }
        assert!(close(stats.avg_us(), 3.0));
    }

    #[test]
    fn test_blis_level_stats_avg_us_zero_count() {
        let stats = BlisLevelStats::default();
        assert!(close(stats.avg_us(), 0.0));
    }

    #[test]
    fn test_blis_level_stats_gflops() {
        let stats = BlisLevelStats {
            total_ns: 1_000_000_000,
            flops: 1_000_000_000,
            ..Default::default()
        };
        assert!(close(stats.gflops(), 1.0));
    }

    #[test]
    fn test_blis_level_stats_gflops_zero_time() {
        let stats = BlisLevelStats::default();
        assert!(close(stats.gflops(), 0.0));
    }

    // ---- BlisProfiler ------------------------------------------------------

    #[test]
    fn test_blis_profiler_new() {
        let profiler = BlisProfiler::new();
        assert!(!profiler.enabled);
    }

    #[test]
    fn test_blis_profiler_enabled() {
        let profiler = BlisProfiler::enabled();
        assert!(profiler.enabled);
    }

    #[test]
    fn test_blis_profiler_record_disabled() {
        let mut profiler = BlisProfiler::new();
        profiler.record(BlisProfileLevel::Micro, 1000, 500);
        assert_eq!(profiler.micro_stats.count, 0);
    }

    #[test]
    fn test_blis_profiler_record_enabled() {
        let mut profiler = BlisProfiler::enabled();
        profiler.record(BlisProfileLevel::Micro, 1000, 500);
        assert_eq!(profiler.micro_stats.count, 1);
        assert_eq!(profiler.micro_stats.total_ns, 1000);
        assert_eq!(profiler.micro_stats.flops, 500);
    }

    #[test]
    fn test_blis_profiler_record_all_levels() {
        let mut profiler = BlisProfiler::enabled();
        let samples = [
            (BlisProfileLevel::Macro, 1000u64, 100u64),
            (BlisProfileLevel::Midi, 2000, 200),
            (BlisProfileLevel::Micro, 3000, 300),
            (BlisProfileLevel::Pack, 4000, 400),
        ];
        for &(level, ns, flops) in samples.iter() {
            profiler.record(level, ns, flops);
        }

        assert_eq!(profiler.macro_stats.count, 1);
        assert_eq!(profiler.midi_stats.count, 1);
        assert_eq!(profiler.micro_stats.count, 1);
        assert_eq!(profiler.pack_stats.count, 1);
        // Pack samples never contribute FLOPs, whatever the caller passes.
        assert_eq!(profiler.pack_stats.flops, 0);
    }

    #[test]
    fn test_blis_profiler_total_gflops() {
        let profiler = BlisProfiler {
            enabled: true,
            macro_stats: BlisLevelStats {
                total_ns: 1_000_000_000,
                flops: 1_000_000_000,
                ..Default::default()
            },
            ..Default::default()
        };
        assert!(close(profiler.total_gflops(), 1.0));
    }

    #[test]
    fn test_blis_profiler_total_gflops_zero_time() {
        let profiler = BlisProfiler::enabled();
        assert!(close(profiler.total_gflops(), 0.0));
    }

    #[test]
    fn test_blis_profiler_summary() {
        let report = BlisProfiler::enabled().summary();
        let expected_fragments = [
            "BLIS Profiler Summary",
            "Macro:",
            "Midi:",
            "Micro:",
            "Pack:",
            "Total:",
        ];
        for fragment in expected_fragments.iter() {
            assert!(report.contains(fragment));
        }
    }

    #[test]
    fn test_blis_profiler_reset() {
        let mut profiler = BlisProfiler::enabled();
        profiler.record(BlisProfileLevel::Micro, 1000, 500);
        profiler.reset();
        assert_eq!(profiler.micro_stats.count, 0);
    }

    // ---- BlisProfileLevel --------------------------------------------------

    #[test]
    fn test_blis_profile_level_debug() {
        let cases = [
            (BlisProfileLevel::Macro, "Macro"),
            (BlisProfileLevel::Midi, "Midi"),
            (BlisProfileLevel::Micro, "Micro"),
            (BlisProfileLevel::Pack, "Pack"),
        ];
        for &(level, expected) in cases.iter() {
            assert_eq!(format!("{:?}", level), expected);
        }
    }

    #[test]
    fn test_blis_profile_level_eq() {
        assert_eq!(BlisProfileLevel::Macro, BlisProfileLevel::Macro);
        assert_ne!(BlisProfileLevel::Macro, BlisProfileLevel::Micro);
    }

    #[test]
    fn test_blis_profile_level_hash() {
        use std::collections::HashSet;
        let levels: HashSet<BlisProfileLevel> =
            [BlisProfileLevel::Macro, BlisProfileLevel::Micro].iter().cloned().collect();
        assert_eq!(levels.len(), 2);
        assert!(levels.contains(&BlisProfileLevel::Macro));
    }
}