kizzasi_core/
profiling.rs

1//! Performance profiling utilities for kizzasi-core
2//!
3//! Provides tools for measuring and analyzing performance:
4//! - CPU cycle counting
5//! - Time measurement with high precision
6//! - Memory profiling
7//! - Operation counters
8//! - Statistical analysis
9
10#[cfg(not(feature = "std"))]
11use alloc::string::String;
12#[cfg(not(feature = "std"))]
13use alloc::vec::Vec;
14
15use core::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
16use std::time::{Duration, Instant};
17
18/// Performance counter for tracking operations
19#[derive(Debug, Default)]
20pub struct PerfCounter {
21    count: AtomicUsize,
22    total_time_ns: AtomicU64,
23    min_time_ns: AtomicU64,
24    max_time_ns: AtomicU64,
25}
26
27impl PerfCounter {
28    /// Create a new performance counter
29    pub const fn new() -> Self {
30        Self {
31            count: AtomicUsize::new(0),
32            total_time_ns: AtomicU64::new(0),
33            min_time_ns: AtomicU64::new(u64::MAX),
34            max_time_ns: AtomicU64::new(0),
35        }
36    }
37
38    /// Record a measurement
39    pub fn record(&self, duration_ns: u64) {
40        self.count.fetch_add(1, Ordering::Relaxed);
41        self.total_time_ns.fetch_add(duration_ns, Ordering::Relaxed);
42
43        // Update min
44        let mut current_min = self.min_time_ns.load(Ordering::Relaxed);
45        while duration_ns < current_min {
46            match self.min_time_ns.compare_exchange_weak(
47                current_min,
48                duration_ns,
49                Ordering::Relaxed,
50                Ordering::Relaxed,
51            ) {
52                Ok(_) => break,
53                Err(x) => current_min = x,
54            }
55        }
56
57        // Update max
58        let mut current_max = self.max_time_ns.load(Ordering::Relaxed);
59        while duration_ns > current_max {
60            match self.max_time_ns.compare_exchange_weak(
61                current_max,
62                duration_ns,
63                Ordering::Relaxed,
64                Ordering::Relaxed,
65            ) {
66                Ok(_) => break,
67                Err(x) => current_max = x,
68            }
69        }
70    }
71
72    /// Get the number of measurements
73    pub fn count(&self) -> usize {
74        self.count.load(Ordering::Relaxed)
75    }
76
77    /// Get the total time in nanoseconds
78    pub fn total_ns(&self) -> u64 {
79        self.total_time_ns.load(Ordering::Relaxed)
80    }
81
82    /// Get the average time in nanoseconds
83    pub fn average_ns(&self) -> u64 {
84        let count = self.count();
85        if count == 0 {
86            return 0;
87        }
88        self.total_ns() / count as u64
89    }
90
91    /// Get the minimum time in nanoseconds
92    pub fn min_ns(&self) -> u64 {
93        let min = self.min_time_ns.load(Ordering::Relaxed);
94        if min == u64::MAX {
95            0
96        } else {
97            min
98        }
99    }
100
101    /// Get the maximum time in nanoseconds
102    pub fn max_ns(&self) -> u64 {
103        self.max_time_ns.load(Ordering::Relaxed)
104    }
105
106    /// Reset the counter
107    pub fn reset(&self) {
108        self.count.store(0, Ordering::Relaxed);
109        self.total_time_ns.store(0, Ordering::Relaxed);
110        self.min_time_ns.store(u64::MAX, Ordering::Relaxed);
111        self.max_time_ns.store(0, Ordering::Relaxed);
112    }
113
114    /// Get statistics summary
115    pub fn stats(&self) -> CounterStats {
116        CounterStats {
117            count: self.count(),
118            total_ns: self.total_ns(),
119            average_ns: self.average_ns(),
120            min_ns: self.min_ns(),
121            max_ns: self.max_ns(),
122        }
123    }
124}
125
126/// Statistics snapshot from a performance counter
127#[derive(Debug, Clone, Copy)]
128pub struct CounterStats {
129    pub count: usize,
130    pub total_ns: u64,
131    pub average_ns: u64,
132    pub min_ns: u64,
133    pub max_ns: u64,
134}
135
136impl CounterStats {
137    /// Convert to microseconds
138    pub fn average_us(&self) -> f64 {
139        self.average_ns as f64 / 1000.0
140    }
141
142    /// Convert to milliseconds
143    pub fn average_ms(&self) -> f64 {
144        self.average_ns as f64 / 1_000_000.0
145    }
146
147    /// Get throughput (operations per second)
148    pub fn throughput(&self) -> f64 {
149        if self.average_ns == 0 {
150            return 0.0;
151        }
152        1_000_000_000.0 / self.average_ns as f64
153    }
154}
155
156/// Timer for measuring elapsed time
157pub struct Timer {
158    start: Instant,
159}
160
161impl Timer {
162    /// Start a new timer
163    pub fn start() -> Self {
164        Self {
165            start: Instant::now(),
166        }
167    }
168
169    /// Get elapsed time in nanoseconds
170    pub fn elapsed_ns(&self) -> u64 {
171        self.start.elapsed().as_nanos() as u64
172    }
173
174    /// Get elapsed time as Duration
175    pub fn elapsed(&self) -> Duration {
176        self.start.elapsed()
177    }
178
179    /// Reset the timer
180    pub fn reset(&mut self) {
181        self.start = Instant::now();
182    }
183}
184
185/// RAII scope timer that records to a counter on drop
186pub struct ScopeTimer<'a> {
187    counter: &'a PerfCounter,
188    start: Instant,
189}
190
191impl<'a> ScopeTimer<'a> {
192    /// Create a new scope timer
193    pub fn new(counter: &'a PerfCounter) -> Self {
194        Self {
195            counter,
196            start: Instant::now(),
197        }
198    }
199}
200
201impl<'a> Drop for ScopeTimer<'a> {
202    fn drop(&mut self) {
203        let elapsed_ns = self.start.elapsed().as_nanos() as u64;
204        self.counter.record(elapsed_ns);
205    }
206}
207
208/// Memory profiler for tracking allocations
209#[derive(Debug, Default)]
210pub struct MemoryProfiler {
211    current_bytes: AtomicUsize,
212    peak_bytes: AtomicUsize,
213    allocations: AtomicUsize,
214    deallocations: AtomicUsize,
215}
216
217impl MemoryProfiler {
218    /// Create a new memory profiler
219    pub const fn new() -> Self {
220        Self {
221            current_bytes: AtomicUsize::new(0),
222            peak_bytes: AtomicUsize::new(0),
223            allocations: AtomicUsize::new(0),
224            deallocations: AtomicUsize::new(0),
225        }
226    }
227
228    /// Record an allocation
229    pub fn allocate(&self, bytes: usize) {
230        self.allocations.fetch_add(1, Ordering::Relaxed);
231        let new_current = self.current_bytes.fetch_add(bytes, Ordering::Relaxed) + bytes;
232
233        // Update peak
234        let mut current_peak = self.peak_bytes.load(Ordering::Relaxed);
235        while new_current > current_peak {
236            match self.peak_bytes.compare_exchange_weak(
237                current_peak,
238                new_current,
239                Ordering::Relaxed,
240                Ordering::Relaxed,
241            ) {
242                Ok(_) => break,
243                Err(x) => current_peak = x,
244            }
245        }
246    }
247
248    /// Record a deallocation
249    pub fn deallocate(&self, bytes: usize) {
250        self.deallocations.fetch_add(1, Ordering::Relaxed);
251        self.current_bytes.fetch_sub(bytes, Ordering::Relaxed);
252    }
253
254    /// Get current memory usage
255    pub fn current_bytes(&self) -> usize {
256        self.current_bytes.load(Ordering::Relaxed)
257    }
258
259    /// Get peak memory usage
260    pub fn peak_bytes(&self) -> usize {
261        self.peak_bytes.load(Ordering::Relaxed)
262    }
263
264    /// Get allocation count
265    pub fn allocations(&self) -> usize {
266        self.allocations.load(Ordering::Relaxed)
267    }
268
269    /// Get deallocation count
270    pub fn deallocations(&self) -> usize {
271        self.deallocations.load(Ordering::Relaxed)
272    }
273
274    /// Get net allocations (allocations - deallocations)
275    pub fn net_allocations(&self) -> isize {
276        self.allocations() as isize - self.deallocations() as isize
277    }
278
279    /// Reset the profiler
280    pub fn reset(&self) {
281        self.current_bytes.store(0, Ordering::Relaxed);
282        self.peak_bytes.store(0, Ordering::Relaxed);
283        self.allocations.store(0, Ordering::Relaxed);
284        self.deallocations.store(0, Ordering::Relaxed);
285    }
286
287    /// Get memory statistics
288    pub fn stats(&self) -> ProfilerMemoryStats {
289        ProfilerMemoryStats {
290            current_bytes: self.current_bytes(),
291            peak_bytes: self.peak_bytes(),
292            allocations: self.allocations(),
293            deallocations: self.deallocations(),
294        }
295    }
296}
297
298/// Memory statistics snapshot from profiler
299#[derive(Debug, Clone, Copy)]
300pub struct ProfilerMemoryStats {
301    pub current_bytes: usize,
302    pub peak_bytes: usize,
303    pub allocations: usize,
304    pub deallocations: usize,
305}
306
307impl ProfilerMemoryStats {
308    /// Get current memory in MB
309    pub fn current_mb(&self) -> f64 {
310        self.current_bytes as f64 / (1024.0 * 1024.0)
311    }
312
313    /// Get peak memory in MB
314    pub fn peak_mb(&self) -> f64 {
315        self.peak_bytes as f64 / (1024.0 * 1024.0)
316    }
317}
318
319/// Profiling session that tracks multiple counters
320#[derive(Debug)]
321pub struct ProfilingSession {
322    name: String,
323    counters: Vec<(String, PerfCounter)>,
324    memory: MemoryProfiler,
325    start_time: Instant,
326}
327
328impl ProfilingSession {
329    /// Create a new profiling session
330    pub fn new(name: impl Into<String>) -> Self {
331        Self {
332            name: name.into(),
333            counters: Vec::new(),
334            memory: MemoryProfiler::new(),
335            start_time: Instant::now(),
336        }
337    }
338
339    /// Add a counter to track
340    pub fn add_counter(&mut self, name: impl Into<String>) -> usize {
341        let idx = self.counters.len();
342        self.counters.push((name.into(), PerfCounter::new()));
343        idx
344    }
345
346    /// Get a counter by index
347    pub fn counter(&self, idx: usize) -> Option<&PerfCounter> {
348        self.counters.get(idx).map(|(_, c)| c)
349    }
350
351    /// Get the memory profiler
352    pub fn memory(&self) -> &MemoryProfiler {
353        &self.memory
354    }
355
356    /// Get total elapsed time
357    pub fn elapsed(&self) -> Duration {
358        self.start_time.elapsed()
359    }
360
361    /// Generate a report
362    pub fn report(&self) -> String {
363        let mut output = format!("Profiling Session: {}\n", self.name);
364        output.push_str(&format!("Total Time: {:?}\n\n", self.elapsed()));
365
366        output.push_str("Performance Counters:\n");
367        for (name, counter) in &self.counters {
368            let stats = counter.stats();
369            output.push_str(&format!(
370                "  {}: {} calls, avg: {:.2}μs, min: {:.2}μs, max: {:.2}μs\n",
371                name,
372                stats.count,
373                stats.average_us(),
374                stats.min_ns as f64 / 1000.0,
375                stats.max_ns as f64 / 1000.0
376            ));
377        }
378
379        let mem_stats = self.memory.stats();
380        output.push_str("\nMemory:\n");
381        output.push_str(&format!("  Current: {:.2} MB\n", mem_stats.current_mb()));
382        output.push_str(&format!("  Peak: {:.2} MB\n", mem_stats.peak_mb()));
383        output.push_str(&format!(
384            "  Allocations: {} (Net: {})\n",
385            mem_stats.allocations,
386            mem_stats.allocations as isize - mem_stats.deallocations as isize
387        ));
388
389        output
390    }
391}
392
393/// Macro to time a block of code
394#[macro_export]
395macro_rules! time_block {
396    ($counter:expr, $block:expr) => {{
397        let _timer = $crate::profiling::ScopeTimer::new($counter);
398        $block
399    }};
400}
401
402/// Macro to profile memory usage of a block
403#[macro_export]
404macro_rules! profile_memory {
405    ($profiler:expr, $size:expr, $block:expr) => {{
406        $profiler.allocate($size);
407        let result = $block;
408        $profiler.deallocate($size);
409        result
410    }};
411}
412
413#[cfg(test)]
414mod tests {
415    use super::*;
416    use std::thread;
417
418    #[test]
419    fn test_perf_counter() {
420        let counter = PerfCounter::new();
421
422        counter.record(100);
423        counter.record(200);
424        counter.record(150);
425
426        assert_eq!(counter.count(), 3);
427        assert_eq!(counter.total_ns(), 450);
428        assert_eq!(counter.average_ns(), 150);
429        assert_eq!(counter.min_ns(), 100);
430        assert_eq!(counter.max_ns(), 200);
431    }
432
433    #[test]
434    fn test_timer() {
435        let timer = Timer::start();
436        thread::sleep(Duration::from_millis(10));
437        let elapsed = timer.elapsed_ns();
438
439        // Should be at least 10ms
440        assert!(elapsed >= 10_000_000);
441    }
442
443    #[test]
444    fn test_scope_timer() {
445        let counter = PerfCounter::new();
446
447        {
448            let _timer = ScopeTimer::new(&counter);
449            thread::sleep(Duration::from_millis(10));
450        }
451
452        assert_eq!(counter.count(), 1);
453        assert!(counter.total_ns() >= 10_000_000);
454    }
455
456    #[test]
457    fn test_memory_profiler() {
458        let profiler = MemoryProfiler::new();
459
460        profiler.allocate(1024);
461        assert_eq!(profiler.current_bytes(), 1024);
462        assert_eq!(profiler.peak_bytes(), 1024);
463
464        profiler.allocate(2048);
465        assert_eq!(profiler.current_bytes(), 3072);
466        assert_eq!(profiler.peak_bytes(), 3072);
467
468        profiler.deallocate(1024);
469        assert_eq!(profiler.current_bytes(), 2048);
470        assert_eq!(profiler.peak_bytes(), 3072); // Peak stays
471
472        assert_eq!(profiler.allocations(), 2);
473        assert_eq!(profiler.deallocations(), 1);
474    }
475
476    #[test]
477    fn test_profiling_session() {
478        let mut session = ProfilingSession::new("test_session");
479
480        let counter_idx = session.add_counter("test_op");
481        let counter = session.counter(counter_idx).unwrap();
482
483        counter.record(100);
484        counter.record(200);
485
486        session.memory().allocate(1024);
487
488        let report = session.report();
489        assert!(report.contains("test_session"));
490        assert!(report.contains("test_op"));
491        assert!(report.contains("2 calls"));
492    }
493
494    #[test]
495    fn test_counter_stats() {
496        let counter = PerfCounter::new();
497        counter.record(1_000_000); // 1ms
498
499        let stats = counter.stats();
500        assert_eq!(stats.average_us(), 1000.0);
501        assert_eq!(stats.average_ms(), 1.0);
502        assert_eq!(stats.throughput(), 1000.0); // 1000 ops/sec
503    }
504
505    #[test]
506    fn test_concurrent_counter() {
507        use std::sync::Arc;
508
509        let counter = Arc::new(PerfCounter::new());
510        let mut handles = vec![];
511
512        // Spawn multiple threads recording measurements
513        for i in 0..10 {
514            let counter_clone = Arc::clone(&counter);
515            let handle = thread::spawn(move || {
516                for _ in 0..100 {
517                    counter_clone.record((i + 1) * 100);
518                }
519            });
520            handles.push(handle);
521        }
522
523        for handle in handles {
524            handle.join().unwrap();
525        }
526
527        assert_eq!(counter.count(), 1000);
528    }
529
530    #[test]
531    fn test_time_block_macro() {
532        let counter = PerfCounter::new();
533
534        time_block!(&counter, {
535            thread::sleep(Duration::from_millis(10));
536        });
537
538        assert_eq!(counter.count(), 1);
539        assert!(counter.total_ns() >= 10_000_000);
540    }
541
542    #[test]
543    fn test_profile_memory_macro() {
544        let profiler = MemoryProfiler::new();
545
546        profile_memory!(&profiler, 1024, {
547            // Do some work
548            let _v = vec![0u8; 1024];
549        });
550
551        assert_eq!(profiler.allocations(), 1);
552        assert_eq!(profiler.deallocations(), 1);
553        assert_eq!(profiler.current_bytes(), 0);
554    }
555}