Skip to main content

trueno/brick/
profiling.rs

1//! High-Performance Profiling Patterns
2//!
3//! CPU cycle counters, cached time service, and page fault detection.
4//! Based on Phase 11: E.9 patterns.
5
6use std::sync::atomic::{AtomicU64, Ordering};
7use std::time::Instant;
8
9// ============================================================================
10// CPU Cycle Counters
11// ============================================================================
12
/// Read the CPU's cycle counter via the RDTSCP instruction (x86_64).
///
/// The returned value is a raw timestamp-counter reading, intended for
/// frequency-invariant deltas: subtract two readings and divide by the
/// element count to get cycles/element. Pair with `elapsed_ns` to derive
/// IPC (Instructions Per Cycle).
///
/// # Example
/// ```rust,ignore
/// let t0 = cpu_cycles();
/// // ... operation ...
/// let t1 = cpu_cycles();
/// let cycles_per_element = (t1 - t0) / num_elements;
/// ```
#[cfg(target_arch = "x86_64")]
#[inline]
pub fn cpu_cycles() -> u64 {
    let mut aux = 0u32;
    // SAFETY: __rdtscp only reads the timestamp counter (plus the IA32_TSC_AUX
    // value into `aux`); its sole requirement is running on x86_64, which the
    // cfg guarantees.
    unsafe { core::arch::x86_64::__rdtscp(&mut aux) }
}
35
/// CPU cycle counter for ARM64 using the CNTVCT_EL0 register.
///
/// NOTE(review): CNTVCT_EL0 is the *virtual count* of the generic timer,
/// which ticks at the fixed system-counter frequency rather than the core
/// clock — readings are timer ticks, not true core cycles. Confirm callers
/// only use this for relative deltas, as the x86_64 doc suggests.
#[cfg(target_arch = "aarch64")]
#[inline]
pub fn cpu_cycles() -> u64 {
    let cycles: u64;
    // SAFETY: CNTVCT_EL0 is always readable from EL0 on aarch64; no invariants.
    unsafe {
        core::arch::asm!("mrs {}, cntvct_el0", out(reg) cycles);
    }
    cycles
}
47
/// Fallback for unsupported architectures (returns 0).
///
/// Callers should treat a constant 0 as "cycle counting unavailable"
/// rather than as a real counter reading.
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
#[inline]
pub fn cpu_cycles() -> u64 {
    0
}
54
// ============================================================================
// Cached Time Service (Pattern 2 from actix-web)
// ============================================================================

/// Nanoseconds since `EPOCH`, refreshed by the background time-service thread.
static CACHED_NANOS: AtomicU64 = AtomicU64::new(0);

/// Lazily-fixed reference instant that all cached readings are measured from.
static EPOCH: std::sync::OnceLock<Instant> = std::sync::OnceLock::new();

/// Guard ensuring the time service background thread is spawned at most once.
static TIME_SERVICE_INIT: std::sync::OnceLock<()> = std::sync::OnceLock::new();

/// Initialize the cached time service (call once at startup).
///
/// Spawns a detached background thread that refreshes `CACHED_NANOS` every
/// 100µs, so hot paths can read the time with a single relaxed atomic load
/// instead of a syscall. Calling this more than once is a no-op (guarded by
/// `TIME_SERVICE_INIT`).
///
/// # Example
/// ```rust,ignore
/// trueno::brick::init_time_service();
/// // Later...
/// let ns = trueno::brick::cached_nanos();
/// ```
pub fn init_time_service() {
    TIME_SERVICE_INIT.get_or_init(|| {
        let epoch = *EPOCH.get_or_init(Instant::now);
        CACHED_NANOS.store(0, Ordering::Relaxed);

        std::thread::Builder::new()
            .name("trueno-time-service".into())
            .spawn(move || loop {
                std::thread::sleep(std::time::Duration::from_micros(100));
                let elapsed = epoch.elapsed().as_nanos() as u64;
                CACHED_NANOS.store(elapsed, Ordering::Relaxed);
            })
            .expect("Failed to spawn time service thread");
    });
}

/// Get cached time in nanoseconds since epoch (NO SYSCALL, ~1ns overhead).
///
/// Returns 0 until the service's first refresh tick fires (or if
/// `init_time_service()` was never called). For a value that is always
/// meaningful, use `cached_nanos_or_now`.
#[inline]
pub fn cached_nanos() -> u64 {
    CACHED_NANOS.load(Ordering::Relaxed)
}

/// Get cached time, falling back to a direct `Instant` reading whenever no
/// cached value is available yet.
///
/// Fix: the fallback now also covers the window between `init_time_service()`
/// and the service's first 100µs tick — the cache still holds 0 then, and the
/// previous condition (`service not initialized`) returned that raw 0 to the
/// caller instead of a real time.
#[inline]
pub fn cached_nanos_or_now() -> u64 {
    let cached = CACHED_NANOS.load(Ordering::Relaxed);
    if cached == 0 {
        // Cache not populated yet — pay the syscall for a real reading.
        EPOCH.get_or_init(Instant::now).elapsed().as_nanos() as u64
    } else {
        cached
    }
}
115
116// ============================================================================
117// Page Fault Detection (Pattern from B4 Investigation)
118// ============================================================================
119
/// Get current minor and major page fault counts (Linux only).
///
/// Returns (minor_faults, major_faults).
/// - Minor faults: Page in memory but not mapped (soft fault)
/// - Major faults: Page on disk, requires I/O (hard fault)
///
/// Returns (0, 0) if `/proc/self/stat` cannot be read or parsed.
#[cfg(target_os = "linux")]
pub fn get_page_faults() -> (u64, u64) {
    let stat = std::fs::read_to_string("/proc/self/stat").unwrap_or_default();
    parse_stat_faults(&stat)
}

/// Parse `(minflt, majflt)` from a `/proc/<pid>/stat` line.
///
/// Fix: the `comm` field (field 2) is parenthesized and may itself contain
/// spaces and parentheses (e.g. `(my prog)`), so whitespace-splitting the
/// whole line — as the previous version did — shifts every later field and
/// misreads the fault counters. Per proc(5), we skip past the *last* `)`
/// first; after it, whitespace-separated index 7 is `minflt` (field 10) and
/// index 9 is `majflt` (field 12).
#[allow(dead_code)] // only called from the Linux-gated wrapper above
fn parse_stat_faults(stat: &str) -> (u64, u64) {
    let rest = match stat.rfind(')') {
        Some(i) => &stat[i + 1..],
        None => return (0, 0),
    };
    let fields: Vec<&str> = rest.split_whitespace().collect();
    let minor = fields.get(7).and_then(|s| s.parse().ok()).unwrap_or(0);
    let major = fields.get(9).and_then(|s| s.parse().ok()).unwrap_or(0);
    (minor, major)
}
138
/// Fallback for non-Linux platforms.
///
/// Page-fault counters are read from `/proc` on Linux only; elsewhere this
/// always reports zero faults so callers need no platform checks.
#[cfg(not(target_os = "linux"))]
pub fn get_page_faults() -> (u64, u64) {
    (0, 0)
}
144
145/// Execute a closure while tracking page faults.
146///
147/// Logs a warning if more than 1000 minor faults or any major faults occur.
148///
149/// # Example
150/// ```rust,ignore
151/// let result = with_page_fault_tracking("mmap_copy", || {
152///     data.copy_from_slice(&mmap_region);
153/// });
154/// ```
155pub fn with_page_fault_tracking<T, F: FnOnce() -> T>(name: &str, f: F) -> (T, u64, u64) {
156    let (minor_before, major_before) = get_page_faults();
157    let result = f();
158    let (minor_after, major_after) = get_page_faults();
159
160    let minor_delta = minor_after.saturating_sub(minor_before);
161    let major_delta = major_after.saturating_sub(major_before);
162
163    #[cfg(feature = "tracing")]
164    if minor_delta > 1000 || major_delta > 0 {
165        tracing::warn!(
166            operation = name,
167            minor_faults = minor_delta,
168            major_faults = major_delta,
169            "High page fault count detected"
170        );
171    }
172
173    let _ = name; // Suppress unused warning when tracing disabled
174    (result, minor_delta, major_delta)
175}
176
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke-test the cycle counter: real readings on x86_64/aarch64,
    /// the constant-0 stub everywhere else.
    #[test]
    fn test_cpu_cycles_returns_value() {
        let cycles = cpu_cycles();
        if cfg!(any(target_arch = "x86_64", target_arch = "aarch64")) {
            assert!(cycles > 0);
        } else {
            assert_eq!(cycles, 0);
        }
    }

    /// `cached_nanos_or_now` must yield a value without panicking even when
    /// the time service was never initialized (u64 is trivially valid).
    #[test]
    fn test_cached_nanos_or_now_returns_value() {
        let _nanos = cached_nanos_or_now();
    }

    /// The tracked closure's result must pass through unchanged.
    #[test]
    fn test_page_fault_tracking() {
        let (result, minor_delta, major_delta) = with_page_fault_tracking("test", || 42);
        assert_eq!(result, 42);
        let _ = (minor_delta, major_delta); // deltas are valid u64s by type
    }

    /// `get_page_faults` must not panic on any platform (zeros where
    /// unsupported).
    #[test]
    fn test_get_page_faults() {
        let (_minor, _major) = get_page_faults();
    }
}