// trueno/brick/profiling.rs
1//! High-Performance Profiling Patterns
2//!
3//! CPU cycle counters, cached time service, and page fault detection.
4//! Based on Phase 11: E.9 patterns.
5
6use std::sync::atomic::{AtomicU64, Ordering};
7use std::time::Instant;
8
9// ============================================================================
10// CPU Cycle Counters
11// ============================================================================
12
13/// CPU cycle counter using RDTSCP (x86_64) or CNTVCT_EL0 (ARM64).
14///
15/// Returns actual CPU cycles for frequency-invariant performance analysis.
16/// Use with `elapsed_ns` to calculate IPC (Instructions Per Cycle).
17///
18/// # Example
19/// ```rust,ignore
20/// let start_cycles = cpu_cycles();
21/// // ... operation ...
22/// let end_cycles = cpu_cycles();
23/// let cycles_per_element = (end_cycles - start_cycles) / num_elements;
24/// ```
/// CPU cycle counter using RDTSCP (x86_64) or CNTVCT_EL0 (ARM64).
///
/// Returns raw CPU timestamp-counter ticks, useful for frequency-invariant
/// performance analysis. Pair with `elapsed_ns` to derive IPC
/// (Instructions Per Cycle).
///
/// # Example
/// ```rust,ignore
/// let start_cycles = cpu_cycles();
/// // ... operation ...
/// let end_cycles = cpu_cycles();
/// let cycles_per_element = (end_cycles - start_cycles) / num_elements;
/// ```
#[cfg(target_arch = "x86_64")]
#[inline]
pub fn cpu_cycles() -> u64 {
    let mut aux = 0u32;
    // SAFETY: the cfg gate guarantees we are on x86_64, where __rdtscp is
    // always available; it only reads the timestamp counter (plus TSC_AUX
    // into `aux`) and imposes no other safety requirements.
    unsafe { core::arch::x86_64::__rdtscp(&mut aux) }
}
35
/// CPU cycle counter for ARM64, read from the virtual counter CNTVCT_EL0.
#[cfg(target_arch = "aarch64")]
#[inline]
pub fn cpu_cycles() -> u64 {
    let ticks: u64;
    // SAFETY: CNTVCT_EL0 is readable from EL0 on aarch64; the read has no
    // side effects and no preconditions.
    unsafe { core::arch::asm!("mrs {}, cntvct_el0", out(reg) ticks) };
    ticks
}
47
/// Fallback for unsupported architectures (returns 0).
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
#[inline]
pub fn cpu_cycles() -> u64 {
    // No portable cycle counter is available here; callers always see 0.
    0
}
54
55// ============================================================================
56// Cached Time Service (Pattern 2 from actix-web)
57// ============================================================================
58
/// Nanoseconds since [`EPOCH`], published by the background time-service
/// thread roughly every 100µs; stays 0 until the first update (or when the
/// service was never started).
static CACHED_NANOS: AtomicU64 = AtomicU64::new(0);

/// Lazily-set reference instant that all cached readings are measured from.
static EPOCH: std::sync::OnceLock<Instant> = std::sync::OnceLock::new();

/// Guard ensuring the time service background thread is spawned at most once.
static TIME_SERVICE_INIT: std::sync::OnceLock<()> = std::sync::OnceLock::new();
67
68/// Initialize the cached time service (call once at startup).
69///
70/// Spawns a background thread that updates cached time every 100µs.
71/// This avoids syscall overhead when profiling high-frequency operations.
72///
73/// # Example
74/// ```rust,ignore
75/// trueno::brick::init_time_service();
76/// // Later...
77/// let ns = trueno::brick::cached_nanos();
78/// ```
79pub fn init_time_service() {
80 TIME_SERVICE_INIT.get_or_init(|| {
81 let epoch = *EPOCH.get_or_init(Instant::now);
82 CACHED_NANOS.store(0, Ordering::Relaxed);
83
84 std::thread::Builder::new()
85 .name("trueno-time-service".into())
86 .spawn(move || loop {
87 std::thread::sleep(std::time::Duration::from_micros(100));
88 let elapsed = epoch.elapsed().as_nanos() as u64;
89 CACHED_NANOS.store(elapsed, Ordering::Relaxed);
90 })
91 .expect("Failed to spawn time service thread");
92 });
93}
94
/// Get cached time in nanoseconds since epoch (NO SYSCALL, ~1ns overhead).
///
/// A single relaxed atomic load of the value published by the time-service
/// thread. Returns 0 if the service is not initialized (or before its first
/// ~100µs tick). For accurate timing, call `init_time_service()` at
/// application startup, or use `cached_nanos_or_now()` for a fallback.
#[inline]
pub fn cached_nanos() -> u64 {
    CACHED_NANOS.load(Ordering::Relaxed)
}
103
104/// Get cached time or fall back to Instant::now() if service not initialized.
105#[inline]
106pub fn cached_nanos_or_now() -> u64 {
107 let cached = CACHED_NANOS.load(Ordering::Relaxed);
108 if cached == 0 && TIME_SERVICE_INIT.get().is_none() {
109 // Fall back to syscall if time service not initialized
110 EPOCH.get_or_init(Instant::now).elapsed().as_nanos() as u64
111 } else {
112 cached
113 }
114}
115
116// ============================================================================
117// Page Fault Detection (Pattern from B4 Investigation)
118// ============================================================================
119
/// Get current minor and major page fault counts (Linux only).
///
/// Reads `/proc/self/stat`. Returns `(minor_faults, major_faults)`, or
/// `(0, 0)` if the file cannot be read or parsed.
/// - Minor faults: page in memory but not mapped (soft fault)
/// - Major faults: page on disk, requires I/O (hard fault)
#[cfg(target_os = "linux")]
pub fn get_page_faults() -> (u64, u64) {
    use std::fs;
    let stat = fs::read_to_string("/proc/self/stat").unwrap_or_default();
    // Field 2 (comm) is a parenthesized thread name that may itself contain
    // spaces, so splitting the whole line on whitespace shifts later fields.
    // Parse only AFTER the last ')'; counting from there, state is field 3,
    // minflt field 10 and majflt field 12 — indices 7 and 9 of the remainder.
    let Some(pos) = stat.rfind(')') else { return (0, 0) };
    let fields: Vec<&str> = stat[pos + 1..].split_whitespace().collect();
    if fields.len() > 9 {
        let minor = fields[7].parse().unwrap_or(0);
        let major = fields[9].parse().unwrap_or(0);
        (minor, major)
    } else {
        (0, 0)
    }
}
138
/// Fallback for non-Linux platforms.
///
/// Page-fault accounting relies on `/proc`, so other platforms always
/// report zero faults.
#[cfg(not(target_os = "linux"))]
pub fn get_page_faults() -> (u64, u64) {
    (0, 0)
}
144
145/// Execute a closure while tracking page faults.
146///
147/// Logs a warning if more than 1000 minor faults or any major faults occur.
148///
149/// # Example
150/// ```rust,ignore
151/// let result = with_page_fault_tracking("mmap_copy", || {
152/// data.copy_from_slice(&mmap_region);
153/// });
154/// ```
155pub fn with_page_fault_tracking<T, F: FnOnce() -> T>(name: &str, f: F) -> (T, u64, u64) {
156 let (minor_before, major_before) = get_page_faults();
157 let result = f();
158 let (minor_after, major_after) = get_page_faults();
159
160 let minor_delta = minor_after.saturating_sub(minor_before);
161 let major_delta = major_after.saturating_sub(major_before);
162
163 #[cfg(feature = "tracing")]
164 if minor_delta > 1000 || major_delta > 0 {
165 tracing::warn!(
166 operation = name,
167 minor_faults = minor_delta,
168 major_faults = major_delta,
169 "High page fault count detected"
170 );
171 }
172
173 let _ = name; // Suppress unused warning when tracing disabled
174 (result, minor_delta, major_delta)
175}
176
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_cpu_cycles_returns_value() {
        let cycles = cpu_cycles();
        // Counter-bearing architectures return a live (non-zero) reading;
        // the portable fallback is a constant 0.
        #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
        assert!(cycles > 0);
        #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
        assert_eq!(cycles, 0);
    }

    #[test]
    fn test_cached_nanos_or_now_returns_value() {
        // Whether served from the cache or the syscall fallback, readings
        // must be positive and non-decreasing across a sleep that is longer
        // than the service's 100µs update interval.
        let first = cached_nanos_or_now();
        std::thread::sleep(std::time::Duration::from_millis(5));
        let second = cached_nanos_or_now();
        assert!(second > 0, "time should advance past zero");
        assert!(second >= first, "time must not go backwards");
    }

    #[test]
    fn test_page_fault_tracking() {
        let mut calls = 0;
        let (result, _minor, _major) = with_page_fault_tracking("test", || {
            calls += 1;
            42
        });
        assert_eq!(result, 42);
        assert_eq!(calls, 1, "closure must run exactly once");
    }

    #[test]
    fn test_get_page_faults() {
        let (minor, _major) = get_page_faults();
        // Any live Linux process has already taken minor faults; other
        // platforms use the (0, 0) stub.
        #[cfg(target_os = "linux")]
        assert!(minor > 0);
        #[cfg(not(target_os = "linux"))]
        assert_eq!(minor, 0);
    }
}