trueno/brick/profiler/recording.rs
1//! Core recording methods for BrickProfiler.
2//!
3//! Extracted from mod.rs to keep file sizes manageable.
4//! Contains: start/stop (legacy string API), record_elapsed, record_elapsed_with_bytes,
5//! set_brick_bottleneck, stats lookup, all_stats, all_brick_stats, brick_names, reset.
6
7use super::BrickProfiler;
8use crate::brick::exec_graph::{BrickBottleneck, BrickId, BrickStats};
9
10impl BrickProfiler {
11 /// Start timing a brick. Returns timer handle.
12 ///
13 /// IMPORTANT: For GPU operations, call sync AFTER the operation
14 /// completes but BEFORE calling stop().
15 #[must_use]
16 pub fn start(&self, name: &str) -> super::BrickTimer {
17 super::BrickTimer { name: name.to_string(), start: std::time::Instant::now() }
18 }
19
20 /// Stop timing and record the sample.
21 ///
22 /// # Arguments
23 /// - `timer`: Timer handle from `start()`
24 /// - `elements`: Number of elements (tokens) processed
25 pub fn stop(&mut self, timer: super::BrickTimer, elements: u64) {
26 if !self.enabled {
27 return;
28 }
29
30 let elapsed = timer.start.elapsed();
31 let elapsed_ns = elapsed.as_nanos() as u64;
32
33 // PAR-200: Try fast path first if name matches a known BrickId
34 if let Some(brick_id) = BrickId::from_str(&timer.name) {
35 let stats = &mut self.brick_stats[brick_id as usize];
36 stats.add_sample(elapsed_ns, elements);
37 } else {
38 // Fall back to dynamic stats
39 let name = timer.name;
40 let stats =
41 self.dynamic_stats.entry(name.clone()).or_insert_with(|| BrickStats::new(&name));
42 stats.add_sample(elapsed_ns, elements);
43 }
44
45 // Update totals
46 self.total_tokens += elements;
47 self.total_ns += elapsed_ns;
48 }
49
50 /// Record a pre-measured duration for a brick.
51 ///
52 /// PAR-073: This method allows timing with raw `Instant` calls, avoiding
53 /// borrow conflicts when profiling CUDA operations that also need `&mut self`.
54 ///
55 /// # Arguments
56 /// - `name`: Brick name
57 /// - `elapsed`: Duration of the operation (from `Instant::elapsed()`)
58 /// - `elements`: Number of elements (tokens) processed
59 ///
60 /// # Example
61 /// ```rust,ignore
62 /// let start = std::time::Instant::now();
63 /// cuda_stream.synchronize()?;
64 /// self.some_cuda_operation()?;
65 /// cuda_stream.synchronize()?;
66 /// let elapsed = start.elapsed();
67 /// self.profiler.record_elapsed("SomeBrick", elapsed, 1);
68 /// ```
69 pub fn record_elapsed(&mut self, name: &str, elapsed: std::time::Duration, elements: u64) {
70 if !self.enabled {
71 return;
72 }
73
74 let elapsed_ns = elapsed.as_nanos() as u64;
75
76 // PAR-200: Try fast path first if name matches a known BrickId
77 if let Some(brick_id) = BrickId::from_str(name) {
78 let stats = &mut self.brick_stats[brick_id as usize];
79 stats.add_sample(elapsed_ns, elements);
80 } else {
81 // Fall back to dynamic stats
82 let stats =
83 self.dynamic_stats.entry(name.to_string()).or_insert_with(|| BrickStats::new(name));
84 stats.add_sample(elapsed_ns, elements);
85 }
86
87 // Update totals
88 self.total_tokens += elements;
89 self.total_ns += elapsed_ns;
90 }
91
92 /// PMAT-451: Record elapsed time with byte metrics for compression workloads.
93 ///
94 /// # Arguments
95 /// - `name`: Brick name
96 /// - `elapsed`: Duration of the operation
97 /// - `elements`: Number of elements (pages) processed
98 /// - `input_bytes`: Original uncompressed size
99 /// - `output_bytes`: Compressed output size
100 ///
101 /// # Example
102 /// ```rust,ignore
103 /// let start = std::time::Instant::now();
104 /// let compressed = zstd_compress(&page_data);
105 /// let elapsed = start.elapsed();
106 /// profiler.record_elapsed_with_bytes(
107 /// "ZstdCompress",
108 /// elapsed,
109 /// 1,
110 /// page_data.len() as u64,
111 /// compressed.len() as u64,
112 /// );
113 /// ```
114 pub fn record_elapsed_with_bytes(
115 &mut self,
116 name: &str,
117 elapsed: std::time::Duration,
118 elements: u64,
119 input_bytes: u64,
120 output_bytes: u64,
121 ) {
122 if !self.enabled {
123 return;
124 }
125
126 let elapsed_ns = elapsed.as_nanos() as u64;
127
128 // PAR-200: Try fast path first if name matches a known BrickId
129 if let Some(brick_id) = BrickId::from_str(name) {
130 let stats = &mut self.brick_stats[brick_id as usize];
131 stats.add_sample_with_bytes(elapsed_ns, elements, input_bytes, output_bytes);
132 } else {
133 // Fall back to dynamic stats
134 let stats =
135 self.dynamic_stats.entry(name.to_string()).or_insert_with(|| BrickStats::new(name));
136 stats.add_sample_with_bytes(elapsed_ns, elements, input_bytes, output_bytes);
137 }
138
139 // Update totals
140 self.total_tokens += elements;
141 self.total_ns += elapsed_ns;
142 }
143
144 /// PMAT-451: Set bottleneck classification for a brick.
145 pub fn set_brick_bottleneck(&mut self, name: &str, bottleneck: BrickBottleneck) {
146 // PAR-200: Try fast path first
147 if let Some(brick_id) = BrickId::from_str(name) {
148 self.brick_stats[brick_id as usize].set_bottleneck(bottleneck);
149 } else if let Some(stats) = self.dynamic_stats.get_mut(name) {
150 stats.set_bottleneck(bottleneck);
151 }
152 }
153
154 /// Get statistics for a specific brick by name.
155 ///
156 /// First checks known BrickId types (O(1)), then falls back to dynamic stats.
157 #[must_use]
158 pub fn stats(&self, name: &str) -> Option<&BrickStats> {
159 // Try fast path first
160 if let Some(brick_id) = BrickId::from_str(name) {
161 let stats = &self.brick_stats[brick_id as usize];
162 if stats.count > 0 {
163 return Some(stats);
164 }
165 }
166 // Fall back to dynamic stats
167 self.dynamic_stats.get(name)
168 }
169
170 /// Get all brick statistics (legacy API, returns dynamic stats only).
171 ///
172 /// For full statistics including known bricks, use `all_brick_stats()` instead.
173 #[must_use]
174 #[deprecated(since = "0.12.0", note = "Use all_brick_stats() for complete statistics")]
175 pub fn all_stats(&self) -> &std::collections::HashMap<String, BrickStats> {
176 &self.dynamic_stats
177 }
178
179 /// Get all brick statistics including both known and dynamic bricks.
180 pub fn all_brick_stats(&self) -> impl Iterator<Item = &BrickStats> {
181 self.brick_stats.iter().filter(|s| s.count > 0).chain(self.dynamic_stats.values())
182 }
183
184 /// Get all brick names.
185 #[must_use]
186 pub fn brick_names(&self) -> Vec<String> {
187 let mut names: Vec<String> = self
188 .brick_stats
189 .iter()
190 .enumerate()
191 .filter(|(_, s)| s.count > 0)
192 .map(|(i, _)| {
193 let brick_id = BrickId::ALL[i];
194 brick_id.name().to_string()
195 })
196 .collect();
197 names.extend(self.dynamic_stats.keys().cloned());
198 names
199 }
200
201 /// Reset all statistics.
202 pub fn reset(&mut self) {
203 for stats in &mut self.brick_stats {
204 stats.count = 0;
205 stats.total_ns = 0;
206 stats.min_ns = u64::MAX;
207 stats.max_ns = 0;
208 stats.total_elements = 0;
209 stats.total_bytes = 0;
210 stats.total_compressed_bytes = 0;
211 }
212 self.dynamic_stats.clear();
213 self.pending.clear();
214 self.total_tokens = 0;
215 self.total_ns = 0;
216 }
217}