Skip to main content

trueno/brick/profiler/
recording.rs

1//! Core recording methods for BrickProfiler.
2//!
3//! Extracted from mod.rs to keep file sizes manageable.
4//! Contains: start/stop (legacy string API), record_elapsed, record_elapsed_with_bytes,
5//! set_brick_bottleneck, stats lookup, all_stats, all_brick_stats, brick_names, reset.
6
7use super::BrickProfiler;
8use crate::brick::exec_graph::{BrickBottleneck, BrickId, BrickStats};
9
10impl BrickProfiler {
11    /// Start timing a brick. Returns timer handle.
12    ///
13    /// IMPORTANT: For GPU operations, call sync AFTER the operation
14    /// completes but BEFORE calling stop().
15    #[must_use]
16    pub fn start(&self, name: &str) -> super::BrickTimer {
17        super::BrickTimer { name: name.to_string(), start: std::time::Instant::now() }
18    }
19
20    /// Stop timing and record the sample.
21    ///
22    /// # Arguments
23    /// - `timer`: Timer handle from `start()`
24    /// - `elements`: Number of elements (tokens) processed
25    pub fn stop(&mut self, timer: super::BrickTimer, elements: u64) {
26        if !self.enabled {
27            return;
28        }
29
30        let elapsed = timer.start.elapsed();
31        let elapsed_ns = elapsed.as_nanos() as u64;
32
33        // PAR-200: Try fast path first if name matches a known BrickId
34        if let Some(brick_id) = BrickId::from_str(&timer.name) {
35            let stats = &mut self.brick_stats[brick_id as usize];
36            stats.add_sample(elapsed_ns, elements);
37        } else {
38            // Fall back to dynamic stats
39            let name = timer.name;
40            let stats =
41                self.dynamic_stats.entry(name.clone()).or_insert_with(|| BrickStats::new(&name));
42            stats.add_sample(elapsed_ns, elements);
43        }
44
45        // Update totals
46        self.total_tokens += elements;
47        self.total_ns += elapsed_ns;
48    }
49
50    /// Record a pre-measured duration for a brick.
51    ///
52    /// PAR-073: This method allows timing with raw `Instant` calls, avoiding
53    /// borrow conflicts when profiling CUDA operations that also need `&mut self`.
54    ///
55    /// # Arguments
56    /// - `name`: Brick name
57    /// - `elapsed`: Duration of the operation (from `Instant::elapsed()`)
58    /// - `elements`: Number of elements (tokens) processed
59    ///
60    /// # Example
61    /// ```rust,ignore
62    /// let start = std::time::Instant::now();
63    /// cuda_stream.synchronize()?;
64    /// self.some_cuda_operation()?;
65    /// cuda_stream.synchronize()?;
66    /// let elapsed = start.elapsed();
67    /// self.profiler.record_elapsed("SomeBrick", elapsed, 1);
68    /// ```
69    pub fn record_elapsed(&mut self, name: &str, elapsed: std::time::Duration, elements: u64) {
70        if !self.enabled {
71            return;
72        }
73
74        let elapsed_ns = elapsed.as_nanos() as u64;
75
76        // PAR-200: Try fast path first if name matches a known BrickId
77        if let Some(brick_id) = BrickId::from_str(name) {
78            let stats = &mut self.brick_stats[brick_id as usize];
79            stats.add_sample(elapsed_ns, elements);
80        } else {
81            // Fall back to dynamic stats
82            let stats =
83                self.dynamic_stats.entry(name.to_string()).or_insert_with(|| BrickStats::new(name));
84            stats.add_sample(elapsed_ns, elements);
85        }
86
87        // Update totals
88        self.total_tokens += elements;
89        self.total_ns += elapsed_ns;
90    }
91
92    /// PMAT-451: Record elapsed time with byte metrics for compression workloads.
93    ///
94    /// # Arguments
95    /// - `name`: Brick name
96    /// - `elapsed`: Duration of the operation
97    /// - `elements`: Number of elements (pages) processed
98    /// - `input_bytes`: Original uncompressed size
99    /// - `output_bytes`: Compressed output size
100    ///
101    /// # Example
102    /// ```rust,ignore
103    /// let start = std::time::Instant::now();
104    /// let compressed = zstd_compress(&page_data);
105    /// let elapsed = start.elapsed();
106    /// profiler.record_elapsed_with_bytes(
107    ///     "ZstdCompress",
108    ///     elapsed,
109    ///     1,
110    ///     page_data.len() as u64,
111    ///     compressed.len() as u64,
112    /// );
113    /// ```
114    pub fn record_elapsed_with_bytes(
115        &mut self,
116        name: &str,
117        elapsed: std::time::Duration,
118        elements: u64,
119        input_bytes: u64,
120        output_bytes: u64,
121    ) {
122        if !self.enabled {
123            return;
124        }
125
126        let elapsed_ns = elapsed.as_nanos() as u64;
127
128        // PAR-200: Try fast path first if name matches a known BrickId
129        if let Some(brick_id) = BrickId::from_str(name) {
130            let stats = &mut self.brick_stats[brick_id as usize];
131            stats.add_sample_with_bytes(elapsed_ns, elements, input_bytes, output_bytes);
132        } else {
133            // Fall back to dynamic stats
134            let stats =
135                self.dynamic_stats.entry(name.to_string()).or_insert_with(|| BrickStats::new(name));
136            stats.add_sample_with_bytes(elapsed_ns, elements, input_bytes, output_bytes);
137        }
138
139        // Update totals
140        self.total_tokens += elements;
141        self.total_ns += elapsed_ns;
142    }
143
144    /// PMAT-451: Set bottleneck classification for a brick.
145    pub fn set_brick_bottleneck(&mut self, name: &str, bottleneck: BrickBottleneck) {
146        // PAR-200: Try fast path first
147        if let Some(brick_id) = BrickId::from_str(name) {
148            self.brick_stats[brick_id as usize].set_bottleneck(bottleneck);
149        } else if let Some(stats) = self.dynamic_stats.get_mut(name) {
150            stats.set_bottleneck(bottleneck);
151        }
152    }
153
154    /// Get statistics for a specific brick by name.
155    ///
156    /// First checks known BrickId types (O(1)), then falls back to dynamic stats.
157    #[must_use]
158    pub fn stats(&self, name: &str) -> Option<&BrickStats> {
159        // Try fast path first
160        if let Some(brick_id) = BrickId::from_str(name) {
161            let stats = &self.brick_stats[brick_id as usize];
162            if stats.count > 0 {
163                return Some(stats);
164            }
165        }
166        // Fall back to dynamic stats
167        self.dynamic_stats.get(name)
168    }
169
170    /// Get all brick statistics (legacy API, returns dynamic stats only).
171    ///
172    /// For full statistics including known bricks, use `all_brick_stats()` instead.
173    #[must_use]
174    #[deprecated(since = "0.12.0", note = "Use all_brick_stats() for complete statistics")]
175    pub fn all_stats(&self) -> &std::collections::HashMap<String, BrickStats> {
176        &self.dynamic_stats
177    }
178
179    /// Get all brick statistics including both known and dynamic bricks.
180    pub fn all_brick_stats(&self) -> impl Iterator<Item = &BrickStats> {
181        self.brick_stats.iter().filter(|s| s.count > 0).chain(self.dynamic_stats.values())
182    }
183
184    /// Get all brick names.
185    #[must_use]
186    pub fn brick_names(&self) -> Vec<String> {
187        let mut names: Vec<String> = self
188            .brick_stats
189            .iter()
190            .enumerate()
191            .filter(|(_, s)| s.count > 0)
192            .map(|(i, _)| {
193                let brick_id = BrickId::ALL[i];
194                brick_id.name().to_string()
195            })
196            .collect();
197        names.extend(self.dynamic_stats.keys().cloned());
198        names
199    }
200
201    /// Reset all statistics.
202    pub fn reset(&mut self) {
203        for stats in &mut self.brick_stats {
204            stats.count = 0;
205            stats.total_ns = 0;
206            stats.min_ns = u64::MAX;
207            stats.max_ns = 0;
208            stats.total_elements = 0;
209            stats.total_bytes = 0;
210            stats.total_compressed_bytes = 0;
211        }
212        self.dynamic_stats.clear();
213        self.pending.clear();
214        self.total_tokens = 0;
215        self.total_ns = 0;
216    }
217}