// trueno/brick/perf_metrics.rs
/// Timing and token counters collected over an inference run.
///
/// Durations are stored in milliseconds. The load and prefill values are
/// overwritten by each recording, while the decode counters (`t_eval_ms`,
/// `n_eval`, `n_samples`) accumulate across calls and saturate at their type's
/// maximum instead of overflowing.
#[derive(Debug, Clone, Default)]
pub struct PerfMetrics {
    /// Load time in milliseconds, as last set by [`PerfMetrics::record_load`].
    pub t_load_ms: u64,
    /// Prefill time in milliseconds, as last set by [`PerfMetrics::record_prefill`].
    pub t_p_eval_ms: u64,
    /// Decode time in milliseconds, summed over every decode recording.
    pub t_eval_ms: u64,
    /// Prefill token count, as last set by [`PerfMetrics::record_prefill`].
    pub n_p_eval: u32,
    /// Decoded token count, summed over every decode recording.
    pub n_eval: u32,
    /// Number of decode recordings (one per call to
    /// [`PerfMetrics::record_decode`] or [`PerfMetrics::record_decode_batch`]).
    pub n_samples: u32,
}

impl PerfMetrics {
    /// Creates a metrics record with every field set to zero.
    pub fn new() -> Self {
        Self::default()
    }

    /// Stores `ms` as the load time, replacing any previous value.
    pub fn record_load(&mut self, ms: u64) {
        self.t_load_ms = ms;
    }

    /// Stores the prefill duration `ms` and its token count `tokens`,
    /// replacing any previous prefill values.
    pub fn record_prefill(&mut self, ms: u64, tokens: u32) {
        self.t_p_eval_ms = ms;
        self.n_p_eval = tokens;
    }

    /// Records a decode step that took `ms` milliseconds and produced one token.
    ///
    /// Equivalent to `record_decode_batch(ms, 1)`.
    pub fn record_decode(&mut self, ms: u64) {
        self.record_decode_batch(ms, 1);
    }

    /// Records a decode step that took `ms` milliseconds and produced `tokens`
    /// tokens, and counts it as one sample.
    ///
    /// The counters saturate rather than overflow, so a long-lived accumulator
    /// neither panics (debug builds) nor wraps around to a small value
    /// (release builds).
    pub fn record_decode_batch(&mut self, ms: u64, tokens: u32) {
        self.t_eval_ms = self.t_eval_ms.saturating_add(ms);
        self.n_eval = self.n_eval.saturating_add(tokens);
        self.n_samples = self.n_samples.saturating_add(1);
    }

    /// Decode throughput in tokens per second.
    ///
    /// Returns `0.0` when no decode time has been recorded.
    #[must_use]
    pub fn tokens_per_second(&self) -> f64 {
        Self::per_second(self.n_eval, self.t_eval_ms)
    }

    /// Prefill throughput in tokens per second.
    ///
    /// Returns `0.0` when the recorded prefill time is zero.
    #[must_use]
    pub fn prefill_tokens_per_second(&self) -> f64 {
        Self::per_second(self.n_p_eval, self.t_p_eval_ms)
    }

    /// Sum of the load, prefill and decode times in milliseconds, saturating
    /// at `u64::MAX`.
    #[must_use]
    pub fn total_ms(&self) -> u64 {
        self.time_to_first_token_ms().saturating_add(self.t_eval_ms)
    }

    /// Sum of the load and prefill times in milliseconds, saturating at
    /// `u64::MAX`.
    #[must_use]
    pub fn time_to_first_token_ms(&self) -> u64 {
        self.t_load_ms.saturating_add(self.t_p_eval_ms)
    }

    /// Average decode time per token in milliseconds.
    ///
    /// Returns `0.0` when no tokens have been decoded.
    #[must_use]
    pub fn avg_token_latency_ms(&self) -> f64 {
        if self.n_eval == 0 {
            0.0
        } else {
            // `u64` has no lossless conversion to `f64`; precision is only lost
            // above 2^53 ms, which is acceptable for a reporting value.
            self.t_eval_ms as f64 / f64::from(self.n_eval)
        }
    }

    /// Formats the load, prefill, decode and total figures as one line of text.
    #[must_use]
    pub fn summary(&self) -> String {
        format!(
            "load: {}ms, prefill: {}ms ({:.1} tok/s, {} tokens), decode: {}ms ({:.1} tok/s, {} tokens), total: {}ms",
            self.t_load_ms,
            self.t_p_eval_ms,
            self.prefill_tokens_per_second(),
            self.n_p_eval,
            self.t_eval_ms,
            self.tokens_per_second(),
            self.n_eval,
            self.total_ms()
        )
    }

    /// Resets every field back to zero.
    pub fn reset(&mut self) {
        *self = Self::default();
    }

    /// Converts `count` items over `elapsed_ms` milliseconds into a per-second
    /// rate, returning `0.0` for a zero duration to avoid dividing by zero.
    fn per_second(count: u32, elapsed_ms: u64) -> f64 {
        if elapsed_ms == 0 {
            0.0
        } else {
            // Same evaluation order as `1000.0 * count / elapsed`; the `u64`
            // cast only loses precision above 2^53 ms.
            1000.0 * f64::from(count) / elapsed_ms as f64
        }
    }
}

/// Identifies an inference phase: prefill or decode.
///
/// The default phase is [`InferencePhase::Prefill`]. The type is `Copy` and
/// hashable, so it can be used directly as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum InferencePhase {
    /// The prefill phase (the default).
    #[default]
    Prefill,
    /// The decode phase.
    Decode,
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance for floating-point comparisons in the rate tests.
    const EPS: f64 = 0.001;

    #[test]
    fn test_perf_metrics_default() {
        let metrics = PerfMetrics::default();
        assert_eq!(metrics.t_load_ms, 0);
        assert_eq!(metrics.t_p_eval_ms, 0);
        assert_eq!(metrics.t_eval_ms, 0);
        assert_eq!(metrics.n_p_eval, 0);
        assert_eq!(metrics.n_eval, 0);
        assert_eq!(metrics.n_samples, 0);
    }

    #[test]
    fn test_perf_metrics_record_load() {
        let mut metrics = PerfMetrics::new();
        metrics.record_load(1500);
        assert_eq!(metrics.t_load_ms, 1500);

        // A second recording replaces the first rather than adding to it.
        metrics.record_load(700);
        assert_eq!(metrics.t_load_ms, 700);
    }

    #[test]
    fn test_perf_metrics_record_prefill() {
        let mut metrics = PerfMetrics::new();
        metrics.record_prefill(200, 512);
        assert_eq!(metrics.t_p_eval_ms, 200);
        assert_eq!(metrics.n_p_eval, 512);
    }

    #[test]
    fn test_perf_metrics_record_decode() {
        let mut metrics = PerfMetrics::new();
        metrics.record_decode(50);
        metrics.record_decode(50);
        assert_eq!(metrics.t_eval_ms, 100);
        assert_eq!(metrics.n_eval, 2);
        assert_eq!(metrics.n_samples, 2);
    }

    #[test]
    fn test_perf_metrics_record_decode_batch() {
        let mut metrics = PerfMetrics::new();
        metrics.record_decode_batch(300, 10);
        metrics.record_decode_batch(200, 5);
        assert_eq!(metrics.t_eval_ms, 500);
        assert_eq!(metrics.n_eval, 15);
        // Each batch counts as a single sample regardless of its token count.
        assert_eq!(metrics.n_samples, 2);
    }

    #[test]
    fn test_perf_metrics_tokens_per_second() {
        let mut metrics = PerfMetrics::new();
        metrics.record_decode_batch(1000, 100);
        assert!((metrics.tokens_per_second() - 100.0).abs() < EPS);
    }

    #[test]
    fn test_perf_metrics_prefill_throughput() {
        let mut metrics = PerfMetrics::new();
        metrics.record_prefill(500, 1000);
        assert!((metrics.prefill_tokens_per_second() - 2000.0).abs() < EPS);
    }

    #[test]
    fn test_perf_metrics_zero_time_rates() {
        // With nothing recorded, every derived rate falls back to 0.0.
        let mut metrics = PerfMetrics::new();
        assert_eq!(metrics.tokens_per_second(), 0.0);
        assert_eq!(metrics.prefill_tokens_per_second(), 0.0);
        assert_eq!(metrics.avg_token_latency_ms(), 0.0);

        // Tokens recorded with a zero duration must not divide by zero.
        metrics.record_prefill(0, 8);
        metrics.record_decode_batch(0, 5);
        assert_eq!(metrics.tokens_per_second(), 0.0);
        assert_eq!(metrics.prefill_tokens_per_second(), 0.0);
    }

    #[test]
    fn test_perf_metrics_avg_token_latency() {
        let mut metrics = PerfMetrics::new();
        metrics.record_decode_batch(1000, 100);
        assert!((metrics.avg_token_latency_ms() - 10.0).abs() < EPS);
    }

    #[test]
    fn test_perf_metrics_total_ms() {
        let mut metrics = PerfMetrics::new();
        metrics.record_load(1000);
        metrics.record_prefill(200, 512);
        metrics.record_decode_batch(300, 100);
        assert_eq!(metrics.total_ms(), 1500);
    }

    #[test]
    fn test_perf_metrics_time_to_first_token() {
        let mut metrics = PerfMetrics::new();
        metrics.record_load(1000);
        metrics.record_prefill(200, 512);
        assert_eq!(metrics.time_to_first_token_ms(), 1200);
    }

    #[test]
    fn test_perf_metrics_summary() {
        let mut metrics = PerfMetrics::new();
        metrics.record_load(100);
        metrics.record_prefill(500, 1000);
        metrics.record_decode_batch(1000, 100);
        assert_eq!(
            metrics.summary(),
            "load: 100ms, prefill: 500ms (2000.0 tok/s, 1000 tokens), decode: 1000ms (100.0 tok/s, 100 tokens), total: 1600ms"
        );
    }

    #[test]
    fn test_perf_metrics_reset() {
        let mut metrics = PerfMetrics::new();
        metrics.record_load(1500);
        metrics.record_prefill(200, 512);
        metrics.record_decode_batch(300, 100);
        metrics.reset();
        assert_eq!(metrics.t_load_ms, 0);
        assert_eq!(metrics.t_p_eval_ms, 0);
        assert_eq!(metrics.t_eval_ms, 0);
        assert_eq!(metrics.n_p_eval, 0);
        assert_eq!(metrics.n_eval, 0);
        assert_eq!(metrics.n_samples, 0);
    }

    #[test]
    fn test_inference_phase_default() {
        let phase = InferencePhase::default();
        assert_eq!(phase, InferencePhase::Prefill);
    }

    #[test]
    fn test_inference_phase_eq() {
        assert_eq!(InferencePhase::Prefill, InferencePhase::Prefill);
        assert_eq!(InferencePhase::Decode, InferencePhase::Decode);
        assert_ne!(InferencePhase::Prefill, InferencePhase::Decode);
    }
}