1use std::fmt::{Debug, Display, Formatter};
2
3#[derive(Clone, Copy, Debug)]
4pub struct LlamaTimings {
5 pub timings: llama_cpp_bindings_sys::llama_perf_context_data,
6}
7
8impl LlamaTimings {
9 #[must_use]
10 pub const fn new(
11 t_start_ms: f64,
12 t_load_ms: f64,
13 t_p_eval_ms: f64,
14 t_eval_ms: f64,
15 n_p_eval: i32,
16 n_eval: i32,
17 n_reused: i32,
18 ) -> Self {
19 Self {
20 timings: llama_cpp_bindings_sys::llama_perf_context_data {
21 t_start_ms,
22 t_load_ms,
23 t_p_eval_ms,
24 t_eval_ms,
25 n_p_eval,
26 n_eval,
27 n_reused,
28 },
29 }
30 }
31
32 #[must_use]
33 pub const fn t_start_ms(&self) -> f64 {
34 self.timings.t_start_ms
35 }
36
37 #[must_use]
38 pub const fn t_load_ms(&self) -> f64 {
39 self.timings.t_load_ms
40 }
41
42 #[must_use]
43 pub const fn t_p_eval_ms(&self) -> f64 {
44 self.timings.t_p_eval_ms
45 }
46
47 #[must_use]
48 pub const fn t_eval_ms(&self) -> f64 {
49 self.timings.t_eval_ms
50 }
51
52 #[must_use]
53 pub const fn n_p_eval(&self) -> i32 {
54 self.timings.n_p_eval
55 }
56
57 #[must_use]
58 pub const fn n_eval(&self) -> i32 {
59 self.timings.n_eval
60 }
61
62 pub const fn set_t_start_ms(&mut self, t_start_ms: f64) {
63 self.timings.t_start_ms = t_start_ms;
64 }
65
66 pub const fn set_t_load_ms(&mut self, t_load_ms: f64) {
67 self.timings.t_load_ms = t_load_ms;
68 }
69
70 pub const fn set_t_p_eval_ms(&mut self, t_p_eval_ms: f64) {
71 self.timings.t_p_eval_ms = t_p_eval_ms;
72 }
73
74 pub const fn set_t_eval_ms(&mut self, t_eval_ms: f64) {
75 self.timings.t_eval_ms = t_eval_ms;
76 }
77
78 pub const fn set_n_p_eval(&mut self, n_p_eval: i32) {
79 self.timings.n_p_eval = n_p_eval;
80 }
81
82 pub const fn set_n_eval(&mut self, n_eval: i32) {
83 self.timings.n_eval = n_eval;
84 }
85}
86
87fn write_timings(timings: &LlamaTimings, writer: &mut dyn std::fmt::Write) -> std::fmt::Result {
88 writeln!(writer, "load time = {:.2} ms", timings.t_load_ms())?;
89
90 if timings.n_p_eval() > 0 {
91 writeln!(
92 writer,
93 "prompt eval time = {:.2} ms / {} tokens ({:.2} ms per token, {:.2} tokens per second)",
94 timings.t_p_eval_ms(),
95 timings.n_p_eval(),
96 timings.t_p_eval_ms() / f64::from(timings.n_p_eval()),
97 1e3 / timings.t_p_eval_ms() * f64::from(timings.n_p_eval())
98 )?;
99 } else {
100 writeln!(
101 writer,
102 "prompt eval time = {:.2} ms / 0 tokens",
103 timings.t_p_eval_ms(),
104 )?;
105 }
106
107 if timings.n_eval() > 0 {
108 writeln!(
109 writer,
110 "eval time = {:.2} ms / {} runs ({:.2} ms per token, {:.2} tokens per second)",
111 timings.t_eval_ms(),
112 timings.n_eval(),
113 timings.t_eval_ms() / f64::from(timings.n_eval()),
114 1e3 / timings.t_eval_ms() * f64::from(timings.n_eval())
115 )?;
116 } else {
117 writeln!(writer, "eval time = {:.2} ms / 0 runs", timings.t_eval_ms())?;
118 }
119
120 Ok(())
121}
122
123impl Display for LlamaTimings {
124 fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
125 write_timings(self, formatter)
126 }
127}
128
#[cfg(test)]
mod tests {
    use super::LlamaTimings;

    /// Writer that accepts `remaining` calls to `write_str` and fails on the
    /// next one. The text itself is discarded.
    struct FailAfterNWrites {
        remaining: usize,
    }

    impl std::fmt::Write for FailAfterNWrites {
        fn write_str(&mut self, _text: &str) -> std::fmt::Result {
            if self.remaining == 0 {
                return Err(std::fmt::Error);
            }
            self.remaining -= 1;

            Ok(())
        }
    }

    /// Checks that `write_timings` fails for every write budget smaller than
    /// the number of `write_str` calls a full report needs, and succeeds once
    /// the budget is exactly sufficient.
    ///
    /// The required number of calls is measured with a probe rather than
    /// hard-coded, because how many `write_str` calls one `writeln!` performs
    /// is a detail of the formatting machinery.
    fn assert_fails_at_every_write_boundary(timings: &LlamaTimings) {
        let mut probe = FailAfterNWrites {
            remaining: usize::MAX,
        };
        assert!(super::write_timings(timings, &mut probe).is_ok());

        let writes_needed = usize::MAX - probe.remaining;
        assert!(writes_needed > 0);

        for writes_allowed in 0..=writes_needed {
            let result = super::write_timings(
                timings,
                &mut FailAfterNWrites {
                    remaining: writes_allowed,
                },
            );

            assert_eq!(
                result.is_ok(),
                writes_allowed == writes_needed,
                "unexpected outcome with {writes_allowed} of {writes_needed} writes allowed",
            );
        }
    }

    #[test]
    fn display_format_with_valid_counts() {
        let timings = LlamaTimings::new(1.0, 2.0, 3.0, 4.0, 5, 6, 1);
        let output = format!("{timings}");

        assert!(output.contains("load time = 2.00 ms"));
        assert!(output.contains("prompt eval time = 3.00 ms / 5 tokens"));
        assert!(output.contains("eval time = 4.00 ms / 6 runs"));
    }

    #[test]
    fn display_format_handles_zero_eval_counts() {
        let timings = LlamaTimings::new(0.0, 1.0, 2.0, 3.0, 0, 0, 0);
        let output = format!("{timings}");

        assert!(output.contains("load time = 1.00 ms"));
        assert!(output.contains("prompt eval time = 2.00 ms / 0 tokens"));
        assert!(output.contains("eval time = 3.00 ms / 0 runs"));
        assert!(!output.contains("NaN"));
        assert!(!output.contains("inf"));
    }

    #[test]
    fn set_t_start_ms() {
        let mut timings = LlamaTimings::new(0.0, 0.0, 0.0, 0.0, 0, 0, 0);

        timings.set_t_start_ms(42.0);

        assert!((timings.t_start_ms() - 42.0).abs() < f64::EPSILON);
    }

    #[test]
    fn set_t_load_ms() {
        let mut timings = LlamaTimings::new(0.0, 0.0, 0.0, 0.0, 0, 0, 0);

        timings.set_t_load_ms(10.5);

        assert!((timings.t_load_ms() - 10.5).abs() < f64::EPSILON);
    }

    #[test]
    fn set_t_p_eval_ms() {
        let mut timings = LlamaTimings::new(0.0, 0.0, 0.0, 0.0, 0, 0, 0);

        timings.set_t_p_eval_ms(7.7);

        assert!((timings.t_p_eval_ms() - 7.7).abs() < f64::EPSILON);
    }

    #[test]
    fn set_t_eval_ms() {
        let mut timings = LlamaTimings::new(0.0, 0.0, 0.0, 0.0, 0, 0, 0);

        timings.set_t_eval_ms(3.3);

        assert!((timings.t_eval_ms() - 3.3).abs() < f64::EPSILON);
    }

    #[test]
    fn set_n_p_eval() {
        let mut timings = LlamaTimings::new(0.0, 0.0, 0.0, 0.0, 0, 0, 0);

        timings.set_n_p_eval(100);

        assert_eq!(timings.n_p_eval(), 100);
    }

    #[test]
    fn set_n_eval() {
        let mut timings = LlamaTimings::new(0.0, 0.0, 0.0, 0.0, 0, 0, 0);

        timings.set_n_eval(200);

        assert_eq!(timings.n_eval(), 200);
    }

    #[test]
    fn write_timings_propagates_writer_errors() {
        // A zero budget makes the very first `write_str` call fail.
        let timings = LlamaTimings::new(1.0, 2.0, 3.0, 4.0, 5, 6, 1);
        let result = super::write_timings(&timings, &mut FailAfterNWrites { remaining: 0 });

        assert!(result.is_err());
    }

    #[test]
    fn write_timings_zero_p_eval_with_failing_writer() {
        let timings = LlamaTimings::new(1.0, 2.0, 3.0, 4.0, 0, 6, 1);
        let result = super::write_timings(&timings, &mut FailAfterNWrites { remaining: 1 });

        assert!(result.is_err());
    }

    #[test]
    fn write_timings_fails_at_each_writeln_boundary() {
        let with_counts = LlamaTimings::new(1.0, 2.0, 3.0, 4.0, 5, 6, 1);
        let zero_counts = LlamaTimings::new(1.0, 2.0, 3.0, 4.0, 0, 0, 1);

        assert_fails_at_every_write_boundary(&with_counts);
        assert_fails_at_every_write_boundary(&zero_counts);
    }
}