Skip to main content

llama_cpp_bindings/
timing.rs

1use std::fmt::{Debug, Display, Formatter};
2
3#[derive(Clone, Copy, Debug)]
4pub struct LlamaTimings {
5    pub timings: llama_cpp_bindings_sys::llama_perf_context_data,
6}
7
8impl LlamaTimings {
9    #[must_use]
10    pub const fn new(
11        t_start_ms: f64,
12        t_load_ms: f64,
13        t_p_eval_ms: f64,
14        t_eval_ms: f64,
15        n_p_eval: i32,
16        n_eval: i32,
17        n_reused: i32,
18    ) -> Self {
19        Self {
20            timings: llama_cpp_bindings_sys::llama_perf_context_data {
21                t_start_ms,
22                t_load_ms,
23                t_p_eval_ms,
24                t_eval_ms,
25                n_p_eval,
26                n_eval,
27                n_reused,
28            },
29        }
30    }
31
32    #[must_use]
33    pub const fn t_start_ms(&self) -> f64 {
34        self.timings.t_start_ms
35    }
36
37    #[must_use]
38    pub const fn t_load_ms(&self) -> f64 {
39        self.timings.t_load_ms
40    }
41
42    #[must_use]
43    pub const fn t_p_eval_ms(&self) -> f64 {
44        self.timings.t_p_eval_ms
45    }
46
47    #[must_use]
48    pub const fn t_eval_ms(&self) -> f64 {
49        self.timings.t_eval_ms
50    }
51
52    #[must_use]
53    pub const fn n_p_eval(&self) -> i32 {
54        self.timings.n_p_eval
55    }
56
57    #[must_use]
58    pub const fn n_eval(&self) -> i32 {
59        self.timings.n_eval
60    }
61
62    pub const fn set_t_start_ms(&mut self, t_start_ms: f64) {
63        self.timings.t_start_ms = t_start_ms;
64    }
65
66    pub const fn set_t_load_ms(&mut self, t_load_ms: f64) {
67        self.timings.t_load_ms = t_load_ms;
68    }
69
70    pub const fn set_t_p_eval_ms(&mut self, t_p_eval_ms: f64) {
71        self.timings.t_p_eval_ms = t_p_eval_ms;
72    }
73
74    pub const fn set_t_eval_ms(&mut self, t_eval_ms: f64) {
75        self.timings.t_eval_ms = t_eval_ms;
76    }
77
78    pub const fn set_n_p_eval(&mut self, n_p_eval: i32) {
79        self.timings.n_p_eval = n_p_eval;
80    }
81
82    pub const fn set_n_eval(&mut self, n_eval: i32) {
83        self.timings.n_eval = n_eval;
84    }
85}
86
87fn write_timings(timings: &LlamaTimings, writer: &mut dyn std::fmt::Write) -> std::fmt::Result {
88    writeln!(writer, "load time = {:.2} ms", timings.t_load_ms())?;
89
90    if timings.n_p_eval() > 0 {
91        writeln!(
92            writer,
93            "prompt eval time = {:.2} ms / {} tokens ({:.2} ms per token, {:.2} tokens per second)",
94            timings.t_p_eval_ms(),
95            timings.n_p_eval(),
96            timings.t_p_eval_ms() / f64::from(timings.n_p_eval()),
97            1e3 / timings.t_p_eval_ms() * f64::from(timings.n_p_eval())
98        )?;
99    } else {
100        writeln!(
101            writer,
102            "prompt eval time = {:.2} ms / 0 tokens",
103            timings.t_p_eval_ms(),
104        )?;
105    }
106
107    if timings.n_eval() > 0 {
108        writeln!(
109            writer,
110            "eval time = {:.2} ms / {} runs ({:.2} ms per token, {:.2} tokens per second)",
111            timings.t_eval_ms(),
112            timings.n_eval(),
113            timings.t_eval_ms() / f64::from(timings.n_eval()),
114            1e3 / timings.t_eval_ms() * f64::from(timings.n_eval())
115        )?;
116    } else {
117        writeln!(writer, "eval time = {:.2} ms / 0 runs", timings.t_eval_ms())?;
118    }
119
120    Ok(())
121}
122
123impl Display for LlamaTimings {
124    fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
125        write_timings(self, formatter)
126    }
127}
128
#[cfg(test)]
mod tests {
    use super::LlamaTimings;

    /// Writer that accepts `remaining` calls to `write_str` and fails every call after that.
    ///
    /// `failed` records whether a failure was ever reported, so tests can check that an error
    /// is returned exactly when the writer produced one.
    struct FailAfterNWrites {
        remaining: usize,
        failed: bool,
    }

    impl FailAfterNWrites {
        fn new(remaining: usize) -> Self {
            Self {
                remaining,
                failed: false,
            }
        }
    }

    impl std::fmt::Write for FailAfterNWrites {
        fn write_str(&mut self, _text: &str) -> std::fmt::Result {
            if self.remaining == 0 {
                self.failed = true;

                return Err(std::fmt::Error);
            }
            self.remaining -= 1;

            Ok(())
        }
    }

    /// Timings with every field set to zero, used as a blank slate by the setter tests.
    fn zeroed() -> LlamaTimings {
        LlamaTimings::new(0.0, 0.0, 0.0, 0.0, 0, 0, 0)
    }

    #[test]
    fn display_format_with_valid_counts() {
        let timings = LlamaTimings::new(1.0, 2.0, 3.0, 4.0, 5, 6, 1);
        let output = format!("{timings}");

        assert!(output.contains("load time = 2.00 ms"));
        assert!(output.contains("prompt eval time = 3.00 ms / 5 tokens"));
        assert!(output.contains("eval time = 4.00 ms / 6 runs"));
    }

    #[test]
    fn display_format_handles_zero_eval_counts() {
        let timings = LlamaTimings::new(0.0, 1.0, 2.0, 3.0, 0, 0, 0);
        let output = format!("{timings}");

        assert!(output.contains("load time = 1.00 ms"));
        assert!(output.contains("prompt eval time = 2.00 ms / 0 tokens"));
        assert!(output.contains("eval time = 3.00 ms / 0 runs"));
        assert!(!output.contains("NaN"));
        assert!(!output.contains("inf"));
    }

    #[test]
    fn set_t_start_ms() {
        let mut timings = zeroed();

        timings.set_t_start_ms(42.0);

        assert!((timings.t_start_ms() - 42.0).abs() < f64::EPSILON);
    }

    #[test]
    fn set_t_load_ms() {
        let mut timings = zeroed();

        timings.set_t_load_ms(10.5);

        assert!((timings.t_load_ms() - 10.5).abs() < f64::EPSILON);
    }

    #[test]
    fn set_t_p_eval_ms() {
        let mut timings = zeroed();

        timings.set_t_p_eval_ms(7.7);

        assert!((timings.t_p_eval_ms() - 7.7).abs() < f64::EPSILON);
    }

    #[test]
    fn set_t_eval_ms() {
        let mut timings = zeroed();

        timings.set_t_eval_ms(3.3);

        assert!((timings.t_eval_ms() - 3.3).abs() < f64::EPSILON);
    }

    #[test]
    fn set_n_p_eval() {
        let mut timings = zeroed();

        timings.set_n_p_eval(100);

        assert_eq!(timings.n_p_eval(), 100);
    }

    #[test]
    fn set_n_eval() {
        let mut timings = zeroed();

        timings.set_n_eval(200);

        assert_eq!(timings.n_eval(), 200);
    }

    #[test]
    fn write_timings_propagates_writer_errors() {
        let timings = LlamaTimings::new(1.0, 2.0, 3.0, 4.0, 5, 6, 1);
        // A budget of zero makes the very first write fail.
        let mut writer = FailAfterNWrites::new(0);

        let result = super::write_timings(&timings, &mut writer);

        assert!(result.is_err());
    }

    #[test]
    fn write_timings_zero_p_eval_with_failing_writer() {
        let timings = LlamaTimings::new(1.0, 2.0, 3.0, 4.0, 0, 6, 1);
        let mut writer = FailAfterNWrites::new(1);

        let result = super::write_timings(&timings, &mut writer);

        assert!(result.is_err());
    }

    #[test]
    fn write_timings_fails_at_each_writeln_boundary() {
        let with_counts = LlamaTimings::new(1.0, 2.0, 3.0, 4.0, 5, 6, 1);
        let zero_counts = LlamaTimings::new(1.0, 2.0, 3.0, 4.0, 0, 0, 1);

        for writes_allowed in 0..20 {
            for timings in [&with_counts, &zero_counts] {
                let mut writer = FailAfterNWrites::new(writes_allowed);

                let result = super::write_timings(timings, &mut writer);

                // An error must surface exactly when the writer reported one: failures are
                // never swallowed, and a writer that never failed never yields an error.
                assert_eq!(
                    result.is_err(),
                    writer.failed,
                    "unexpected result with {writes_allowed} writes allowed"
                );
            }
        }
    }
}