// llama_cpp_bindings/timing.rs
use std::fmt::{Debug, Display, Formatter};
3
4#[derive(Clone, Copy, Debug)]
6pub struct LlamaTimings {
7 pub timings: llama_cpp_bindings_sys::llama_perf_context_data,
9}
10
11impl LlamaTimings {
12 #[must_use]
22 pub fn new(
23 t_start_ms: f64,
24 t_load_ms: f64,
25 t_p_eval_ms: f64,
26 t_eval_ms: f64,
27 n_p_eval: i32,
28 n_eval: i32,
29 n_reused: i32,
30 ) -> Self {
31 Self {
32 timings: llama_cpp_bindings_sys::llama_perf_context_data {
33 t_start_ms,
34 t_load_ms,
35 t_p_eval_ms,
36 t_eval_ms,
37 n_p_eval,
38 n_eval,
39 n_reused,
40 },
41 }
42 }
43
44 #[must_use]
46 pub fn t_start_ms(&self) -> f64 {
47 self.timings.t_start_ms
48 }
49
50 #[must_use]
52 pub fn t_load_ms(&self) -> f64 {
53 self.timings.t_load_ms
54 }
55
56 #[must_use]
58 pub fn t_p_eval_ms(&self) -> f64 {
59 self.timings.t_p_eval_ms
60 }
61
62 #[must_use]
64 pub fn t_eval_ms(&self) -> f64 {
65 self.timings.t_eval_ms
66 }
67
68 #[must_use]
70 pub fn n_p_eval(&self) -> i32 {
71 self.timings.n_p_eval
72 }
73
74 #[must_use]
76 pub fn n_eval(&self) -> i32 {
77 self.timings.n_eval
78 }
79
80 pub fn set_t_start_ms(&mut self, t_start_ms: f64) {
82 self.timings.t_start_ms = t_start_ms;
83 }
84
85 pub fn set_t_load_ms(&mut self, t_load_ms: f64) {
87 self.timings.t_load_ms = t_load_ms;
88 }
89
90 pub fn set_t_p_eval_ms(&mut self, t_p_eval_ms: f64) {
92 self.timings.t_p_eval_ms = t_p_eval_ms;
93 }
94
95 pub fn set_t_eval_ms(&mut self, t_eval_ms: f64) {
97 self.timings.t_eval_ms = t_eval_ms;
98 }
99
100 pub fn set_n_p_eval(&mut self, n_p_eval: i32) {
102 self.timings.n_p_eval = n_p_eval;
103 }
104
105 pub fn set_n_eval(&mut self, n_eval: i32) {
107 self.timings.n_eval = n_eval;
108 }
109}
110
111impl Display for LlamaTimings {
112 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
113 writeln!(f, "load time = {:.2} ms", self.t_load_ms())?;
114
115 if self.n_p_eval() > 0 {
116 writeln!(
117 f,
118 "prompt eval time = {:.2} ms / {} tokens ({:.2} ms per token, {:.2} tokens per second)",
119 self.t_p_eval_ms(),
120 self.n_p_eval(),
121 self.t_p_eval_ms() / f64::from(self.n_p_eval()),
122 1e3 / self.t_p_eval_ms() * f64::from(self.n_p_eval())
123 )?;
124 } else {
125 writeln!(
126 f,
127 "prompt eval time = {:.2} ms / 0 tokens",
128 self.t_p_eval_ms(),
129 )?;
130 }
131
132 if self.n_eval() > 0 {
133 writeln!(
134 f,
135 "eval time = {:.2} ms / {} runs ({:.2} ms per token, {:.2} tokens per second)",
136 self.t_eval_ms(),
137 self.n_eval(),
138 self.t_eval_ms() / f64::from(self.n_eval()),
139 1e3 / self.t_eval_ms() * f64::from(self.n_eval())
140 )?;
141 } else {
142 writeln!(f, "eval time = {:.2} ms / 0 runs", self.t_eval_ms(),)?;
143 }
144
145 Ok(())
146 }
147}
148
#[cfg(test)]
mod tests {
    use super::LlamaTimings;

    /// Asserts two millisecond values are equal up to floating-point noise.
    fn assert_close(actual: f64, expected: f64) {
        assert!(
            (actual - expected).abs() < f64::EPSILON,
            "expected {expected}, got {actual}"
        );
    }

    #[test]
    fn new_stores_every_argument_in_its_field() {
        let timings = LlamaTimings::new(1.0, 2.0, 3.0, 4.0, 5, 6, 7);

        assert_close(timings.t_start_ms(), 1.0);
        assert_close(timings.t_load_ms(), 2.0);
        assert_close(timings.t_p_eval_ms(), 3.0);
        assert_close(timings.t_eval_ms(), 4.0);
        assert_eq!(timings.n_p_eval(), 5);
        assert_eq!(timings.n_eval(), 6);
        // Read through the public raw record so this check does not depend on
        // any particular accessor being present.
        assert_eq!(timings.timings.n_reused, 7);
    }

    #[test]
    fn setters_overwrite_their_fields() {
        let mut timings = LlamaTimings::new(0.0, 0.0, 0.0, 0.0, 0, 0, 0);

        timings.set_t_start_ms(10.0);
        timings.set_t_load_ms(20.0);
        timings.set_t_p_eval_ms(30.0);
        timings.set_t_eval_ms(40.0);
        timings.set_n_p_eval(50);
        timings.set_n_eval(60);

        assert_close(timings.t_start_ms(), 10.0);
        assert_close(timings.t_load_ms(), 20.0);
        assert_close(timings.t_p_eval_ms(), 30.0);
        assert_close(timings.t_eval_ms(), 40.0);
        assert_eq!(timings.n_p_eval(), 50);
        assert_eq!(timings.n_eval(), 60);
    }

    #[test]
    fn display_format_with_valid_counts() {
        let timings = LlamaTimings::new(1.0, 2.0, 3.0, 4.0, 5, 6, 1);
        let output = format!("{timings}");

        assert!(output.contains("load time = 2.00 ms"));
        // Pin the derived rates as well as the raw figures:
        // 3 ms / 5 tokens = 0.60 ms per token, 1000 / 3 * 5 = 1666.67 per second.
        assert!(output.contains(
            "prompt eval time = 3.00 ms / 5 tokens (0.60 ms per token, 1666.67 tokens per second)"
        ));
        // 4 ms / 6 runs = 0.67 ms per token, 1000 / 4 * 6 = 1500.00 per second.
        assert!(output.contains(
            "eval time = 4.00 ms / 6 runs (0.67 ms per token, 1500.00 tokens per second)"
        ));
    }

    #[test]
    fn display_format_handles_zero_eval_counts() {
        let timings = LlamaTimings::new(0.0, 1.0, 2.0, 3.0, 0, 0, 0);
        let output = format!("{timings}");

        assert!(output.contains("load time = 1.00 ms"));
        assert!(output.contains("prompt eval time = 2.00 ms / 0 tokens"));
        assert!(output.contains("eval time = 3.00 ms / 0 runs"));
        // With zero counts no rate may be printed at all.
        assert!(!output.contains("per second"));
        assert!(!output.contains("NaN"));
        assert!(!output.contains("inf"));
    }
}