1use std::fmt::{Debug, Display, Formatter};
3
4#[derive(Clone, Copy, Debug)]
6pub struct LlamaTimings {
7 pub(crate) timings: llama_cpp_sys_2::llama_perf_context_data,
8}
9
10impl LlamaTimings {
11 #[allow(clippy::too_many_arguments)]
21 #[must_use]
22 pub fn new(
23 t_start_ms: f64,
24 t_load_ms: f64,
25 t_p_eval_ms: f64,
26 t_eval_ms: f64,
27 n_p_eval: i32,
28 n_eval: i32,
29 n_reused: i32,
30 ) -> Self {
31 Self {
32 timings: llama_cpp_sys_2::llama_perf_context_data {
33 t_start_ms,
34 t_load_ms,
35 t_p_eval_ms,
36 t_eval_ms,
37 n_p_eval,
38 n_eval,
39 n_reused,
40 },
41 }
42 }
43
44 #[must_use]
46 pub fn t_start_ms(&self) -> f64 {
47 self.timings.t_start_ms
48 }
49
50 #[must_use]
52 pub fn t_load_ms(&self) -> f64 {
53 self.timings.t_load_ms
54 }
55
56 #[must_use]
58 pub fn t_p_eval_ms(&self) -> f64 {
59 self.timings.t_p_eval_ms
60 }
61
62 #[must_use]
64 pub fn t_eval_ms(&self) -> f64 {
65 self.timings.t_eval_ms
66 }
67
68 #[must_use]
70 pub fn n_p_eval(&self) -> i32 {
71 self.timings.n_p_eval
72 }
73
74 #[must_use]
76 pub fn n_eval(&self) -> i32 {
77 self.timings.n_eval
78 }
79
80 pub fn set_t_start_ms(&mut self, t_start_ms: f64) {
82 self.timings.t_start_ms = t_start_ms;
83 }
84
85 pub fn set_t_load_ms(&mut self, t_load_ms: f64) {
87 self.timings.t_load_ms = t_load_ms;
88 }
89
90 pub fn set_t_p_eval_ms(&mut self, t_p_eval_ms: f64) {
92 self.timings.t_p_eval_ms = t_p_eval_ms;
93 }
94
95 pub fn set_t_eval_ms(&mut self, t_eval_ms: f64) {
97 self.timings.t_eval_ms = t_eval_ms;
98 }
99
100 pub fn set_n_p_eval(&mut self, n_p_eval: i32) {
102 self.timings.n_p_eval = n_p_eval;
103 }
104
105 pub fn set_n_eval(&mut self, n_eval: i32) {
107 self.timings.n_eval = n_eval;
108 }
109}
110
111impl Display for LlamaTimings {
112 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
113 writeln!(f, "load time = {:.2} ms", self.t_load_ms())?;
114 writeln!(
115 f,
116 "prompt eval time = {:.2} ms / {} tokens ({:.2} ms per token, {:.2} tokens per second)",
117 self.t_p_eval_ms(),
118 self.n_p_eval(),
119 self.t_p_eval_ms() / f64::from(self.n_p_eval()),
120 1e3 / self.t_p_eval_ms() * f64::from(self.n_p_eval())
121 )?;
122 writeln!(
123 f,
124 "eval time = {:.2} ms / {} runs ({:.2} ms per token, {:.2} tokens per second)",
125 self.t_eval_ms(),
126 self.n_eval(),
127 self.t_eval_ms() / f64::from(self.n_eval()),
128 1e3 / self.t_eval_ms() * f64::from(self.n_eval())
129 )?;
130 Ok(())
131 }
132}