1use std::fmt::{Debug, Display, Formatter};
3
/// A thin wrapper around a raw `llama_cpp_sys_2::llama_perf_context_data` value.
///
/// The wrapped struct is plain data (this type is `Copy`), so values can be
/// passed around and duplicated freely.
#[derive(Clone, Copy, Debug)]
pub struct LlamaTimings {
    /// The raw timing data; only accessible from within this crate.
    pub(crate) timings: llama_cpp_sys_2::llama_perf_context_data,
}
9
10impl LlamaTimings {
11 #[allow(clippy::too_many_arguments)]
21 #[must_use]
22 pub fn new(
23 t_start_ms: f64,
24 t_load_ms: f64,
25 t_p_eval_ms: f64,
26 t_eval_ms: f64,
27 n_p_eval: i32,
28 n_eval: i32,
29 n_reused: i32,
30 ) -> Self {
31 Self {
32 timings: llama_cpp_sys_2::llama_perf_context_data {
33 t_start_ms,
34 t_load_ms,
35 t_p_eval_ms,
36 t_eval_ms,
37 n_p_eval,
38 n_eval,
39 },
40 }
41 }
42
43 #[must_use]
45 pub fn t_start_ms(&self) -> f64 {
46 self.timings.t_start_ms
47 }
48
49 #[must_use]
51 pub fn t_load_ms(&self) -> f64 {
52 self.timings.t_load_ms
53 }
54
55 #[must_use]
57 pub fn t_p_eval_ms(&self) -> f64 {
58 self.timings.t_p_eval_ms
59 }
60
61 #[must_use]
63 pub fn t_eval_ms(&self) -> f64 {
64 self.timings.t_eval_ms
65 }
66
67 #[must_use]
69 pub fn n_p_eval(&self) -> i32 {
70 self.timings.n_p_eval
71 }
72
73 #[must_use]
75 pub fn n_eval(&self) -> i32 {
76 self.timings.n_eval
77 }
78
79 pub fn set_t_start_ms(&mut self, t_start_ms: f64) {
81 self.timings.t_start_ms = t_start_ms;
82 }
83
84 pub fn set_t_load_ms(&mut self, t_load_ms: f64) {
86 self.timings.t_load_ms = t_load_ms;
87 }
88
89 pub fn set_t_p_eval_ms(&mut self, t_p_eval_ms: f64) {
91 self.timings.t_p_eval_ms = t_p_eval_ms;
92 }
93
94 pub fn set_t_eval_ms(&mut self, t_eval_ms: f64) {
96 self.timings.t_eval_ms = t_eval_ms;
97 }
98
99 pub fn set_n_p_eval(&mut self, n_p_eval: i32) {
101 self.timings.n_p_eval = n_p_eval;
102 }
103
104 pub fn set_n_eval(&mut self, n_eval: i32) {
106 self.timings.n_eval = n_eval;
107 }
108}
109
110impl Display for LlamaTimings {
111 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
112 writeln!(f, "load time = {:.2} ms", self.t_load_ms())?;
113 writeln!(
114 f,
115 "prompt eval time = {:.2} ms / {} tokens ({:.2} ms per token, {:.2} tokens per second)",
116 self.t_p_eval_ms(),
117 self.n_p_eval(),
118 self.t_p_eval_ms() / f64::from(self.n_p_eval()),
119 1e3 / self.t_p_eval_ms() * f64::from(self.n_p_eval())
120 )?;
121 writeln!(
122 f,
123 "eval time = {:.2} ms / {} runs ({:.2} ms per token, {:.2} tokens per second)",
124 self.t_eval_ms(),
125 self.n_eval(),
126 self.t_eval_ms() / f64::from(self.n_eval()),
127 1e3 / self.t_eval_ms() * f64::from(self.n_eval())
128 )?;
129 Ok(())
130 }
131}