llama_cpp_4/context/perf.rs
1//! Safe wrapper around `llama_perf_context_data`.
2use std::fmt::{Debug, Display, Formatter};
3
4use llama_cpp_sys_4::{ggml_time_us, llama_perf_context_print};
5
6use crate::context::LlamaContext;
7
8/// A wrapper around `llama_perf_context_data`.
9#[derive(Clone, Copy, Debug)]
10pub struct PerfContextData {
11 pub(crate) perf_context_data: llama_cpp_sys_4::llama_perf_context_data,
12}
13
14impl PerfContextData {
15 /// Create a new `PerfContextData`.
16 /// ```
17 /// # use llama_cpp_4::timing::PerfContextData;
18 /// let timings = PerfContextData::new(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7, 8, 9);
19 /// let timings_str = "load time = 3.00 ms
20 /// sample time = 4.00 ms / 7 runs (0.57 ms per token, 1750.00 tokens per second)
21 /// prompt eval time = 5.00 ms / 8 tokens (0.62 ms per token, 1600.00 tokens per second)
22 /// eval time = 6.00 ms / 9 runs (0.67 ms per token, 1500.00 tokens per second)
23 /// total time = 1.00 ms";
24 /// assert_eq!(timings_str, format!("{}", timings));
25 /// ```
26 #[allow(clippy::too_many_arguments)]
27 #[must_use]
28 pub fn new(
29 t_start_ms: f64,
30 // t_end_ms: f64,
31 t_load_ms: f64,
32 // t_sample_ms: f64,
33 t_p_eval_ms: f64,
34 t_eval_ms: f64,
35 // n_sample: i32,
36 n_p_eval: i32,
37 n_eval: i32,
38 ) -> Self {
39 Self {
40 perf_context_data: llama_cpp_sys_4::llama_perf_context_data {
41 t_start_ms,
42 // t_end_ms,
43 t_load_ms,
44 // t_sample_ms,
45 t_p_eval_ms,
46 t_eval_ms,
47 // n_sample,
48 n_p_eval,
49 n_eval,
50 n_reused: 0,
51 },
52 }
53 }
54
55 /// print llama context performance data
56 /// load time
57 /// prompt eval time
58 /// eval time
59 /// total time
60 pub fn print(ctx: &LlamaContext<'_>) {
61 unsafe {
62 llama_perf_context_print(ctx.context.as_ptr());
63 };
64 }
65
66 /// Get the start time in milliseconds.
67 #[must_use]
68 pub fn t_start_ms(&self) -> f64 {
69 self.perf_context_data.t_start_ms
70 }
71
72 /// Get the end time in milliseconds.
73 #[must_use]
74 pub fn t_end_ms(&self) -> f64 {
75 // self.perf_context_data.t_end_ms
76
77 #[allow(clippy::cast_precision_loss)]
78 {
79 1e-3 * (unsafe { ggml_time_us() }) as f64
80 }
81 }
82
83 /// Get the load time in milliseconds.
84 #[must_use]
85 pub fn t_load_ms(&self) -> f64 {
86 self.perf_context_data.t_load_ms
87 }
88
89 // /// Get the sample time in milliseconds.
90 // #[must_use]
91 // pub fn t_sample_ms(&self) -> f64 {
92 // self.perf_context_data.t_sample_ms
93 // }
94
95 /// Get the prompt evaluation time in milliseconds.
96 #[must_use]
97 pub fn t_p_eval_ms(&self) -> f64 {
98 self.perf_context_data.t_p_eval_ms
99 }
100
101 /// Get the evaluation time in milliseconds.
102 #[must_use]
103 pub fn t_eval_ms(&self) -> f64 {
104 self.perf_context_data.t_eval_ms
105 }
106
107 // /// Get the number of samples.
108 // #[must_use]
109 // pub fn n_sample(&self) -> i32 {
110 // self.perf_context_data.n_sample
111 // }
112
113 /// Get the number of prompt evaluations.
114 #[must_use]
115 pub fn n_p_eval(&self) -> i32 {
116 self.perf_context_data.n_p_eval
117 }
118
119 /// Get the number of evaluations.
120 #[must_use]
121 pub fn n_eval(&self) -> i32 {
122 self.perf_context_data.n_eval
123 }
124
125 /// Set the start time in milliseconds.
126 pub fn set_t_start_ms(&mut self, t_start_ms: f64) {
127 self.perf_context_data.t_start_ms = t_start_ms;
128 }
129
130 // /// Set the end time in milliseconds.
131 // pub fn set_t_end_ms(&mut self, t_end_ms: f64) {
132 // self.perf_context_data.t_end_ms = t_end_ms;
133 // }
134
135 /// Set the load time in milliseconds.
136 pub fn set_t_load_ms(&mut self, t_load_ms: f64) {
137 self.perf_context_data.t_load_ms = t_load_ms;
138 }
139
140 // /// Set the sample time in milliseconds.
141 // pub fn set_t_sample_ms(&mut self, t_sample_ms: f64) {
142 // self.perf_context_data.t_sample_ms = t_sample_ms;
143 // }
144
145 /// Set the prompt evaluation time in milliseconds.
146 pub fn set_t_p_eval_ms(&mut self, t_p_eval_ms: f64) {
147 self.perf_context_data.t_p_eval_ms = t_p_eval_ms;
148 }
149
150 /// Set the evaluation time in milliseconds.
151 pub fn set_t_eval_ms(&mut self, t_eval_ms: f64) {
152 self.perf_context_data.t_eval_ms = t_eval_ms;
153 }
154
155 // /// Set the number of samples.
156 // pub fn set_n_sample(&mut self, n_sample: i32) {
157 // self.perf_context_data.n_sample = n_sample;
158 // }
159
160 /// Set the number of prompt evaluations.
161 pub fn set_n_p_eval(&mut self, n_p_eval: i32) {
162 self.perf_context_data.n_p_eval = n_p_eval;
163 }
164
165 /// Set the number of evaluations.
166 pub fn set_n_eval(&mut self, n_eval: i32) {
167 self.perf_context_data.n_eval = n_eval;
168 }
169}
170
171impl Display for PerfContextData {
172 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
173 writeln!(f, "load time = {:.2} ms", self.t_load_ms())?;
174 // writeln!(
175 // f,
176 // "sample time = {:.2} ms / {} runs ({:.2} ms per token, {:.2} tokens per second)",
177 // self.t_sample_ms(),
178 // self.n_sample(),
179 // self.t_sample_ms() / f64::from(self.n_sample()),
180 // 1e3 / self.t_sample_ms() * f64::from(self.n_sample())
181 // )?;
182 writeln!(
183 f,
184 "prompt eval time = {:.2} ms / {} tokens ({:.2} ms per token, {:.2} tokens per second)",
185 self.t_p_eval_ms(),
186 self.n_p_eval(),
187 self.t_p_eval_ms() / f64::from(self.n_p_eval()),
188 1e3 / self.t_p_eval_ms() * f64::from(self.n_p_eval())
189 )?;
190 writeln!(
191 f,
192 "eval time = {:.2} ms / {} runs ({:.2} ms per token, {:.2} tokens per second)",
193 self.t_eval_ms(),
194 self.n_eval(),
195 self.t_eval_ms() / f64::from(self.n_eval()),
196 1e3 / self.t_eval_ms() * f64::from(self.n_eval())
197 )?;
198 write!(
199 f,
200 "total time = {:.2} ms",
201 self.t_end_ms() - self.t_start_ms()
202 )
203 }
204}