llama_cpp_4/context/perf.rs
1//! Safe wrapper around `llama_perf_context_data`.
2use std::fmt::{Debug, Display, Formatter};
3
4use llama_cpp_sys_4::{ggml_time_us, llama_perf_context_print};
5
6use crate::context::LlamaContext;
7
8/// A wrapper around `llama_perf_context_data`.
9#[derive(Clone, Copy, Debug)]
10pub struct PerfContextData {
11 pub(crate) perf_context_data: llama_cpp_sys_4::llama_perf_context_data,
12}
13
14impl PerfContextData {
15 /// Create a new `PerfContextData`.
16 /// ```
17 /// # use llama_cpp_4::context::perf::PerfContextData;
18 /// let timings = PerfContextData::new(1.0, 2.0, 3.0, 4.0, 5, 6);
19 /// assert_eq!(timings.t_load_ms(), 2.0);
20 /// assert_eq!(timings.t_p_eval_ms(), 3.0);
21 /// assert_eq!(timings.t_eval_ms(), 4.0);
22 /// assert_eq!(timings.n_p_eval(), 5);
23 /// assert_eq!(timings.n_eval(), 6);
24 /// ```
25 #[allow(clippy::too_many_arguments)]
26 #[must_use]
27 pub fn new(
28 t_start_ms: f64,
29 // t_end_ms: f64,
30 t_load_ms: f64,
31 // t_sample_ms: f64,
32 t_p_eval_ms: f64,
33 t_eval_ms: f64,
34 // n_sample: i32,
35 n_p_eval: i32,
36 n_eval: i32,
37 ) -> Self {
38 Self {
39 perf_context_data: llama_cpp_sys_4::llama_perf_context_data {
40 t_start_ms,
41 // t_end_ms,
42 t_load_ms,
43 // t_sample_ms,
44 t_p_eval_ms,
45 t_eval_ms,
46 // n_sample,
47 n_p_eval,
48 n_eval,
49 n_reused: 0,
50 },
51 }
52 }
53
54 /// print llama context performance data
55 /// load time
56 /// prompt eval time
57 /// eval time
58 /// total time
59 pub fn print(ctx: &LlamaContext<'_>) {
60 unsafe {
61 llama_perf_context_print(ctx.context.as_ptr());
62 };
63 }
64
65 /// Get the start time in milliseconds.
66 #[must_use]
67 pub fn t_start_ms(&self) -> f64 {
68 self.perf_context_data.t_start_ms
69 }
70
71 /// Get the end time in milliseconds.
72 #[must_use]
73 pub fn t_end_ms(&self) -> f64 {
74 // self.perf_context_data.t_end_ms
75
76 #[allow(clippy::cast_precision_loss)]
77 {
78 1e-3 * (unsafe { ggml_time_us() }) as f64
79 }
80 }
81
82 /// Get the load time in milliseconds.
83 #[must_use]
84 pub fn t_load_ms(&self) -> f64 {
85 self.perf_context_data.t_load_ms
86 }
87
88 // /// Get the sample time in milliseconds.
89 // #[must_use]
90 // pub fn t_sample_ms(&self) -> f64 {
91 // self.perf_context_data.t_sample_ms
92 // }
93
94 /// Get the prompt evaluation time in milliseconds.
95 #[must_use]
96 pub fn t_p_eval_ms(&self) -> f64 {
97 self.perf_context_data.t_p_eval_ms
98 }
99
100 /// Get the evaluation time in milliseconds.
101 #[must_use]
102 pub fn t_eval_ms(&self) -> f64 {
103 self.perf_context_data.t_eval_ms
104 }
105
106 // /// Get the number of samples.
107 // #[must_use]
108 // pub fn n_sample(&self) -> i32 {
109 // self.perf_context_data.n_sample
110 // }
111
112 /// Get the number of prompt evaluations.
113 #[must_use]
114 pub fn n_p_eval(&self) -> i32 {
115 self.perf_context_data.n_p_eval
116 }
117
118 /// Get the number of evaluations.
119 #[must_use]
120 pub fn n_eval(&self) -> i32 {
121 self.perf_context_data.n_eval
122 }
123
124 /// Set the start time in milliseconds.
125 pub fn set_t_start_ms(&mut self, t_start_ms: f64) {
126 self.perf_context_data.t_start_ms = t_start_ms;
127 }
128
129 // /// Set the end time in milliseconds.
130 // pub fn set_t_end_ms(&mut self, t_end_ms: f64) {
131 // self.perf_context_data.t_end_ms = t_end_ms;
132 // }
133
134 /// Set the load time in milliseconds.
135 pub fn set_t_load_ms(&mut self, t_load_ms: f64) {
136 self.perf_context_data.t_load_ms = t_load_ms;
137 }
138
139 // /// Set the sample time in milliseconds.
140 // pub fn set_t_sample_ms(&mut self, t_sample_ms: f64) {
141 // self.perf_context_data.t_sample_ms = t_sample_ms;
142 // }
143
144 /// Set the prompt evaluation time in milliseconds.
145 pub fn set_t_p_eval_ms(&mut self, t_p_eval_ms: f64) {
146 self.perf_context_data.t_p_eval_ms = t_p_eval_ms;
147 }
148
149 /// Set the evaluation time in milliseconds.
150 pub fn set_t_eval_ms(&mut self, t_eval_ms: f64) {
151 self.perf_context_data.t_eval_ms = t_eval_ms;
152 }
153
154 // /// Set the number of samples.
155 // pub fn set_n_sample(&mut self, n_sample: i32) {
156 // self.perf_context_data.n_sample = n_sample;
157 // }
158
159 /// Set the number of prompt evaluations.
160 pub fn set_n_p_eval(&mut self, n_p_eval: i32) {
161 self.perf_context_data.n_p_eval = n_p_eval;
162 }
163
164 /// Set the number of evaluations.
165 pub fn set_n_eval(&mut self, n_eval: i32) {
166 self.perf_context_data.n_eval = n_eval;
167 }
168}
169
170impl Display for PerfContextData {
171 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
172 writeln!(f, "load time = {:.2} ms", self.t_load_ms())?;
173 // writeln!(
174 // f,
175 // "sample time = {:.2} ms / {} runs ({:.2} ms per token, {:.2} tokens per second)",
176 // self.t_sample_ms(),
177 // self.n_sample(),
178 // self.t_sample_ms() / f64::from(self.n_sample()),
179 // 1e3 / self.t_sample_ms() * f64::from(self.n_sample())
180 // )?;
181 writeln!(
182 f,
183 "prompt eval time = {:.2} ms / {} tokens ({:.2} ms per token, {:.2} tokens per second)",
184 self.t_p_eval_ms(),
185 self.n_p_eval(),
186 self.t_p_eval_ms() / f64::from(self.n_p_eval()),
187 1e3 / self.t_p_eval_ms() * f64::from(self.n_p_eval())
188 )?;
189 writeln!(
190 f,
191 "eval time = {:.2} ms / {} runs ({:.2} ms per token, {:.2} tokens per second)",
192 self.t_eval_ms(),
193 self.n_eval(),
194 self.t_eval_ms() / f64::from(self.n_eval()),
195 1e3 / self.t_eval_ms() * f64::from(self.n_eval())
196 )?;
197 write!(
198 f,
199 "total time = {:.2} ms",
200 self.t_end_ms() - self.t_start_ms()
201 )
202 }
203}