//! Safe wrapper around the `llama_timings` struct from `llama_cpp_sys_2`.
use std::fmt::{Debug, Display, Formatter};
/// Timing statistics stored as a raw `llama_cpp_sys_2::llama_timings` value.
///
/// The wrapper is `Copy`, so it can be passed around by value. Individual
/// fields are read and written through the methods on this type.
#[derive(Debug, Copy, Clone)]
pub struct LlamaTimings {
    /// The underlying raw timings record.
    pub(crate) timings: llama_cpp_sys_2::llama_timings,
}
impl LlamaTimings {
    /// Build a `LlamaTimings` from its nine raw field values.
    ///
    /// Every argument is stored unchanged in the field of the same name:
    /// the `t_*` arguments are times in milliseconds and the `n_*` arguments
    /// are counts.
    ///
    /// # Examples
    ///
    /// ```
    /// # use llama_cpp_2::timing::LlamaTimings;
    /// let timings = LlamaTimings::new(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7, 8, 9);
    /// let timings_str = "load time = 3.00 ms
    /// sample time = 4.00 ms / 7 runs (0.57 ms per token, 1750.00 tokens per second)
    /// prompt eval time = 5.00 ms / 8 tokens (0.62 ms per token, 1600.00 tokens per second)
    /// eval time = 6.00 ms / 9 runs (0.67 ms per token, 1500.00 tokens per second)
    /// total time = 1.00 ms";
    /// assert_eq!(timings_str, format!("{}", timings));
    /// ```
    #[allow(clippy::too_many_arguments)]
    #[must_use]
    pub fn new(
        t_start_ms: f64,
        t_end_ms: f64,
        t_load_ms: f64,
        t_sample_ms: f64,
        t_p_eval_ms: f64,
        t_eval_ms: f64,
        n_sample: i32,
        n_p_eval: i32,
        n_eval: i32,
    ) -> Self {
        // Assemble the raw record first, then wrap it.
        let raw = llama_cpp_sys_2::llama_timings {
            t_start_ms,
            t_end_ms,
            t_load_ms,
            t_sample_ms,
            t_p_eval_ms,
            t_eval_ms,
            n_sample,
            n_p_eval,
            n_eval,
        };
        Self { timings: raw }
    }

    // --- start time -------------------------------------------------------

    /// Returns the start time, in milliseconds.
    #[must_use]
    pub fn t_start_ms(&self) -> f64 {
        self.timings.t_start_ms
    }

    /// Overwrites the start time, in milliseconds.
    pub fn set_t_start_ms(&mut self, t_start_ms: f64) {
        self.timings.t_start_ms = t_start_ms;
    }

    // --- end time ---------------------------------------------------------

    /// Returns the end time, in milliseconds.
    #[must_use]
    pub fn t_end_ms(&self) -> f64 {
        self.timings.t_end_ms
    }

    /// Overwrites the end time, in milliseconds.
    pub fn set_t_end_ms(&mut self, t_end_ms: f64) {
        self.timings.t_end_ms = t_end_ms;
    }

    // --- load time --------------------------------------------------------

    /// Returns the load time, in milliseconds.
    #[must_use]
    pub fn t_load_ms(&self) -> f64 {
        self.timings.t_load_ms
    }

    /// Overwrites the load time, in milliseconds.
    pub fn set_t_load_ms(&mut self, t_load_ms: f64) {
        self.timings.t_load_ms = t_load_ms;
    }

    // --- sample time ------------------------------------------------------

    /// Returns the sample time, in milliseconds.
    #[must_use]
    pub fn t_sample_ms(&self) -> f64 {
        self.timings.t_sample_ms
    }

    /// Overwrites the sample time, in milliseconds.
    pub fn set_t_sample_ms(&mut self, t_sample_ms: f64) {
        self.timings.t_sample_ms = t_sample_ms;
    }

    // --- prompt evaluation time -------------------------------------------

    /// Returns the prompt evaluation time, in milliseconds.
    #[must_use]
    pub fn t_p_eval_ms(&self) -> f64 {
        self.timings.t_p_eval_ms
    }

    /// Overwrites the prompt evaluation time, in milliseconds.
    pub fn set_t_p_eval_ms(&mut self, t_p_eval_ms: f64) {
        self.timings.t_p_eval_ms = t_p_eval_ms;
    }

    // --- evaluation time --------------------------------------------------

    /// Returns the evaluation time, in milliseconds.
    #[must_use]
    pub fn t_eval_ms(&self) -> f64 {
        self.timings.t_eval_ms
    }

    /// Overwrites the evaluation time, in milliseconds.
    pub fn set_t_eval_ms(&mut self, t_eval_ms: f64) {
        self.timings.t_eval_ms = t_eval_ms;
    }

    // --- sample count -----------------------------------------------------

    /// Returns the number of samples.
    #[must_use]
    pub fn n_sample(&self) -> i32 {
        self.timings.n_sample
    }

    /// Overwrites the number of samples.
    pub fn set_n_sample(&mut self, n_sample: i32) {
        self.timings.n_sample = n_sample;
    }

    // --- prompt evaluation count ------------------------------------------

    /// Returns the number of prompt evaluations.
    #[must_use]
    pub fn n_p_eval(&self) -> i32 {
        self.timings.n_p_eval
    }

    /// Overwrites the number of prompt evaluations.
    pub fn set_n_p_eval(&mut self, n_p_eval: i32) {
        self.timings.n_p_eval = n_p_eval;
    }

    // --- evaluation count -------------------------------------------------

    /// Returns the number of evaluations.
    #[must_use]
    pub fn n_eval(&self) -> i32 {
        self.timings.n_eval
    }

    /// Overwrites the number of evaluations.
    pub fn set_n_eval(&mut self, n_eval: i32) {
        self.timings.n_eval = n_eval;
    }
}
impl Display for LlamaTimings {
    /// Writes a five-line, human-readable timing report.
    ///
    /// The lines are, in order: load time, sample time, prompt eval time,
    /// eval time and total time (end minus start). The three middle lines
    /// also show the count, the milliseconds per token and the tokens per
    /// second. Lines are separated by `\n`; the last line has no trailing
    /// newline.
    ///
    /// Zero counts or zero times are not special-cased: the derived values
    /// are plain `f64` divisions, so they can print as `NaN` or `inf`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // Read each time once and widen each count to f64 once; the counts are
        // still printed as integers via the getters below.
        let sample_ms = self.t_sample_ms();
        let sample_runs = f64::from(self.n_sample());
        let prompt_ms = self.t_p_eval_ms();
        let prompt_tokens = f64::from(self.n_p_eval());
        let eval_ms = self.t_eval_ms();
        let eval_runs = f64::from(self.n_eval());

        writeln!(f, "load time = {:.2} ms", self.t_load_ms())?;

        writeln!(
            f,
            "sample time = {:.2} ms / {} runs ({:.2} ms per token, {:.2} tokens per second)",
            sample_ms,
            self.n_sample(),
            sample_ms / sample_runs,
            1e3 / sample_ms * sample_runs
        )?;

        writeln!(
            f,
            "prompt eval time = {:.2} ms / {} tokens ({:.2} ms per token, {:.2} tokens per second)",
            prompt_ms,
            self.n_p_eval(),
            prompt_ms / prompt_tokens,
            1e3 / prompt_ms * prompt_tokens
        )?;

        writeln!(
            f,
            "eval time = {:.2} ms / {} runs ({:.2} ms per token, {:.2} tokens per second)",
            eval_ms,
            self.n_eval(),
            eval_ms / eval_runs,
            1e3 / eval_ms * eval_runs
        )?;

        // Final line: wall-clock span, deliberately without a trailing newline.
        write!(
            f,
            "total time = {:.2} ms",
            self.t_end_ms() - self.t_start_ms()
        )
    }
}