use std::fmt::{Debug, Display, Formatter};
use llama_cpp_sys_4::{ggml_time_us, llama_perf_context_print};
use crate::context::LlamaContext;
/// Thin wrapper around the raw `llama_perf_context_data` record exposed by
/// `llama_cpp_sys_4`.
///
/// The wrapped record stores four millisecond timings (`t_start_ms`,
/// `t_load_ms`, `t_p_eval_ms`, `t_eval_ms`) and three counters (`n_p_eval`,
/// `n_eval`, `n_reused`).
#[derive(Clone, Copy, Debug)]
pub struct PerfContextData {
/// The underlying FFI record; kept crate-private so callers go through the
/// accessor methods instead of touching the raw binding type.
pub(crate) perf_context_data: llama_cpp_sys_4::llama_perf_context_data,
}
impl PerfContextData {
    /// Builds a timing record from explicit values.
    ///
    /// All `t_*_ms` arguments are stored verbatim as milliseconds and the
    /// `n_*` arguments as counts. The `n_reused` counter is initialised to `0`;
    /// use [`PerfContextData::set_n_reused`] to change it afterwards.
    #[allow(clippy::too_many_arguments)]
    #[must_use]
    pub fn new(
        t_start_ms: f64,
        t_load_ms: f64,
        t_p_eval_ms: f64,
        t_eval_ms: f64,
        n_p_eval: i32,
        n_eval: i32,
    ) -> Self {
        Self {
            perf_context_data: llama_cpp_sys_4::llama_perf_context_data {
                t_start_ms,
                t_load_ms,
                t_p_eval_ms,
                t_eval_ms,
                n_p_eval,
                n_eval,
                n_reused: 0,
            },
        }
    }

    /// Asks llama.cpp to print the performance counters of `ctx` by calling
    /// `llama_perf_context_print`.
    ///
    /// Where the report ends up is decided by llama.cpp, not by this wrapper.
    pub fn print(ctx: &LlamaContext<'_>) {
        // SAFETY: `ctx` stays borrowed for the whole call. This relies on
        // `LlamaContext` keeping `context` pointed at a live `llama_context`.
        unsafe {
            llama_perf_context_print(ctx.context.as_ptr());
        }
    }

    /// Returns the stored start timestamp, in milliseconds.
    #[must_use]
    pub fn t_start_ms(&self) -> f64 {
        self.perf_context_data.t_start_ms
    }

    /// Returns the *current* `ggml_time_us` clock reading, scaled to
    /// milliseconds.
    ///
    /// This is not a stored field: every call samples the clock again, so two
    /// calls on the same value generally return different results.
    #[must_use]
    // The clock value is converted to `f64` for display arithmetic; the
    // precision loss of that cast is accepted here.
    #[allow(clippy::cast_precision_loss)]
    pub fn t_end_ms(&self) -> f64 {
        // SAFETY: the call passes no pointers or other arguments, so there are
        // no Rust-side invariants to uphold.
        // NOTE(review): ggml may expect its timer to be initialised first on
        // some platforms; confirm against the bundled ggml.
        let now_us = unsafe { ggml_time_us() };
        1e-3 * now_us as f64
    }

    /// Returns the stored load time, in milliseconds.
    #[must_use]
    pub fn t_load_ms(&self) -> f64 {
        self.perf_context_data.t_load_ms
    }

    /// Returns the stored prompt-evaluation time, in milliseconds.
    #[must_use]
    pub fn t_p_eval_ms(&self) -> f64 {
        self.perf_context_data.t_p_eval_ms
    }

    /// Returns the stored evaluation time, in milliseconds.
    #[must_use]
    pub fn t_eval_ms(&self) -> f64 {
        self.perf_context_data.t_eval_ms
    }

    /// Returns the stored prompt-evaluation count (reported as "tokens" by the
    /// `Display` implementation).
    #[must_use]
    pub fn n_p_eval(&self) -> i32 {
        self.perf_context_data.n_p_eval
    }

    /// Returns the stored evaluation count (reported as "runs" by the
    /// `Display` implementation).
    #[must_use]
    pub fn n_eval(&self) -> i32 {
        self.perf_context_data.n_eval
    }

    /// Returns the stored `n_reused` counter.
    ///
    /// NOTE(review): llama.cpp appears to use this for the number of times a
    /// compute graph was reused; confirm against the bundled `llama.h`.
    #[must_use]
    pub fn n_reused(&self) -> i32 {
        self.perf_context_data.n_reused
    }

    /// Overwrites the stored start timestamp (milliseconds).
    pub fn set_t_start_ms(&mut self, t_start_ms: f64) {
        self.perf_context_data.t_start_ms = t_start_ms;
    }

    /// Overwrites the stored load time (milliseconds).
    pub fn set_t_load_ms(&mut self, t_load_ms: f64) {
        self.perf_context_data.t_load_ms = t_load_ms;
    }

    /// Overwrites the stored prompt-evaluation time (milliseconds).
    pub fn set_t_p_eval_ms(&mut self, t_p_eval_ms: f64) {
        self.perf_context_data.t_p_eval_ms = t_p_eval_ms;
    }

    /// Overwrites the stored evaluation time (milliseconds).
    pub fn set_t_eval_ms(&mut self, t_eval_ms: f64) {
        self.perf_context_data.t_eval_ms = t_eval_ms;
    }

    /// Overwrites the stored prompt-evaluation count.
    pub fn set_n_p_eval(&mut self, n_p_eval: i32) {
        self.perf_context_data.n_p_eval = n_p_eval;
    }

    /// Overwrites the stored evaluation count.
    pub fn set_n_eval(&mut self, n_eval: i32) {
        self.perf_context_data.n_eval = n_eval;
    }

    /// Overwrites the stored `n_reused` counter.
    pub fn set_n_reused(&mut self, n_reused: i32) {
        self.perf_context_data.n_reused = n_reused;
    }
}
/// Renders the record as a four-line, human-readable report: load time,
/// prompt-evaluation statistics, evaluation statistics and total elapsed time.
///
/// The first three lines end with a newline; the final "total time" line does
/// not. The total is measured against the clock at the moment of formatting
/// (`t_end_ms() - t_start_ms()`), so formatting the same value twice can yield
/// different totals.
impl Display for PerfContextData {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        /// Divides `numerator` by `denominator`, yielding `0.0` instead of
        /// `NaN`/`inf` when the denominator is zero (e.g. nothing has been
        /// evaluated yet).
        fn ratio_or_zero(numerator: f64, denominator: f64) -> f64 {
            if denominator == 0.0 {
                0.0
            } else {
                numerator / denominator
            }
        }

        let prompt_ms = self.t_p_eval_ms();
        let prompt_count = f64::from(self.n_p_eval());
        let eval_ms = self.t_eval_ms();
        let eval_count = f64::from(self.n_eval());

        writeln!(f, "load time = {:.2} ms", self.t_load_ms())?;
        writeln!(
            f,
            "prompt eval time = {:.2} ms / {} tokens ({:.2} ms per token, {:.2} tokens per second)",
            prompt_ms,
            self.n_p_eval(),
            ratio_or_zero(prompt_ms, prompt_count),
            // Same operation order as `1e3 / t * n`, so non-zero inputs print
            // exactly what they did before the zero guard was added.
            ratio_or_zero(1e3, prompt_ms) * prompt_count
        )?;
        writeln!(
            f,
            "eval time = {:.2} ms / {} runs ({:.2} ms per token, {:.2} tokens per second)",
            eval_ms,
            self.n_eval(),
            ratio_or_zero(eval_ms, eval_count),
            ratio_or_zero(1e3, eval_ms) * eval_count
        )?;
        write!(
            f,
            "total time = {:.2} ms",
            self.t_end_ms() - self.t_start_ms()
        )
    }
}