use std::time::{SystemTime, UNIX_EPOCH};
use serde::Serialize;
use crate::accuracy::AccuracyResult;
use crate::perplexity::PerplexityResult;
use crate::throughput::ThroughputResult;
#[derive(Debug, Serialize)]
pub struct EvalResultEntry {
pub task: String,
pub metric: String,
pub value: f32,
pub unit: String,
pub notes: Option<String>,
}
#[derive(Debug, Serialize)]
pub struct EvalReport {
pub model_name: String,
pub timestamp: String,
pub results: Vec<EvalResultEntry>,
}
impl EvalReport {
pub fn new(model_name: &str) -> Self {
let timestamp = iso8601_now();
Self {
model_name: model_name.to_string(),
timestamp,
results: Vec::new(),
}
}
pub fn add(&mut self, entry: EvalResultEntry) {
self.results.push(entry);
}
pub fn add_perplexity(&mut self, task: &str, result: &PerplexityResult) {
self.add(EvalResultEntry {
task: task.to_string(),
metric: "perplexity".to_string(),
value: result.mean_ppl,
unit: "PPL".to_string(),
notes: Some(format!("n={}, std={:.2}", result.n_samples, result.std_ppl)),
});
}
pub fn add_accuracy(&mut self, task: &str, result: &AccuracyResult) {
self.add(EvalResultEntry {
task: task.to_string(),
metric: "accuracy".to_string(),
value: result.accuracy_pct(),
unit: "%".to_string(),
notes: Some(format!("{}/{}", result.correct, result.total)),
});
}
pub fn add_throughput(&mut self, result: &ThroughputResult) {
self.add(EvalResultEntry {
task: "throughput".to_string(),
metric: "tokens_per_second".to_string(),
value: result.tokens_per_second,
unit: "t/s".to_string(),
notes: Some(result.latency_breakdown()),
});
}
pub fn to_json(&self) -> String {
serde_json::to_string_pretty(self).unwrap_or_else(|_| "{}".to_string())
}
pub fn to_markdown(&self) -> String {
let mut md = String::new();
md.push_str(&format!("# Evaluation Report — {}\n\n", self.model_name));
md.push_str(&format!("**Timestamp:** {}\n\n", self.timestamp));
md.push_str("| Task | Metric | Value | Unit | Notes |\n");
md.push_str("|------|--------|------:|------|-------|\n");
for entry in &self.results {
let notes = entry.notes.as_deref().unwrap_or("-");
md.push_str(&format!(
"| {} | {} | {:.4} | {} | {} |\n",
entry.task, entry.metric, entry.value, entry.unit, notes
));
}
md
}
pub fn summary(&self) -> String {
let ppl = self
.results
.iter()
.find(|e| e.metric == "perplexity")
.map(|e| format!("{:.2}", e.value))
.unwrap_or_else(|| "N/A".to_string());
let acc = self
.results
.iter()
.find(|e| e.metric == "accuracy")
.map(|e| format!("{:.1}%", e.value))
.unwrap_or_else(|| "N/A".to_string());
let tps = self
.results
.iter()
.find(|e| e.metric == "tokens_per_second")
.map(|e| format!("{:.1}", e.value))
.unwrap_or_else(|| "N/A".to_string());
format!(
"Model: {} | PPL: {} | Acc: {} | TPS: {}",
self.model_name, ppl, acc, tps
)
}
}
fn iso8601_now() -> String {
let secs = SystemTime::now()
.duration_since(UNIX_EPOCH)
.map(|d| d.as_secs())
.unwrap_or(0);
let (year, month, day, hour, minute, second) = unix_secs_to_datetime(secs);
format!(
"{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
year, month, day, hour, minute, second
)
}
fn unix_secs_to_datetime(secs: u64) -> (u64, u64, u64, u64, u64, u64) {
let second = secs % 60;
let minutes = secs / 60;
let minute = minutes % 60;
let hours = minutes / 60;
let hour = hours % 24;
let days = hours / 24;
let mut year = 1970u64;
let mut remaining = days;
loop {
let days_in_year = if is_leap(year) { 366 } else { 365 };
if remaining < days_in_year {
break;
}
remaining -= days_in_year;
year += 1;
}
let leap = is_leap(year);
let month_days: [u64; 12] = [
31,
if leap { 29 } else { 28 },
31,
30,
31,
30,
31,
31,
30,
31,
30,
31,
];
let mut month = 1u64;
for &md in &month_days {
if remaining < md {
break;
}
remaining -= md;
month += 1;
}
let day = remaining + 1;
(year, month, day, hour, minute, second)
}
#[inline]
fn is_leap(year: u64) -> bool {
(year.is_multiple_of(4) && !year.is_multiple_of(100)) || year.is_multiple_of(400)
}