1use serde::{Deserialize, Serialize};
30use std::collections::HashMap;
31use std::time::{SystemTime, UNIX_EPOCH};
32
33pub mod andon;
34pub mod dashboard;
35pub mod drift;
36pub mod export;
37pub mod gpu;
38pub mod inference;
39pub mod lineage;
40pub mod llm;
41pub mod params;
42pub mod prometheus;
43pub mod report;
44pub mod storage;
45pub mod tui;
46pub mod wasm;
47
48pub use andon::{Alert, AlertLevel, AndonConfig, AndonSystem};
50pub use dashboard::{Dashboard, DashboardConfig};
51pub use drift::{Anomaly, AnomalySeverity, DriftDetector, DriftStatus, SlidingWindowBaseline};
52pub use export::{ExportFormat, MetricsExporter};
53pub use lineage::{ChangeType, Derivation, ModelLineage, ModelMetadata};
54pub use llm::{
55 EvalResult, InMemoryLLMEvaluator, LLMError, LLMEvaluator, LLMMetrics, PromptId, PromptVersion,
56};
57pub use params::{ParamDiff, ParamLogger, ParamValue};
58pub use report::{
59 HanseiAnalyzer, IssueSeverity, MetricSummary, PostTrainingReport, TrainingIssue, Trend,
60};
61pub use storage::{InMemoryStore, JsonFileStore, MetricsStore, StorageError, StorageResult};
62pub use tui::{
63 BrailleChart, GpuTelemetry, SamplePeek, TrainingSnapshot, TrainingState, TrainingStateWriter,
64 TrainingStatus, TuiMonitor, TuiMonitorConfig,
65};
66pub use wasm::{WasmDashboard, WasmDashboardOptions, WasmMetricsCollector};
67
68#[cfg(test)]
69#[path = "tests/mod.rs"]
70mod tests;
71
72#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
78pub enum Metric {
79 Loss,
81 Accuracy,
83 LearningRate,
85 GradientNorm,
87 Epoch,
89 Batch,
91 Custom(String),
93}
94
95impl Metric {
96 pub fn as_str(&self) -> &str {
98 match self {
99 Metric::Loss => "loss",
100 Metric::Accuracy => "accuracy",
101 Metric::LearningRate => "learning_rate",
102 Metric::GradientNorm => "gradient_norm",
103 Metric::Epoch => "epoch",
104 Metric::Batch => "batch",
105 Metric::Custom(name) => name,
106 }
107 }
108
109 #[allow(clippy::should_implement_trait)]
111 pub fn from_str(s: &str) -> Option<Self> {
112 match s {
113 "loss" => Some(Metric::Loss),
114 "accuracy" => Some(Metric::Accuracy),
115 "learning_rate" => Some(Metric::LearningRate),
116 "gradient_norm" => Some(Metric::GradientNorm),
117 "epoch" => Some(Metric::Epoch),
118 "batch" => Some(Metric::Batch),
119 _ => None,
120 }
121 }
122}
123
124#[derive(Debug, Clone, Serialize, Deserialize)]
130pub struct MetricRecord {
131 pub timestamp: u64,
133 pub metric: Metric,
135 pub value: f64,
137 pub tags: HashMap<String, String>,
139}
140
141impl MetricRecord {
142 pub fn new(metric: Metric, value: f64) -> Self {
144 let timestamp =
145 SystemTime::now().duration_since(UNIX_EPOCH).map(|d| d.as_millis() as u64).unwrap_or(0);
146
147 Self { timestamp, metric, value, tags: HashMap::new() }
148 }
149
150 pub fn with_tag(mut self, key: &str, value: &str) -> Self {
152 self.tags.insert(key.to_string(), value.to_string());
153 self
154 }
155}
156
157#[derive(Debug, Clone, Serialize, Deserialize)]
163pub struct MetricStats {
164 pub count: usize,
166 pub mean: f64,
168 pub std: f64,
170 pub min: f64,
172 pub max: f64,
174 pub sum: f64,
176 pub has_nan: bool,
178 pub has_inf: bool,
180}
181
182impl Default for MetricStats {
183 fn default() -> Self {
184 Self {
185 count: 0,
186 mean: 0.0,
187 std: 0.0,
188 min: f64::INFINITY,
189 max: f64::NEG_INFINITY,
190 sum: 0.0,
191 has_nan: false,
192 has_inf: false,
193 }
194 }
195}
196
197#[derive(Debug)]
218pub struct MetricsCollector {
219 records: Vec<MetricRecord>,
221 running_stats: HashMap<Metric, RunningStats>,
223}
224
/// Incremental (single-pass) statistics accumulator for one metric.
#[derive(Debug, Clone)]
struct RunningStats {
    /// Number of finite values accumulated.
    count: usize,
    /// Running mean of the finite values.
    mean: f64,
    /// Running sum of squared deviations from the mean (Welford's `M2`).
    m2: f64,
    /// Smallest non-NaN value seen so far.
    min: f64,
    /// Largest non-NaN value seen so far.
    max: f64,
    /// Sum of the finite values.
    sum: f64,
    /// Set once a NaN has been observed.
    has_nan: bool,
    /// Set once an infinite value has been observed.
    has_inf: bool,
}
237
238impl Default for RunningStats {
239 fn default() -> Self {
240 Self {
241 count: 0,
242 mean: 0.0,
243 m2: 0.0,
244 min: f64::INFINITY,
245 max: f64::NEG_INFINITY,
246 sum: 0.0,
247 has_nan: false,
248 has_inf: false,
249 }
250 }
251}
252
253impl RunningStats {
254 #[allow(dead_code)]
255 fn new() -> Self {
256 Self {
257 count: 0,
258 mean: 0.0,
259 m2: 0.0,
260 min: f64::INFINITY,
261 max: f64::NEG_INFINITY,
262 sum: 0.0,
263 has_nan: false,
264 has_inf: false,
265 }
266 }
267
268 fn update(&mut self, value: f64) {
270 if value.is_nan() {
272 self.has_nan = true;
273 return;
274 }
275 if value.is_infinite() {
276 self.has_inf = true;
277 self.min = self.min.min(value);
279 self.max = self.max.max(value);
280 return;
281 }
282
283 self.count += 1;
284 self.sum += value;
285 self.min = self.min.min(value);
286 self.max = self.max.max(value);
287
288 let delta = value - self.mean;
290 self.mean += delta / self.count as f64;
291 let delta2 = value - self.mean;
292 self.m2 += delta * delta2;
293 }
294
295 fn std(&self) -> f64 {
297 if self.count < 2 {
298 return 0.0;
299 }
300 (self.m2 / (self.count - 1) as f64).sqrt()
301 }
302
303 fn to_stats(&self) -> MetricStats {
305 MetricStats {
306 count: self.count,
307 mean: self.mean,
308 std: self.std(),
309 min: self.min,
310 max: self.max,
311 sum: self.sum,
312 has_nan: self.has_nan,
313 has_inf: self.has_inf,
314 }
315 }
316}
317
318impl MetricsCollector {
319 pub fn new() -> Self {
321 Self { records: Vec::new(), running_stats: HashMap::new() }
322 }
323
324 pub fn record(&mut self, metric: Metric, value: f64) {
326 self.records.push(MetricRecord::new(metric.clone(), value));
328
329 self.running_stats.entry(metric).or_default().update(value);
331 }
332
333 pub fn record_batch(&mut self, metrics: &[(Metric, f64)]) {
335 for (metric, value) in metrics {
336 self.record(metric.clone(), *value);
337 }
338 }
339
340 pub fn count(&self) -> usize {
342 self.records.len()
343 }
344
345 pub fn is_empty(&self) -> bool {
347 self.records.is_empty()
348 }
349
350 pub fn clear(&mut self) {
352 self.records.clear();
353 self.running_stats.clear();
354 }
355
356 pub fn summary(&self) -> HashMap<Metric, MetricStats> {
358 self.running_stats
359 .iter()
360 .map(|(metric, stats)| (metric.clone(), stats.to_stats()))
361 .collect()
362 }
363
364 pub fn to_records(&self) -> Vec<MetricRecord> {
366 self.records.clone()
367 }
368
369 pub fn to_json(&self) -> Result<String, serde_json::Error> {
371 serde_json::to_string_pretty(&self.records)
372 }
373
374 pub fn summary_to_json(&self) -> Result<String, serde_json::Error> {
376 serde_json::to_string_pretty(&self.summary())
377 }
378}
379
380impl Default for MetricsCollector {
381 fn default() -> Self {
382 Self::new()
383 }
384}
385
386pub type MetricsSummary = HashMap<Metric, MetricStats>;