/// Renders one or two loss curves as a fixed-size ASCII scatter plot with
/// labelled axes and a legend.
#[derive(Debug, Clone)]
pub struct AsciiLossPlotter {
    /// Width of the plotting area in character columns (axis labels excluded).
    pub width: usize,
    /// Height of the plotting area in character rows.
    pub height: usize,
    /// Title centred above the plot; no title line is emitted when empty.
    pub title: String,
}

impl AsciiLossPlotter {
    /// Glyphs assigned to curves by index; the table is cycled when there are
    /// more curves than glyphs.
    const GLYPHS: [char; 5] = ['*', '+', 'o', 'x', '#'];

    /// Width of the right-aligned y-label column that precedes the axis bar.
    /// Every layout computation (rows, corner, tick labels, title) derives
    /// from this single value so the pieces cannot drift apart.
    const Y_LABEL_WIDTH: usize = 9;

    /// Creates a plotter, raising `width` to at least 10 and `height` to at
    /// least 4. The title starts out empty.
    pub fn new(width: usize, height: usize) -> Self {
        Self {
            width: width.max(10),
            height: height.max(4),
            title: String::new(),
        }
    }

    /// Builder-style setter for the plot title.
    pub fn with_title(mut self, title: impl Into<String>) -> Self {
        self.title = title.into();
        self
    }

    /// Renders a single curve of `(step, value)` points labelled `loss`.
    ///
    /// Returns an empty vector when fewer than two points are supplied or when
    /// none of the values is finite.
    pub fn render(&self, values: &[(u64, f32)]) -> Vec<String> {
        if values.len() < 2 {
            return Vec::new();
        }
        self.render_curves(&[values], &["loss"])
    }

    /// Renders a `train` and a `val` curve on shared axes.
    ///
    /// Returns an empty vector when either curve has fewer than two points or
    /// when none of the values is finite.
    pub fn render_two(&self, train: &[(u64, f32)], val: &[(u64, f32)]) -> Vec<String> {
        if train.len() < 2 || val.len() < 2 {
            return Vec::new();
        }
        self.render_curves(&[train, val], &["train", "val"])
    }

    /// Shared renderer. Output lines, in order: optional title, one line per
    /// plot row (`<y label>|<cells>`), the horizontal axis, the x tick labels
    /// (min, midpoint, max) and, when labels are present, the legend.
    ///
    /// Non-finite values (NaN, ±inf) are excluded from the y range and are not
    /// plotted; their steps still contribute to the x range.
    fn render_curves(&self, curves: &[&[(u64, f32)]], labels: &[&str]) -> Vec<String> {
        // The fields are public, so they may have been set below the minimums
        // enforced by `new`; never let the `- 1` computations underflow.
        let width = self.width.max(1);
        let height = self.height.max(1);
        let last_col = width - 1;
        let last_row = height - 1;

        // Y range over finite values only: one diverged step must not turn the
        // whole axis into inf/NaN.
        let (y_min, y_max) = curves
            .iter()
            .flat_map(|curve| curve.iter().map(|&(_, v)| v))
            .filter(|v| v.is_finite())
            .fold((f32::INFINITY, f32::NEG_INFINITY), |(lo, hi), v| {
                (lo.min(v), hi.max(v))
            });
        if y_min > y_max {
            // No finite value at all: nothing can be plotted.
            return Vec::new();
        }

        // 5% head-room, but never less than 1e-6 and never less than a few
        // ulps of the values themselves; otherwise the padding is rounded away
        // for flat curves of large magnitude and the range collapses to zero.
        let magnitude = y_min.abs().max(y_max.abs());
        let y_pad = ((y_max - y_min) * 0.05)
            .max(1e-6_f32)
            .max(magnitude * f32::EPSILON * 4.0);
        let y_lo = y_min - y_pad;
        let y_hi = y_max + y_pad;
        let y_range = y_hi - y_lo;

        let (x_min, x_max) = curves
            .iter()
            .flat_map(|curve| curve.iter().map(|&(step, _)| step))
            .fold(None, |acc: Option<(u64, u64)>, step| match acc {
                None => Some((step, step)),
                Some((lo, hi)) => Some((lo.min(step), hi.max(step))),
            })
            .unwrap_or((0, 1));
        let x_span = x_max - x_min;

        let map_x = |step: u64| -> usize {
            if x_span == 0 {
                // All points share one step: park them in the middle column.
                return width / 2;
            }
            let frac = (step - x_min) as f64 / x_span as f64;
            ((frac * last_col as f64).round() as usize).min(last_col)
        };
        let map_y = |v: f32| -> usize {
            let frac = ((v - y_lo) / y_range).clamp(0.0, 1.0);
            // Row 0 is the top of the plot, so larger values map to smaller rows.
            ((last_row as f32 * (1.0 - frac)).round() as usize).min(last_row)
        };

        let mut grid = vec![vec![' '; width]; height];
        for (ci, curve) in curves.iter().enumerate() {
            let glyph = Self::GLYPHS[ci % Self::GLYPHS.len()];
            for &(step, v) in curve.iter().filter(|(_, v)| v.is_finite()) {
                // Later curves overwrite earlier ones that land on the same cell.
                grid[map_y(v)][map_x(step)] = glyph;
            }
        }

        // Column at which the plotting area starts: labels plus the axis bar.
        let col_pad = Self::Y_LABEL_WIDTH + 1;
        let total = col_pad + width;
        let mut out = Vec::with_capacity(height + 4);

        if !self.title.is_empty() {
            out.push(format!("{:^width$}", self.title, width = total));
        }

        let row_steps = last_row.max(1) as f32;
        for (row, cells) in grid.iter().enumerate() {
            let y_val = y_hi - (row as f32 / row_steps) * y_range;
            let cells: String = cells.iter().collect();
            // The label width is a minimum; a value that needs more characters
            // still pushes its row to the right.
            out.push(format!(
                "{:>width$.4}|{}",
                y_val,
                cells,
                width = Self::Y_LABEL_WIDTH
            ));
        }

        out.push(format!(
            "{:>width$}+{}",
            "",
            "─".repeat(width + 1),
            width = Self::Y_LABEL_WIDTH
        ));

        // Tick labels are written left, middle, right; on narrow plots a later
        // label may overwrite part of an earlier one.
        let left = x_min.to_string();
        let mid = (x_min + x_span / 2).to_string();
        let right = x_max.to_string();
        let mut ticks = vec![' '; total];
        Self::overlay(&mut ticks, col_pad, &left);
        Self::overlay(
            &mut ticks,
            (col_pad + width / 2).saturating_sub(mid.len() / 2),
            &mid,
        );
        Self::overlay(&mut ticks, total.saturating_sub(right.len()), &right);
        out.push(ticks.into_iter().collect());

        let wants_legend =
            labels.len() > 1 || labels.first().map_or(false, |label| !label.is_empty());
        if wants_legend {
            // Cycle the glyph table exactly like the plotting loop does.
            let legend = labels
                .iter()
                .zip(Self::GLYPHS.iter().cycle())
                .map(|(label, glyph)| format!("{}={}", glyph, label))
                .collect::<Vec<_>>()
                .join(" ");
            out.push(format!(
                "{:>width$} {}",
                "",
                legend,
                width = Self::Y_LABEL_WIDTH
            ));
        }
        out
    }

    /// Writes `text` into `line` starting at `start`, clipping at the end of
    /// the line.
    fn overlay(line: &mut [char], start: usize, text: &str) {
        for (slot, ch) in line.iter_mut().skip(start).zip(text.chars()) {
            *slot = ch;
        }
    }
}
/// Fixed-range histogram with equally wide buckets that also tracks the
/// running mean and variance of the values it has seen.
#[derive(Debug, Clone)]
pub struct GradientHistogram {
    /// Bucket edges in ascending order; bucket `i` spans
    /// `[buckets[i], buckets[i + 1])`, so there is one more edge than buckets.
    pub buckets: Vec<f32>,
    /// Number of recorded values per bucket.
    pub counts: Vec<usize>,
    /// Number of finite values recorded so far.
    pub total_values: usize,
    /// Running mean (Welford's online algorithm).
    running_mean: f64,
    /// Running sum of squared deviations from the mean (Welford's `M2`).
    running_m2: f64,
    /// Number of NaN / ±inf values that were rejected by `add_value`.
    non_finite: usize,
}

impl GradientHistogram {
    /// Creates an empty histogram with `num_buckets` equally wide buckets
    /// covering `[min, max]`.
    ///
    /// # Panics
    ///
    /// Panics when `num_buckets` is zero or when `min < max` does not hold.
    pub fn new(num_buckets: usize, min: f32, max: f32) -> Self {
        assert!(num_buckets > 0, "num_buckets must be >= 1");
        assert!(min < max, "min must be less than max");
        let buckets = (0..=num_buckets)
            .map(|i| min + (max - min) * (i as f32 / num_buckets as f32))
            .collect();
        Self {
            buckets,
            counts: vec![0; num_buckets],
            total_values: 0,
            running_mean: 0.0,
            running_m2: 0.0,
            non_finite: 0,
        }
    }

    /// Records one value.
    ///
    /// Finite values outside the histogram range are counted in the first or
    /// last bucket. Non-finite values (NaN, ±inf) are not added to any bucket
    /// or to the statistics, because a single one would make the mean and
    /// standard deviation NaN for good; they are tallied separately and
    /// reported by [`GradientHistogram::non_finite_count`].
    pub fn add_value(&mut self, val: f32) {
        if !val.is_finite() {
            self.non_finite += 1;
            return;
        }

        let n_buckets = self.counts.len();
        let min = self.buckets[0];
        let max = *self.buckets.last().unwrap_or(&min);
        let range = max - min;
        let bucket_idx = if range <= 0.0 {
            0
        } else {
            let frac = (val - min) / range;
            let idx = (frac * n_buckets as f32).floor() as isize;
            idx.clamp(0, (n_buckets as isize) - 1) as usize
        };
        self.counts[bucket_idx] += 1;
        self.total_values += 1;

        // Welford update: numerically stable single-pass mean and variance.
        let x = f64::from(val);
        let delta = x - self.running_mean;
        self.running_mean += delta / self.total_values as f64;
        self.running_m2 += delta * (x - self.running_mean);
    }

    /// Records every value of `vals` in order.
    pub fn add_values(&mut self, vals: &[f32]) {
        for &v in vals {
            self.add_value(v);
        }
    }

    /// Number of NaN / ±inf values that were passed to `add_value` and skipped.
    pub fn non_finite_count(&self) -> usize {
        self.non_finite
    }

    /// Mean of the recorded values, or `0.0` when nothing was recorded.
    pub fn mean(&self) -> f32 {
        if self.total_values == 0 {
            return 0.0;
        }
        self.running_mean as f32
    }

    /// Sample standard deviation (divisor `n - 1`) of the recorded values, or
    /// `0.0` when fewer than two values were recorded.
    pub fn std_dev(&self) -> f32 {
        if self.total_values < 2 {
            return 0.0;
        }
        (self.running_m2 / (self.total_values - 1) as f64).sqrt() as f32
    }

    /// Estimates the `p`-th percentile (`p` is clamped to `0..=100`) by linear
    /// interpolation inside the bucket where the cumulative count reaches the
    /// target rank.
    ///
    /// Empty buckets are skipped, so `percentile(0.0)` is the lower edge of the
    /// first populated bucket rather than the histogram minimum. Returns the
    /// lowest edge when the histogram is empty.
    pub fn percentile(&self, p: f32) -> f32 {
        if self.total_values == 0 {
            return self.buckets[0];
        }
        // Integer accumulation with an f64 target stays exact for counts far
        // beyond the 2^24 limit of f32.
        let target = f64::from(p.clamp(0.0, 100.0)) / 100.0 * self.total_values as f64;
        let mut cum = 0_usize;
        for (i, &count) in self.counts.iter().enumerate() {
            if count == 0 {
                continue;
            }
            let next = cum + count;
            if next as f64 >= target {
                let lo = self.buckets[i];
                let hi = self.buckets[i + 1];
                let bucket_frac = ((target - cum as f64) / count as f64) as f32;
                return lo + bucket_frac * (hi - lo);
            }
            cum = next;
        }
        *self.buckets.last().unwrap_or(&self.buckets[0])
    }

    /// Renders one line per bucket: its range, its count and a bar scaled so
    /// that the fullest bucket is 40 blocks wide.
    pub fn to_ascii_bars(&self) -> String {
        if self.counts.is_empty() {
            return "(empty histogram)\n".to_string();
        }
        let max_count = self.counts.iter().copied().max().unwrap_or(0);
        let bar_width = 40_usize;
        let mut out = String::new();
        for (edges, &cnt) in self.buckets.windows(2).zip(&self.counts) {
            let bar_len = if max_count == 0 {
                0
            } else {
                (cnt * bar_width) / max_count
            };
            out.push_str(&format!(
                "[{:>8.3e},{:>8.3e}) {:>6} |{}\n",
                edges[0],
                edges[1],
                cnt,
                "█".repeat(bar_len)
            ));
        }
        out
    }
}
/// Summary statistics for the activations of a single layer.
#[derive(Debug, Clone)]
pub struct ActivationLayerStats {
    /// Name of the layer the statistics belong to.
    pub layer_name: String,
    /// Arithmetic mean of the activations.
    pub mean: f32,
    /// Sample standard deviation (divisor `n - 1`); `0.0` for a single value.
    pub std: f32,
    /// Smallest activation.
    pub min: f32,
    /// Largest activation.
    pub max: f32,
    /// Fraction of activations that compare equal to `0.0`.
    pub zero_fraction: f32,
    /// Fraction of activations whose magnitude exceeds 99% of the largest
    /// magnitude in the slice.
    pub saturation_fraction: f32,
}

impl ActivationLayerStats {
    /// Computes the statistics of `activations` for the layer `layer_name`.
    ///
    /// An empty slice yields all-zero statistics with `zero_fraction` set to
    /// `1.0`.
    pub fn compute(layer_name: &str, activations: &[f32]) -> Self {
        let count = activations.len();
        if count == 0 {
            return Self {
                layer_name: layer_name.to_string(),
                mean: 0.0,
                std: 0.0,
                min: 0.0,
                max: 0.0,
                zero_fraction: 1.0,
                saturation_fraction: 0.0,
            };
        }

        let mut lowest = f32::INFINITY;
        let mut highest = f32::NEG_INFINITY;
        let mut zeros = 0usize;
        // Welford accumulators: running mean and sum of squared deviations.
        let mut mean = 0.0_f64;
        let mut m2 = 0.0_f64;

        for (seen, value) in activations.iter().copied().enumerate() {
            if value < lowest {
                lowest = value;
            }
            if value > highest {
                highest = value;
            }
            if value == 0.0 {
                zeros += 1;
            }
            let x = value as f64;
            let before = x - mean;
            mean += before / (seen + 1) as f64;
            let after = x - mean;
            m2 += before * after;
        }

        let std = if count > 1 {
            (m2 / (count as f64 - 1.0)).sqrt() as f32
        } else {
            0.0
        };

        // Saturation is measured relative to the largest magnitude observed.
        let cutoff = 0.99 * lowest.abs().max(highest.abs());
        let saturated = activations
            .iter()
            .filter(|value| value.abs() > cutoff)
            .count();

        let total = count as f32;
        Self {
            layer_name: layer_name.to_string(),
            mean: mean as f32,
            std,
            min: lowest,
            max: highest,
            zero_fraction: zeros as f32 / total,
            saturation_fraction: saturated as f32 / total,
        }
    }

    /// Returns `true` when the zero fraction is strictly above `threshold`,
    /// after clamping `threshold` to `0..=1`.
    pub fn is_dead(&self, threshold: f32) -> bool {
        let limit = threshold.clamp(0.0, 1.0);
        self.zero_fraction > limit
    }

    /// Formats all statistics as a single line; the two fractions are shown as
    /// percentages.
    pub fn to_summary_line(&self) -> String {
        let zero_pct = self.zero_fraction * 100.0;
        let sat_pct = self.saturation_fraction * 100.0;
        format!(
            "[{}] mean={:.4e} std={:.4e} min={:.4e} max={:.4e} zero={:.1}% sat={:.1}%",
            self.layer_name, self.mean, self.std, self.min, self.max, zero_pct, sat_pct,
        )
    }
}
/// Helpers for inspecting attention weights: an ASCII heatmap plus per-row
/// entropy and uniformity checks.
#[derive(Debug, Clone)]
pub struct AttentionVisualizer {
    /// Index of the attention head this visualizer refers to.
    pub head_idx: usize,
    /// Index of the layer this visualizer refers to.
    pub layer_idx: usize,
}

impl AttentionVisualizer {
    /// Creates a visualizer tagged with the given head and layer indices.
    pub fn new(head_idx: usize, layer_idx: usize) -> Self {
        Self { head_idx, layer_idx }
    }

    /// Renders `attn_matrix` as a shaded heatmap.
    ///
    /// The first returned line is a header with the minimum and maximum of the
    /// finite entries; each following line encodes one matrix row with the
    /// shades `' ' ░ ▒ ▓ █` scaled linearly between that minimum and maximum.
    /// An empty matrix yields no lines.
    ///
    /// Non-finite entries are left out of the scale, since they would make the
    /// range infinite and flatten every cell to the lowest shade. They are
    /// drawn as the lowest shade (`-inf`, NaN) or the highest shade (`+inf`).
    /// When no entry is finite the header reports `0.000` for both bounds.
    pub fn render_ascii(attn_matrix: &[Vec<f32>]) -> Vec<String> {
        if attn_matrix.is_empty() {
            return Vec::new();
        }
        const BLOCKS: [char; 5] = [' ', '░', '▒', '▓', '█'];
        let top_shade = (BLOCKS.len() - 1) as f32;

        let (mut global_min, mut global_max) = attn_matrix
            .iter()
            .flat_map(|row| row.iter().copied())
            .filter(|v| v.is_finite())
            .fold((f32::INFINITY, f32::NEG_INFINITY), |(mn, mx), v| {
                (mn.min(v), mx.max(v))
            });
        if global_min > global_max {
            // No finite entry: fall back to a neutral scale instead of
            // reporting `inf` / `-inf` bounds.
            global_min = 0.0;
            global_max = 0.0;
        }
        // Floor the range so a constant matrix does not divide by zero.
        let range = (global_max - global_min).max(1e-9_f32);

        let mut lines = Vec::with_capacity(attn_matrix.len() + 1);
        lines.push(format!(
            "Attn heatmap min={:.3} max={:.3}",
            global_min, global_max
        ));
        for row in attn_matrix {
            let encoded: String = row
                .iter()
                .map(|&v| {
                    // The float-to-usize cast maps NaN to 0; ±inf is clamped.
                    let idx = ((v - global_min) / range * top_shade)
                        .round()
                        .clamp(0.0, top_shade) as usize;
                    BLOCKS[idx]
                })
                .collect();
            lines.push(encoded);
        }
        lines
    }

    /// Shannon entropy (natural log) of `attn_row`.
    ///
    /// The row is divided by its sum when that sum is positive; entries that
    /// are not positive after this normalisation are ignored. Returns `0.0`
    /// for an empty row. A zero result is always positive zero, never `-0.0`.
    pub fn entropy(attn_row: &[f32]) -> f32 {
        if attn_row.is_empty() {
            return 0.0;
        }
        let total: f32 = attn_row.iter().sum();
        let scale = if total > 0.0 { total } else { 1.0 };
        let weighted_logs: f32 = attn_row
            .iter()
            .map(|&p| p / scale)
            .filter(|&pn| pn > 0.0)
            .map(|pn| pn * pn.ln())
            .sum();
        let h = -weighted_logs;
        // Negating a zero sum yields `-0.0`, which prints as "-0"; normalise it.
        if h == 0.0 {
            0.0
        } else {
            h
        }
    }

    /// Entropy of every row of `attn_matrix`, in row order.
    pub fn all_entropies(attn_matrix: &[Vec<f32>]) -> Vec<f32> {
        attn_matrix.iter().map(|row| Self::entropy(row)).collect()
    }

    /// Returns `true` when the spread between the largest and smallest entry
    /// of `attn_row` is below `eps`. An empty row counts as uniform.
    pub fn is_uniform(attn_row: &[f32], eps: f32) -> bool {
        if attn_row.is_empty() {
            return true;
        }
        let max_v = attn_row.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
        let min_v = attn_row.iter().cloned().fold(f32::INFINITY, f32::min);
        (max_v - min_v) < eps
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---- AsciiLossPlotter -------------------------------------------------

    /// A single point is not enough to draw a curve.
    #[test]
    fn test_plotter_returns_empty_for_single_point() {
        let points = vec![(0u64, 1.5_f32)];
        let rendered = AsciiLossPlotter::new(60, 15).render(&points);
        assert!(rendered.is_empty());
    }

    /// No input yields no output.
    #[test]
    fn test_plotter_returns_empty_for_zero_points() {
        let rendered = AsciiLossPlotter::new(60, 15).render(&[]);
        assert!(rendered.is_empty());
    }

    /// Two points are the minimum that produces a plot.
    #[test]
    fn test_plotter_two_point_render() {
        let points = vec![(0u64, 2.0_f32), (10u64, 1.0_f32)];
        let rendered = AsciiLossPlotter::new(40, 10).render(&points);
        assert!(!rendered.is_empty(), "should produce output for 2 points");
        assert!(rendered.iter().any(|line| line.contains('+')));
    }

    /// The output has at least one line per canvas row plus the axis lines.
    #[test]
    fn test_plotter_many_points_produce_canvas_height() {
        let height = 12;
        let points: Vec<(u64, f32)> = (0..30)
            .map(|i| (i as u64, 3.0 - i as f32 * 0.1))
            .collect();
        let rendered = AsciiLossPlotter::new(60, height).render(&points);
        assert!(rendered.len() >= height + 2);
    }

    /// A configured title appears on the first output line.
    #[test]
    fn test_plotter_with_title() {
        let points: Vec<(u64, f32)> = (0..5).map(|i| (i as u64, 1.0)).collect();
        let rendered = AsciiLossPlotter::new(50, 10)
            .with_title("Training Loss")
            .render(&points);
        assert!(rendered[0].contains("Training Loss"));
    }

    /// Both curves need at least two points.
    #[test]
    fn test_plotter_render_two_returns_empty_if_not_enough_points() {
        let plotter = AsciiLossPlotter::new(40, 10);
        let too_short = vec![(0u64, 1.0_f32)];
        let long_enough = vec![(0u64, 1.0_f32), (1u64, 0.9_f32)];
        assert!(plotter.render_two(&too_short, &long_enough).is_empty());
        assert!(plotter.render_two(&long_enough, &too_short).is_empty());
    }

    /// The last line of a two-curve plot is the legend.
    #[test]
    fn test_plotter_render_two_produces_legend_markers() {
        let train: Vec<(u64, f32)> = (0..5)
            .map(|i| (i as u64, 2.0 - i as f32 * 0.3))
            .collect();
        let val: Vec<(u64, f32)> = (0..5)
            .map(|i| (i as u64, 2.1 - i as f32 * 0.25))
            .collect();
        let rendered = AsciiLossPlotter::new(50, 10).render_two(&train, &val);
        let legend = rendered.last().unwrap();
        assert!(legend.contains("train") || legend.contains('*'));
        assert!(legend.contains("val") || legend.contains('+'));
    }

    // ---- GradientHistogram ------------------------------------------------

    /// One value per bucket lands in the matching bucket.
    #[test]
    fn test_histogram_bucket_assignment() {
        let mut hist = GradientHistogram::new(4, 0.0, 4.0);
        for value in [0.5, 1.5, 2.5, 3.5] {
            hist.add_value(value);
        }
        assert_eq!(hist.counts, vec![1, 1, 1, 1]);
    }

    /// Values below the range are counted in the first bucket.
    #[test]
    fn test_histogram_clamping_below_min() {
        let mut hist = GradientHistogram::new(4, 0.0, 4.0);
        hist.add_value(-100.0);
        assert_eq!(hist.counts[0], 1);
    }

    /// Values above the range are counted in the last bucket.
    #[test]
    fn test_histogram_clamping_above_max() {
        let mut hist = GradientHistogram::new(4, 0.0, 4.0);
        hist.add_value(999.0);
        assert_eq!(hist.counts[3], 1);
    }

    #[test]
    fn test_histogram_mean() {
        let mut hist = GradientHistogram::new(10, 0.0, 10.0);
        hist.add_values(&[1.0, 2.0, 3.0]);
        let mean = hist.mean();
        assert!((mean - 2.0).abs() < 1e-4, "expected mean≈2.0, got {}", mean);
    }

    /// Identical values have no spread.
    #[test]
    fn test_histogram_std_dev() {
        let mut hist = GradientHistogram::new(10, -10.0, 10.0);
        hist.add_values(&[0.0, 0.0, 0.0, 0.0]);
        assert!(hist.std_dev() < 1e-4);
    }

    /// Sample standard deviation of {2, 8} is sqrt(18).
    #[test]
    fn test_histogram_std_dev_known_value() {
        let mut hist = GradientHistogram::new(20, 0.0, 10.0);
        hist.add_values(&[2.0, 8.0]);
        let std = hist.std_dev();
        assert!((std - (18.0_f32).sqrt()).abs() < 0.1, "std={}", std);
    }

    #[test]
    fn test_histogram_percentile_median() {
        let mut hist = GradientHistogram::new(100, 0.0, 100.0);
        let samples: Vec<f32> = (0..100).map(|i| i as f32).collect();
        hist.add_values(&samples);
        let p50 = hist.percentile(50.0);
        assert!((p50 - 50.0).abs() < 2.0, "p50={}", p50);
    }

    /// Percentiles are monotone at the extremes.
    #[test]
    fn test_histogram_percentile_0_and_100() {
        let mut hist = GradientHistogram::new(10, 0.0, 10.0);
        hist.add_values(&[1.0, 2.0, 3.0, 4.0, 5.0]);
        let p0 = hist.percentile(0.0);
        let p100 = hist.percentile(100.0);
        assert!(p0 <= p100, "p0={}, p100={}", p0, p100);
    }

    /// Every rendered bucket line carries the bar separator.
    #[test]
    fn test_histogram_ascii_bars_non_empty() {
        let mut hist = GradientHistogram::new(5, 0.0, 5.0);
        hist.add_values(&[0.5, 1.5, 2.5, 3.5, 4.5]);
        let rendered = hist.to_ascii_bars();
        assert!(!rendered.is_empty());
        assert!(rendered
            .lines()
            .all(|line| line.contains('|') || line.is_empty()));
    }

    /// A histogram without values still renders its buckets.
    #[test]
    fn test_histogram_empty_to_ascii_bars() {
        let rendered = GradientHistogram::new(4, 0.0, 4.0).to_ascii_bars();
        assert!(!rendered.is_empty());
    }

    // ---- ActivationLayerStats ---------------------------------------------

    #[test]
    fn test_activation_stats_empty() {
        let summary = ActivationLayerStats::compute("empty_layer", &[]);
        assert_eq!(summary.zero_fraction, 1.0);
        assert_eq!(summary.mean, 0.0);
    }

    #[test]
    fn test_activation_stats_mean() {
        let activations = vec![1.0_f32, 2.0, 3.0, 4.0, 5.0];
        let stats = ActivationLayerStats::compute("fc1", &activations);
        assert!((stats.mean - 3.0).abs() < 1e-4, "mean={}", stats.mean);
    }

    /// Constant activations have zero standard deviation.
    #[test]
    fn test_activation_stats_std() {
        let activations = vec![5.0_f32; 10];
        let stats = ActivationLayerStats::compute("relu", &activations);
        assert!(stats.std < 1e-4, "std={}", stats.std);
    }

    #[test]
    fn test_activation_stats_zeros() {
        let activations = vec![0.0_f32, 0.0, 1.0, 2.0];
        let stats = ActivationLayerStats::compute("dead_relu", &activations);
        assert!((stats.zero_fraction - 0.5).abs() < 1e-4);
    }

    /// Nine zeros out of ten: dead at 0.8, not dead at 0.95.
    #[test]
    fn test_activation_stats_is_dead() {
        let activations: Vec<f32> = (0..10)
            .map(|i| if i < 9 { 0.0 } else { 1.0 })
            .collect();
        let stats = ActivationLayerStats::compute("mostly_dead", &activations);
        assert!(stats.is_dead(0.8));
        assert!(!stats.is_dead(0.95));
    }

    #[test]
    fn test_activation_stats_summary_line_contains_name() {
        let activations = vec![0.1_f32, 0.2, 0.3];
        let line = ActivationLayerStats::compute("encoder_0", &activations).to_summary_line();
        assert!(line.contains("encoder_0"));
    }

    /// All activations at the maximum magnitude count as saturated.
    #[test]
    fn test_activation_stats_saturation_fraction() {
        let activations = vec![1.0_f32; 5];
        let stats = ActivationLayerStats::compute("saturated", &activations);
        assert!(
            stats.saturation_fraction >= 0.99,
            "saturation={}",
            stats.saturation_fraction
        );
    }

    // ---- AttentionVisualizer ----------------------------------------------

    /// A uniform distribution over four entries has entropy ln(4).
    #[test]
    fn test_attention_entropy_uniform() {
        let weights = vec![0.25_f32; 4];
        let h = AttentionVisualizer::entropy(&weights);
        assert!((h - (4.0_f32).ln()).abs() < 0.01, "entropy={}", h);
    }

    #[test]
    fn test_attention_entropy_concentrated() {
        let weights = vec![0.0_f32, 0.0, 1.0, 0.0];
        let h = AttentionVisualizer::entropy(&weights);
        assert!(
            h < 1e-4,
            "entropy for concentrated distribution should be ~0, got {}",
            h
        );
    }

    #[test]
    fn test_attention_entropy_empty_row() {
        assert_eq!(AttentionVisualizer::entropy(&[]), 0.0);
    }

    /// One entropy per row; the uniform row has the larger one.
    #[test]
    fn test_attention_all_entropies_per_row() {
        let uniform_row = vec![0.25_f32; 4];
        let one_hot_row = vec![0.0, 0.0, 1.0, 0.0];
        let matrix = vec![uniform_row, one_hot_row];
        let entropies = AttentionVisualizer::all_entropies(&matrix);
        assert_eq!(entropies.len(), 2);
        assert!(entropies[0] > entropies[1], "uniform > concentrated");
    }

    #[test]
    fn test_attention_is_uniform_true() {
        let weights = vec![0.25_f32; 4];
        assert!(AttentionVisualizer::is_uniform(&weights, 0.01));
    }

    #[test]
    fn test_attention_is_uniform_false() {
        let weights = vec![0.9_f32, 0.05, 0.03, 0.02];
        assert!(!AttentionVisualizer::is_uniform(&weights, 0.01));
    }

    #[test]
    fn test_attention_render_ascii_empty() {
        let rendered = AttentionVisualizer::render_ascii(&[]);
        assert!(rendered.is_empty());
    }

    /// A 4x5 matrix renders as one header line plus four rows of five cells.
    #[test]
    fn test_attention_render_ascii_shape() {
        let matrix = vec![vec![0.1_f32; 5]; 4];
        let rendered = AttentionVisualizer::render_ascii(&matrix);
        assert_eq!(rendered.len(), 5);
        for row in rendered.iter().skip(1) {
            let char_count: usize = row.chars().count();
            assert_eq!(char_count, 5, "expected 5 chars, got {}", char_count);
        }
    }

    #[test]
    fn test_attention_visualizer_new() {
        let visualizer = AttentionVisualizer::new(3, 1);
        assert_eq!(visualizer.head_idx, 3);
        assert_eq!(visualizer.layer_idx, 1);
    }
}