use std::collections::VecDeque;
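/// Throughput target, in tokens per second, used by the M4 parity
/// checks below. Kept as a module constant so the default config and
/// the parity helpers agree on a single value instead of repeating
/// the magic number `192.0`.
pub const M4_TARGET_TOK_PER_SEC: f64 = 192.0;

/// Configuration for the terminal dashboard: refresh cadence, which
/// panels to draw, the box title, the parity target, and the total
/// rendered width in columns. A sketch of overriding a single field
/// while keeping the rest of the defaults:
///
/// ```ignore
/// let config = TuiConfig { width: 80, ..TuiConfig::default() };
/// ```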
#[derive(Debug, Clone)]
pub struct TuiConfig {
pub refresh_rate_ms: u64,
pub show_throughput_sparkline: bool,
pub show_latency_sparkline: bool,
pub show_gpu_memory: bool,
pub title: String,
pub m4_target_tok_per_sec: f64,
pub width: usize,
}
impl Default for TuiConfig {
fn default() -> Self {
Self {
refresh_rate_ms: 100,
show_throughput_sparkline: true,
show_latency_sparkline: true,
show_gpu_memory: true,
title: "realizar Inference Monitor".to_string(),
            m4_target_tok_per_sec: M4_TARGET_TOK_PER_SEC,
width: 65,
}
}
}
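/// A point-in-time snapshot of inference engine metrics: throughput,
/// latency (mean and P95), GPU memory usage, batching/queue state,
/// and cumulative token/request counters.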
#[derive(Debug, Clone, Default)]
pub struct InferenceMetrics {
pub throughput_tok_per_sec: f64,
pub latency_ms: f64,
pub latency_p95_ms: f64,
pub gpu_memory_bytes: u64,
pub gpu_memory_total_bytes: u64,
pub batch_size: usize,
pub queue_size: usize,
pub total_tokens: u64,
pub total_requests: u64,
pub running: bool,
pub using_gpu: bool,
}
impl InferenceMetrics {
#[must_use]
pub fn new() -> Self {
Self::default()
}
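    /// Returns `true` when throughput meets or exceeds the M4 target.
    /// Note this compares against the module constant, not a
    /// per-config `m4_target_tok_per_sec` override.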
#[must_use]
pub fn achieves_m4_parity(&self) -> bool {
        self.throughput_tok_per_sec >= M4_TARGET_TOK_PER_SEC
}
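    /// Returns the remaining speedup factor needed to reach the M4
    /// target (e.g. `3.0` means "3x away"), or infinity when no
    /// throughput has been observed yet.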
#[must_use]
pub fn gap_to_m4(&self) -> f64 {
if self.throughput_tok_per_sec > 0.0 {
            M4_TARGET_TOK_PER_SEC / self.throughput_tok_per_sec
} else {
f64::INFINITY
}
}
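    /// Formats used/total GPU memory as decimal gigabytes, e.g.
    /// `"4.2 GB / 24.0 GB"`.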
#[must_use]
pub fn format_gpu_memory(&self) -> String {
let used_gb = self.gpu_memory_bytes as f64 / 1e9;
let total_gb = self.gpu_memory_total_bytes as f64 / 1e9;
format!("{:.1} GB / {:.1} GB", used_gb, total_gb)
}
}
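/// Fixed-width terminal dashboard for inference metrics. Keeps
/// bounded throughput/latency histories and renders a box-drawn
/// status panel as a plain `String`, so no terminal backend is
/// required. A minimal usage sketch:
///
/// ```ignore
/// let mut tui = InferenceTui::new(TuiConfig::default());
/// tui.update(&InferenceMetrics {
///     throughput_tok_per_sec: 64.0,
///     running: true,
///     ..Default::default()
/// });
/// println!("{}", tui.render_to_string());
/// ```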
#[derive(Debug, Clone)]
pub struct InferenceTui {
config: TuiConfig,
metrics: InferenceMetrics,
throughput_history: VecDeque<f64>,
latency_history: VecDeque<f64>,
max_history: usize,
}
impl InferenceTui {
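    /// Creates a dashboard with the given configuration, empty
    /// sparkline histories, and a 40-sample history cap.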
#[must_use]
pub fn new(config: TuiConfig) -> Self {
Self {
config,
metrics: InferenceMetrics::default(),
throughput_history: VecDeque::new(),
latency_history: VecDeque::new(),
max_history: 40,
}
}
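    /// Applies a new metrics snapshot and appends it to the sparkline
    /// histories, evicting the oldest samples once `max_history` is
    /// exceeded.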
pub fn update(&mut self, metrics: &InferenceMetrics) {
self.metrics = metrics.clone();
self.throughput_history
.push_back(metrics.throughput_tok_per_sec);
self.latency_history.push_back(metrics.latency_ms);
while self.throughput_history.len() > self.max_history {
self.throughput_history.pop_front();
}
while self.latency_history.len() > self.max_history {
self.latency_history.pop_front();
}
}
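    /// Renders samples as a fixed-width Unicode sparkline: each value
    /// is min-max normalized, then mapped to one of eight block
    /// glyphs (`▁` through `█`). Shorter histories are right-padded
    /// with spaces.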
fn sparkline(values: &VecDeque<f64>, width: usize) -> String {
const BLOCKS: [char; 8] = ['▁', '▂', '▃', '▄', '▅', '▆', '▇', '█'];
if values.is_empty() {
return " ".repeat(width);
}
        let max = values.iter().copied().fold(f64::NEG_INFINITY, f64::max);
        let min = values.iter().copied().fold(f64::INFINITY, f64::min);
        // Avoid division by zero when all samples are equal.
        let range = (max - min).max(0.001);
        // Render the most recent `width` samples; `take(width)` would
        // show the oldest ones once the history outgrows the window.
        let mut result: String = values
            .iter()
            .skip(values.len().saturating_sub(width))
            .map(|&v| {
                let normalized = (v - min) / range;
                let level = (normalized * 7.0).round().clamp(0.0, 7.0) as usize;
                BLOCKS[level]
            })
            .collect();
while result.chars().count() < width {
result.push(' ');
}
result
}
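    /// Renders the full dashboard as a multi-line `String`: a
    /// box-drawn frame with a centered title, metric rows, optional
    /// sparkline rows, and a status footer. Panels are toggled via
    /// `TuiConfig`.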
#[must_use]
pub fn render_to_string(&self) -> String {
        let w = self.config.width;
        let inner_w = w.saturating_sub(2);
        let mut lines = Vec::new();
        lines.push(format!("╭{}╮", "─".repeat(inner_w)));
        // Center the title by character count; `str::len` measures
        // bytes and would misplace a multi-byte title.
        let title = &self.config.title;
        let title_len = title.chars().count().min(inner_w);
        let padding = (inner_w - title_len) / 2;
        lines.push(format!(
            "│{}{}{}│",
            " ".repeat(padding),
            title,
            " ".repeat(inner_w - padding - title_len)
        ));
        lines.push(format!("├{}┤", "─".repeat(inner_w)));
let status_icon = if self.metrics.achieves_m4_parity() {
"✓"
} else {
"○"
};
let throughput_line = format!(
" Throughput: {:.1} tok/s {} Target: {:.0} tok/s (M4)",
self.metrics.throughput_tok_per_sec, status_icon, self.config.m4_target_tok_per_sec
);
lines.push(Self::pad_line(&throughput_line, inner_w));
let latency_line = format!(
" Latency: {:.1} ms/tok P95: {:.1} ms",
self.metrics.latency_ms, self.metrics.latency_p95_ms
);
lines.push(Self::pad_line(&latency_line, inner_w));
if self.config.show_gpu_memory {
let gpu_line = format!(" GPU Memory: {}", self.metrics.format_gpu_memory());
lines.push(Self::pad_line(&gpu_line, inner_w));
}
let batch_line = format!(
" Batch Size: {} Queue: {} pending",
self.metrics.batch_size, self.metrics.queue_size
);
lines.push(Self::pad_line(&batch_line, inner_w));
lines.push(format!("├{}┤", "─".repeat(w - 2)));
if self.config.show_throughput_sparkline {
let sparkline = Self::sparkline(&self.throughput_history, 40);
let spark_line = format!(" Throughput: {}", sparkline);
lines.push(Self::pad_line(&spark_line, inner_w));
}
if self.config.show_latency_sparkline {
let sparkline = Self::sparkline(&self.latency_history, 40);
let spark_line = format!(" Latency: {}", sparkline);
lines.push(Self::pad_line(&spark_line, inner_w));
}
lines.push(format!("├{}┤", "─".repeat(w - 2)));
let status = if self.metrics.running {
"● Running"
} else {
"○ Stopped"
};
let gpu_status = if self.metrics.using_gpu { "GPU" } else { "CPU" };
let status_line = format!(
" Status: {} [{:>3}] Tokens: {:>6} Requests: {:>4}",
status, gpu_status, self.metrics.total_tokens, self.metrics.total_requests
);
lines.push(Self::pad_line(&status_line, inner_w));
lines.push(format!("╰{}╯", "─".repeat(w - 2)));
lines.join("\n")
}
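    /// Wraps `content` in box-drawing borders, padding or truncating
    /// to exactly `width` display characters.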
    fn pad_line(content: &str, width: usize) -> String {
        let content_len = content.chars().count();
        if content_len >= width {
            // Truncate on character boundaries; byte slicing with
            // `&content[..width]` can split a multi-byte glyph (such
            // as a sparkline block) and panic.
            let truncated: String = content.chars().take(width).collect();
            format!("│{}│", truncated)
        } else {
            format!("│{}{}│", content, " ".repeat(width - content_len))
        }
    }
#[must_use]
pub fn metrics(&self) -> &InferenceMetrics {
&self.metrics
}
#[must_use]
pub fn throughput_history(&self) -> &VecDeque<f64> {
&self.throughput_history
}
#[must_use]
pub fn latency_history(&self) -> &VecDeque<f64> {
&self.latency_history
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parity_090a_tui_config_defaults() {
println!("PARITY-090a: TuiConfig Default Values");
let config = TuiConfig::default();
println!(" refresh_rate_ms: {}", config.refresh_rate_ms);
println!(" m4_target_tok_per_sec: {}", config.m4_target_tok_per_sec);
println!(" width: {}", config.width);
assert_eq!(config.refresh_rate_ms, 100);
assert_eq!(config.m4_target_tok_per_sec, 192.0);
assert!(config.show_throughput_sparkline);
assert!(config.show_latency_sparkline);
}
#[test]
fn test_parity_090b_inference_metrics() {
println!("PARITY-090b: InferenceMetrics");
let metrics = InferenceMetrics {
throughput_tok_per_sec: 64.0,
latency_ms: 15.6,
latency_p95_ms: 23.4,
gpu_memory_bytes: 4_200_000_000,
gpu_memory_total_bytes: 24_000_000_000,
batch_size: 4,
queue_size: 12,
total_tokens: 1234,
total_requests: 42,
running: true,
using_gpu: true,
};
println!(" throughput: {:.1} tok/s", metrics.throughput_tok_per_sec);
println!(" achieves_m4: {}", metrics.achieves_m4_parity());
println!(" gap_to_m4: {:.2}x", metrics.gap_to_m4());
println!(" gpu_memory: {}", metrics.format_gpu_memory());
assert!(!metrics.achieves_m4_parity());
assert!((metrics.gap_to_m4() - 3.0).abs() < 0.1);
assert!(metrics.format_gpu_memory().contains("4.2 GB"));
}
#[test]
fn test_parity_090c_m4_parity_detection() {
println!("PARITY-090c: M4 Parity Detection");
let test_cases = [
(64.0, false, "Baseline - not M4"),
(150.0, false, "Batch threshold - not M4"),
(192.0, true, "Exactly M4"),
(256.0, true, "Above M4"),
];
for (throughput, expected, description) in test_cases {
let metrics = InferenceMetrics {
throughput_tok_per_sec: throughput,
..Default::default()
};
let achieves = metrics.achieves_m4_parity();
println!(" {}: {} tok/s → M4={}", description, throughput, achieves);
assert_eq!(achieves, expected, "{}", description);
}
}
#[test]
fn test_parity_091a_tui_creation_update() {
println!("PARITY-091a: TUI Creation and Update");
let config = TuiConfig::default();
let mut tui = InferenceTui::new(config);
let metrics = InferenceMetrics {
throughput_tok_per_sec: 64.0,
latency_ms: 15.6,
running: true,
..Default::default()
};
tui.update(&metrics);
assert_eq!(tui.metrics().throughput_tok_per_sec, 64.0);
assert_eq!(tui.throughput_history().len(), 1);
}
#[test]
fn test_parity_091b_sparkline_generation() {
println!("PARITY-091b: Sparkline Generation");
let mut history = VecDeque::new();
for i in 0..20 {
history.push_back((i as f64) * 10.0);
}
let sparkline = InferenceTui::sparkline(&history, 20);
println!(" Sparkline: {}", sparkline);
assert_eq!(sparkline.chars().count(), 20);
        assert!(sparkline.contains('▁'));
        assert!(sparkline.contains('█'));
    }
#[test]
fn test_parity_091c_tui_render_structure() {
println!("PARITY-091c: TUI Render Output Structure");
let config = TuiConfig::default();
let mut tui = InferenceTui::new(config);
for i in 0..10 {
let metrics = InferenceMetrics {
throughput_tok_per_sec: 50.0 + (i as f64) * 5.0,
latency_ms: 20.0 - (i as f64),
batch_size: 4,
queue_size: 12,
total_tokens: 1234,
total_requests: 42,
running: true,
using_gpu: true,
..Default::default()
};
tui.update(&metrics);
}
let output = tui.render_to_string();
println!("{}", output);
assert!(output.contains("╭"), "Should have top border");
assert!(output.contains("╰"), "Should have bottom border");
assert!(
output.contains("realizar Inference Monitor"),
"Should have title"
);
assert!(output.contains("Throughput:"), "Should show throughput");
assert!(output.contains("Latency:"), "Should show latency");
assert!(output.contains("tok/s"), "Should show tok/s unit");
assert!(output.contains("● Running"), "Should show running status");
}
#[test]
fn test_parity_091d_visual_regression_baseline() {
println!("PARITY-091d: Visual Regression Baseline");
let config = TuiConfig {
width: 65,
..Default::default()
};
let mut tui = InferenceTui::new(config);
let metrics = InferenceMetrics {
throughput_tok_per_sec: 64.2,
latency_ms: 15.6,
latency_p95_ms: 23.4,
gpu_memory_bytes: 4_200_000_000,
gpu_memory_total_bytes: 24_000_000_000,
batch_size: 4,
queue_size: 12,
total_tokens: 1234,
total_requests: 42,
running: true,
using_gpu: true,
};
tui.update(&metrics);
let output = tui.render_to_string();
println!("=== GOLDEN BASELINE ===");
println!("{}", output);
println!("=== END BASELINE ===");
let lines: Vec<&str> = output.lines().collect();
assert!(lines.len() >= 10, "Should have at least 10 lines");
assert!(lines[0].starts_with('╭'));
assert!(lines[0].ends_with('╮'));
assert!(lines.last().unwrap().starts_with('╰'));
assert!(lines.last().unwrap().ends_with('╯'));
assert!(
output.contains("64.2 tok/s"),
"Should show throughput value"
);
assert!(output.contains("15.6 ms/tok"), "Should show latency value");
assert!(output.contains("1234"), "Should show token count");
}
#[test]
fn test_parity_091e_history_accumulation() {
println!("PARITY-091e: History Accumulation");
let config = TuiConfig::default();
let mut tui = InferenceTui::new(config);
for i in 0..50 {
let metrics = InferenceMetrics {
throughput_tok_per_sec: (i as f64) * 2.0,
latency_ms: 100.0 - (i as f64),
..Default::default()
};
tui.update(&metrics);
}
assert_eq!(tui.throughput_history().len(), 40);
assert_eq!(tui.latency_history().len(), 40);
assert!((tui.throughput_history().back().unwrap() - 98.0).abs() < 0.1);
}
#[test]
fn test_parity_091f_empty_sparkline() {
println!("PARITY-091f: Empty Sparkline Handling");
let empty: VecDeque<f64> = VecDeque::new();
let sparkline = InferenceTui::sparkline(&empty, 20);
println!(" Empty sparkline: '{}'", sparkline);
        assert_eq!(sparkline.chars().count(), 20);
assert!(sparkline.chars().all(|c| c == ' '));
}
}