zero_tui/widgets/
calibration.rs

1//! Calibration bar — a single-row gauge showing how closely the
2//! engine's stated confidence tracks realized outcome.
3//!
4//! The model: over the last N graded decisions, average the
5//! engine's stated confidence (`predicted`) and the fraction of
6//! those decisions whose outcome matched the engine's verdict
7//! (`observed`). A well-calibrated engine has
8//! `|predicted - observed| → 0`.
9//!
10//! # Layout (width ≥ 40)
11//!
12//! ```text
//!  calib  [■■■■■■■■■■■■■■□□□□□□]  pred 72% / obs 68%  n=134
14//! ```
15//!
16//! - The bar is always the fixed-width (`BAR_CELLS`) cell gauge;
17//!   shading shows the **observed** rate so the eye anchors on
18//!   what actually happened.
19//! - Text to the right shows both numbers explicitly. We never
20//!   show only one — the whole point of the widget is the gap.
21//! - `n=` is the decision count contributing to this figure; the
22//!   operator uses it to gauge statistical weight.
23//! - Color tracks the **gap**, not either side:
24//!   - `|pred - obs| ≤ 5pp` → primary (well-calibrated).
25//!   - `|pred - obs| ≤ 15pp` → caution.
26//!   - `> 15pp` → alert.
27//!
28//! # Honest "insufficient data" state
29//!
30//! Below `MIN_SAMPLES` the widget renders the low-contrast
31//! notice:
32//!
33//! ```text
34//!  calib  (insufficient data — need ≥30 graded decisions, have 12)
35//! ```
36//!
37//! No gauge is drawn. Showing a bar with a wobbling two-sample
38//! average would be confidently wrong, which is exactly the
39//! failure mode the widget is built to prevent.
40//!
41//! # Width guard
42//!
//! When `area.width < 30` the gauge is omitted; the label, the
//! `pred`/`obs` percentages and the count still render. Drawing a
//! sub-10-cell gauge gives no usable information and invites misreading.
46
47use ratatui::buffer::Buffer;
48use ratatui::layout::Rect;
49use ratatui::style::{Modifier, Style};
50use ratatui::text::{Line, Span};
51use ratatui::widgets::Widget;
52
53use crate::theme::Theme;
54
/// Minimum number of graded decisions (`CalibrationSample::n_samples`)
/// required before a bar is shown. Below this threshold the widget
/// renders the honest "insufficient data" notice instead of a gauge.
pub const MIN_SAMPLES: usize = 30;
58
/// Fixed width, in cells, of the bar portion of the gauge. Chosen so
/// each cell is exactly 5 percentage points (100 / 20).
pub const BAR_CELLS: usize = 20;
62
/// Observed/predicted calibration sample, as surfaced by a
/// future `/calibration` or `/evaluate --report` engine call.
/// We keep the shape minimal here so the widget can be unit
/// tested without pulling in the full engine client model.
///
/// The derived `Default` is an all-zero sample (`n_samples == 0`),
/// which is below `MIN_SAMPLES` and therefore renders as the
/// "insufficient data" notice rather than as a gauge.
#[derive(Debug, Clone, Copy, Default)]
pub struct CalibrationSample {
    /// Average confidence the engine stated over the graded decisions,
    /// as a fraction. The widget clamps it to `[0.0, 1.0]` before display.
    pub predicted: f64,
    /// Fraction of those decisions whose outcome matched the engine's
    /// verdict. The widget clamps it to `[0.0, 1.0]` before display.
    pub observed: f64,
    /// Number of graded decisions contributing to the two rates; shown
    /// as `n=` and compared against `MIN_SAMPLES`.
    pub n_samples: usize,
}
73
/// Single-row calibration gauge widget.
///
/// Rendering consumes the widget (`Widget::render` takes `self`), so a
/// fresh value is built for every frame.
#[derive(Debug)]
pub struct CalibrationBar {
    /// Sample to display. `None` renders the "no calibration data yet"
    /// notice; a sample with fewer than `MIN_SAMPLES` decisions renders
    /// the "insufficient data" notice.
    pub sample: Option<CalibrationSample>,
    /// Colour palette; the widget reads its `primary`, `metadata`,
    /// `caution` and `alert` entries.
    pub theme: Theme,
}
79
80impl Widget for CalibrationBar {
81    fn render(self, area: Rect, buf: &mut Buffer) {
82        if area.height == 0 || area.width == 0 {
83            return;
84        }
85        let row = Rect {
86            x: area.x,
87            y: area.y,
88            width: area.width,
89            height: 1,
90        };
91
92        let Some(sample) = self.sample else {
93            Line::from(vec![
94                Span::styled(" calib  ", Style::default().fg(self.theme.primary)),
95                Span::styled(
96                    "(no calibration data yet — engine has not reported a sample)",
97                    Style::default().fg(self.theme.metadata),
98                ),
99            ])
100            .render(row, buf);
101            return;
102        };
103
104        if sample.n_samples < MIN_SAMPLES {
105            Line::from(vec![
106                Span::styled(" calib  ", Style::default().fg(self.theme.primary)),
107                Span::styled(
108                    format!(
109                        "(insufficient data — need ≥{MIN_SAMPLES} graded decisions, have {})",
110                        sample.n_samples
111                    ),
112                    Style::default().fg(self.theme.metadata),
113                ),
114            ])
115            .render(row, buf);
116            return;
117        }
118
119        let pred = clamp_unit(sample.predicted);
120        let obs = clamp_unit(sample.observed);
121        let gap = (pred - obs).abs();
122
123        let gap_style = if gap <= 0.05 {
124            Style::default().fg(self.theme.primary)
125        } else if gap <= 0.15 {
126            Style::default().fg(self.theme.caution)
127        } else {
128            Style::default()
129                .fg(self.theme.alert)
130                .add_modifier(Modifier::BOLD)
131        };
132
133        let mut spans = vec![Span::styled(
134            " calib  ",
135            Style::default().fg(self.theme.primary),
136        )];
137
138        if usize::from(area.width) >= 30 {
139            let bar = render_bar(obs, pred);
140            spans.push(Span::styled("[", Style::default().fg(self.theme.metadata)));
141            spans.push(Span::styled(bar, gap_style));
142            spans.push(Span::styled("] ", Style::default().fg(self.theme.metadata)));
143        }
144
145        spans.push(Span::styled(
146            format!(" pred {}% / obs {}%", pct(pred), pct(obs)),
147            gap_style,
148        ));
149        spans.push(Span::styled(
150            format!("  n={}", sample.n_samples),
151            Style::default().fg(self.theme.metadata),
152        ));
153
154        Line::from(spans).render(row, buf);
155    }
156}
157
/// Restrict `v` to the closed unit interval `[0.0, 1.0]`.
///
/// Values below zero become `0.0` and values above one become `1.0`.
/// NaN is passed through unchanged, as `f64::clamp` specifies.
fn clamp_unit(v: f64) -> f64 {
    const UNIT_MIN: f64 = 0.0;
    const UNIT_MAX: f64 = 1.0;
    f64::clamp(v, UNIT_MIN, UNIT_MAX)
}
161
162fn pct(v: f64) -> i32 {
163    // Bounded in `[0, 100]` after `clamp_unit` — the cast cannot
164    // truncate or wrap.
165    #[allow(clippy::cast_possible_truncation)]
166    let p = (clamp_unit(v) * 100.0).round() as i32;
167    p
168}
169
170/// Map a fractional `[0.0, 1.0]` rate to a cell index in the
171/// fixed `BAR_CELLS`-wide gauge. The cast is bounded (`BAR_CELLS`
172/// fits well within the `f64` mantissa and the rounded product
173/// is in `[0, BAR_CELLS]`), but we still clamp with `.min()` as
174/// a last-line defense against a denormal-valued input.
175#[allow(
176    clippy::cast_possible_truncation,
177    clippy::cast_sign_loss,
178    clippy::cast_precision_loss
179)]
180fn cells_for(rate: f64) -> usize {
181    let scaled = (rate * BAR_CELLS as f64).round();
182    let as_usize = if scaled.is_finite() && scaled >= 0.0 {
183        scaled as usize
184    } else {
185        0
186    };
187    as_usize.min(BAR_CELLS)
188}
189
190/// Render the bar: fill up to `observed`, then mark `predicted`
191/// with a distinguishable glyph if it falls in a different cell.
192/// When the two land in the same cell, the observed glyph wins
193/// (we don't double-draw).
194fn render_bar(observed: f64, predicted: f64) -> String {
195    let obs_cells = cells_for(observed);
196    let pred_cells = cells_for(predicted);
197    let mut s = String::with_capacity(BAR_CELLS);
198    for i in 0..BAR_CELLS {
199        let filled = i < obs_cells;
200        let is_pred_marker = pred_cells > 0 && i + 1 == pred_cells && pred_cells != obs_cells;
201        s.push(match (filled, is_pred_marker) {
202            // Observed fill covers the predicted cell — drop the
203            // marker so the bar reads cleanly.
204            (true, _) => '■',
205            (false, true) => '│',
206            (false, false) => '□',
207        });
208    }
209    s
210}
211
#[cfg(test)]
mod tests {
    use super::*;
    use ratatui::Terminal;
    use ratatui::backend::TestBackend;

    /// Draw a `CalibrationBar` into a one-row test terminal of the given
    /// width and return the row's text with trailing blanks removed.
    fn render(sample: Option<CalibrationSample>, width: u16) -> String {
        let mut terminal = Terminal::new(TestBackend::new(width, 1)).expect("term");
        terminal
            .draw(|frame| {
                let widget = CalibrationBar {
                    sample,
                    theme: Theme::default(),
                };
                frame.render_widget(widget, frame.area());
            })
            .expect("draw");

        let buffer = terminal.backend().buffer();
        let mut text = String::new();
        for x in 0..buffer.area.width {
            text.push_str(buffer[(x, 0)].symbol());
        }
        text.trim_end().to_string()
    }

    /// Shorthand constructor for a sample.
    fn sample_of(predicted: f64, observed: f64, n_samples: usize) -> CalibrationSample {
        CalibrationSample {
            predicted,
            observed,
            n_samples,
        }
    }

    /// Number of times `glyph` occurs in `text`.
    fn count_glyph(text: &str, glyph: char) -> usize {
        text.matches(glyph).count()
    }

    #[test]
    fn none_sample_renders_no_data_state() {
        let line = render(None, 80);
        assert!(line.contains("calib"));
        assert!(line.contains("no calibration data"));
        assert!(!line.contains("pred"), "must not show fake numbers");
        assert!(!line.contains('■'), "must not draw fake bar");
    }

    #[test]
    fn below_min_samples_renders_insufficient_data_state() {
        let line = render(Some(sample_of(0.7, 0.65, 12)), 80);
        assert!(line.contains("insufficient data"));
        assert!(line.contains("have 12"));
        assert!(!line.contains('■'), "bar must not render below MIN_SAMPLES");
    }

    #[test]
    fn above_min_samples_renders_bar_and_numbers() {
        let line = render(Some(sample_of(0.72, 0.68, 134)), 80);
        assert!(line.contains('■'), "expected filled bar cells: {line:?}");
        assert!(line.contains("pred 72%"), "pred missing: {line:?}");
        assert!(line.contains("obs 68%"), "obs missing: {line:?}");
        assert!(line.contains("n=134"), "n missing: {line:?}");
    }

    #[test]
    fn narrow_width_drops_bar_but_keeps_numbers() {
        let line = render(Some(sample_of(0.5, 0.5, 100)), 29);
        assert!(
            !line.contains('■'),
            "bar should not render at width<30: {line:?}"
        );
        assert!(line.contains("pred 50%"), "pred still required: {line:?}");
    }

    #[test]
    fn pct_clamps_out_of_range() {
        assert_eq!(pct(-0.1), 0);
        assert_eq!(pct(1.5), 100);
        assert_eq!(pct(0.5), 50);
    }

    #[test]
    fn bar_observed_cells_match_rounded_fraction() {
        let bar = render_bar(0.5, 0.5);
        let filled = count_glyph(&bar, '■');
        assert_eq!(filled, 10, "50% observed should fill 10/{BAR_CELLS} cells");
    }

    #[test]
    fn bar_predicted_marker_visible_when_gap_nonzero() {
        // Observed fills 10 cells; the predicted marker sits in the
        // 16th cell, past the fill.
        let bar = render_bar(0.5, 0.8);
        let filled = count_glyph(&bar, '■');
        let markers = count_glyph(&bar, '│');
        assert_eq!(filled, 10);
        assert_eq!(markers, 1, "predicted marker must render when > observed");
    }
}
zero_tui/widgets/calibration.rs

zero_tui/widgets/
calibration.rs