Skip to main content

perfgate_app/
watch.rs

1//! Watch use case — re-run benchmarks on file changes with live terminal output.
2//!
3//! This module provides the core logic for the `perfgate watch` command.
4//! It runs benchmarks, compares against a baseline, and tracks trend history
5//! for display in a live terminal UI.
6//!
7//! The filesystem-watching layer lives in the CLI crate; this module handles
8//! the benchmark execution cycle and result formatting.
9
10use crate::{CheckRequest, CheckUseCase, Clock, format_metric, format_value};
11use perfgate_adapters::{HostProbe, ProcessRunner};
12use perfgate_types::{
13    ConfigFile, HostMismatchPolicy, Metric, MetricStatus, RunReceipt, ToolInfo, VerdictStatus,
14};
15use std::collections::BTreeMap;
16use std::path::PathBuf;
17use std::time::Instant;
18
19// Re-export CheckOutcome for external use
20pub use crate::CheckOutcome;
21
22/// Request for a single watch iteration.
23#[derive(Debug, Clone)]
24pub struct WatchRunRequest {
25    /// The loaded configuration file.
26    pub config: ConfigFile,
27
28    /// Name of the bench to run.
29    pub bench_name: String,
30
31    /// Output directory for artifacts.
32    pub out_dir: PathBuf,
33
34    /// Optional baseline receipt (already loaded).
35    pub baseline: Option<RunReceipt>,
36
37    /// Path to the baseline file.
38    pub baseline_path: Option<PathBuf>,
39
40    /// Tool info for receipts.
41    pub tool: ToolInfo,
42
43    /// Environment variables for the benchmark.
44    pub env: Vec<(String, String)>,
45
46    /// Max bytes captured from stdout/stderr per run.
47    pub output_cap_bytes: usize,
48
49    /// Policy for handling host mismatches.
50    pub host_mismatch_policy: HostMismatchPolicy,
51}
52
53/// Result of a single watch iteration.
54#[derive(Debug, Clone)]
55pub struct WatchRunResult {
56    /// The check outcome from this iteration.
57    pub outcome: CheckOutcome,
58
59    /// How long the benchmark took to run.
60    pub elapsed: std::time::Duration,
61}
62
/// Which way a metric has been moving over recent watch iterations.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TrendDirection {
    /// The metric is getting better (faster/smaller).
    Improving,
    /// The metric is getting worse (slower/larger).
    Degrading,
    /// The metric is holding steady (changes are within noise).
    Stable,
}
73
74/// A snapshot of metric trends accumulated over watch iterations.
75#[derive(Debug, Clone)]
76pub struct MetricTrend {
77    /// Recent delta percentages for this metric.
78    pub history: Vec<f64>,
79    /// The current trend direction.
80    pub direction: TrendDirection,
81}
82
83/// Accumulated state across watch iterations.
84#[derive(Debug, Clone)]
85pub struct WatchState {
86    /// Number of completed iterations.
87    pub iteration_count: u32,
88    /// Metric trend data.
89    pub trends: BTreeMap<Metric, MetricTrend>,
90    /// Last run result (if any).
91    pub last_result: Option<WatchRunResult>,
92    /// When the last run completed.
93    pub last_run_time: Option<Instant>,
94    /// Total number of passes.
95    pub pass_count: u32,
96    /// Total number of warnings.
97    pub warn_count: u32,
98    /// Total number of failures.
99    pub fail_count: u32,
100}
101
102impl WatchState {
103    /// Create a new empty watch state.
104    pub fn new() -> Self {
105        Self {
106            iteration_count: 0,
107            trends: BTreeMap::new(),
108            last_result: None,
109            last_run_time: None,
110            pass_count: 0,
111            warn_count: 0,
112            fail_count: 0,
113        }
114    }
115
116    /// Update the state with a new watch run result.
117    pub fn update(&mut self, result: WatchRunResult) {
118        self.iteration_count += 1;
119        self.last_run_time = Some(Instant::now());
120
121        // Update verdict counts
122        if let Some(compare) = &result.outcome.compare_receipt {
123            match compare.verdict.status {
124                VerdictStatus::Pass | VerdictStatus::Skip => self.pass_count += 1,
125                VerdictStatus::Warn => self.warn_count += 1,
126                VerdictStatus::Fail => self.fail_count += 1,
127            }
128
129            // Update metric trends
130            for (metric, delta) in &compare.deltas {
131                let trend = self.trends.entry(*metric).or_insert_with(|| MetricTrend {
132                    history: Vec::new(),
133                    direction: TrendDirection::Stable,
134                });
135                trend.history.push(delta.pct);
136                // Keep at most 20 entries in trend history
137                if trend.history.len() > MAX_TREND_HISTORY {
138                    trend.history.remove(0);
139                }
140                trend.direction = compute_trend_direction(&trend.history);
141            }
142        } else {
143            // No baseline means pass
144            self.pass_count += 1;
145        }
146
147        self.last_result = Some(result);
148    }
149}
150
151impl Default for WatchState {
152    fn default() -> Self {
153        Self::new()
154    }
155}
156
/// Cap on how many delta percentages are retained per metric.
const MAX_TREND_HISTORY: usize = 20;

/// Averaged deltas whose magnitude is below this value count as stable
/// (0.01 corresponds to a 1% change).
const STABLE_THRESHOLD: f64 = 0.01;
162
163/// Compute trend direction from a history of delta percentages.
164///
165/// Uses a simple moving average of the last 3 entries to determine direction.
166pub fn compute_trend_direction(history: &[f64]) -> TrendDirection {
167    if history.len() < 2 {
168        return TrendDirection::Stable;
169    }
170
171    let window = if history.len() >= 3 {
172        &history[history.len() - 3..]
173    } else {
174        history
175    };
176
177    let avg: f64 = window.iter().sum::<f64>() / window.len() as f64;
178
179    if avg.abs() < STABLE_THRESHOLD {
180        TrendDirection::Stable
181    } else if avg > 0.0 {
182        // Positive pct means current > baseline, which is a regression for "lower is better"
183        // metrics. But since pct is already direction-aware from the comparison logic,
184        // positive = worse, negative = better.
185        TrendDirection::Degrading
186    } else {
187        TrendDirection::Improving
188    }
189}
190
191/// Execute a single watch iteration: run the benchmark and compare against baseline.
192pub fn execute_watch_run<R: ProcessRunner + Clone, H: HostProbe + Clone, C: Clock + Clone>(
193    runner: R,
194    host_probe: H,
195    clock: C,
196    request: &WatchRunRequest,
197) -> anyhow::Result<WatchRunResult> {
198    let start = Instant::now();
199
200    let usecase = CheckUseCase::new(runner, host_probe, clock);
201    let outcome = usecase.execute(CheckRequest {
202        config: request.config.clone(),
203        bench_name: request.bench_name.clone(),
204        out_dir: request.out_dir.clone(),
205        baseline: request.baseline.clone(),
206        baseline_path: request.baseline_path.clone(),
207        require_baseline: false,
208        fail_on_warn: false,
209        noise_threshold: None,
210        noise_policy: None,
211        tool: request.tool.clone(),
212        env: request.env.clone(),
213        output_cap_bytes: request.output_cap_bytes,
214        allow_nonzero: false,
215        host_mismatch_policy: request.host_mismatch_policy,
216        significance_alpha: None,
217        significance_min_samples: 8,
218        require_significance: false,
219    })?;
220
221    let elapsed = start.elapsed();
222    Ok(WatchRunResult { outcome, elapsed })
223}
224
225/// Format the trend direction as a terminal-friendly string.
226pub fn trend_arrow(direction: TrendDirection) -> &'static str {
227    match direction {
228        TrendDirection::Improving => ">> improving",
229        TrendDirection::Degrading => ">> degrading",
230        TrendDirection::Stable => ">> stable",
231    }
232}
233
234/// Format the verdict status as a terminal-friendly string.
235pub fn verdict_display(status: VerdictStatus) -> &'static str {
236    match status {
237        VerdictStatus::Pass => "PASS",
238        VerdictStatus::Warn => "WARN",
239        VerdictStatus::Fail => "FAIL",
240        VerdictStatus::Skip => "SKIP",
241    }
242}
243
244/// Render the watch state as a plain-text terminal display.
245///
246/// Returns a vector of lines to print. The caller is responsible for
247/// clearing the screen and printing these lines.
248pub fn render_watch_display(state: &WatchState, bench_name: &str, status: &str) -> Vec<String> {
249    let mut lines = Vec::new();
250
251    lines.push(format!(
252        "perfgate watch | bench: {} | status: {}",
253        bench_name, status
254    ));
255    lines.push(format!(
256        "iterations: {} | pass: {} | warn: {} | fail: {}",
257        state.iteration_count, state.pass_count, state.warn_count, state.fail_count
258    ));
259
260    if let Some(last_run_time) = state.last_run_time {
261        let ago = last_run_time.elapsed();
262        lines.push(format!("last run: {}s ago", ago.as_secs()));
263    }
264
265    lines.push(String::new());
266
267    if let Some(result) = &state.last_result {
268        if let Some(compare) = &result.outcome.compare_receipt {
269            lines.push(format!(
270                "verdict: {} (ran in {:.1}s)",
271                verdict_display(compare.verdict.status),
272                result.elapsed.as_secs_f64()
273            ));
274            lines.push(String::new());
275
276            // Table header
277            lines.push(format!(
278                "{:<20} {:>12} {:>12} {:>10} {:>8}  {}",
279                "Metric", "Baseline", "Current", "Delta", "Status", "Trend"
280            ));
281            lines.push("-".repeat(80));
282
283            for (metric, delta) in &compare.deltas {
284                let status_str = match delta.status {
285                    MetricStatus::Pass => "pass",
286                    MetricStatus::Warn => "WARN",
287                    MetricStatus::Fail => "FAIL",
288                    MetricStatus::Skip => "skip",
289                };
290
291                let trend_str = state
292                    .trends
293                    .get(metric)
294                    .map(|t| trend_arrow(t.direction))
295                    .unwrap_or("");
296
297                lines.push(format!(
298                    "{:<20} {:>12} {:>12} {:>9}% {:>8}  {}",
299                    format_metric(*metric),
300                    format_value(*metric, delta.baseline),
301                    format_value(*metric, delta.current),
302                    format!("{:+.1}", delta.pct * 100.0),
303                    status_str,
304                    trend_str,
305                ));
306            }
307
308            if !compare.verdict.reasons.is_empty() {
309                lines.push(String::new());
310                for reason in &compare.verdict.reasons {
311                    lines.push(format!("  {}", reason));
312                }
313            }
314        } else {
315            lines.push(format!(
316                "no baseline (ran in {:.1}s)",
317                result.elapsed.as_secs_f64()
318            ));
319
320            // Show raw run stats
321            let receipt = &result.outcome.run_receipt;
322            lines.push(String::new());
323            lines.push(format!("{:<20} {:>12}", "Metric", "Value"));
324            lines.push("-".repeat(35));
325
326            lines.push(format!(
327                "{:<20} {:>12}",
328                "wall_ms",
329                format!("{}", receipt.stats.wall_ms.median)
330            ));
331            if let Some(cpu) = &receipt.stats.cpu_ms {
332                lines.push(format!(
333                    "{:<20} {:>12}",
334                    "cpu_ms",
335                    format!("{}", cpu.median)
336                ));
337            }
338            if let Some(rss) = &receipt.stats.max_rss_kb {
339                lines.push(format!(
340                    "{:<20} {:>12}",
341                    "max_rss_kb",
342                    format!("{}", rss.median)
343                ));
344            }
345        }
346
347        // Show warnings from the outcome
348        if !result.outcome.warnings.is_empty() {
349            lines.push(String::new());
350            for w in &result.outcome.warnings {
351                lines.push(format!("warning: {}", w));
352            }
353        }
354    } else {
355        lines.push("waiting for first run...".to_string());
356    }
357
358    lines.push(String::new());
359    lines.push("press Ctrl+C to stop".to_string());
360
361    lines
362}
363
/// Debounce helper: remembers when the latest event arrived and reports once
/// the configured quiet interval has passed since then.
#[derive(Debug)]
pub struct Debouncer {
    /// Debounce interval in milliseconds.
    debounce_ms: u64,
    /// When the most recent event was recorded.
    last_event: Option<Instant>,
    /// Whether an event has been recorded and not yet consumed.
    pending: bool,
}

impl Debouncer {
    /// Create a debouncer with the given interval in milliseconds and no
    /// pending event.
    pub fn new(debounce_ms: u64) -> Self {
        Self {
            debounce_ms,
            last_event: None,
            pending: false,
        }
    }

    /// Record an incoming event, restarting the interval from now.
    pub fn event(&mut self) {
        self.last_event = Some(Instant::now());
        self.pending = true;
    }

    /// Report whether the interval has elapsed since the last event.
    ///
    /// Returns `true` at most once per burst of events: the pending flag is
    /// consumed when this fires. Returns `false` when nothing is pending or
    /// the interval has not yet elapsed.
    pub fn should_trigger(&mut self) -> bool {
        // Reuses `remaining_ms` so both methods always agree on "due".
        let due = self.remaining_ms() == Some(0);
        if due {
            self.pending = false;
        }
        due
    }

    /// Milliseconds left until the debounce fires.
    ///
    /// Returns `None` when no event is pending and `Some(0)` once the
    /// interval has elapsed.
    pub fn remaining_ms(&self) -> Option<u64> {
        self.pending_elapsed_ms()
            .map(|elapsed| self.debounce_ms.saturating_sub(elapsed))
    }

    /// Returns true if an event has been recorded and not yet consumed by
    /// [`Debouncer::should_trigger`].
    pub fn is_pending(&self) -> bool {
        self.pending
    }

    /// Whole milliseconds since the last event, or `None` when nothing is pending.
    fn pending_elapsed_ms(&self) -> Option<u64> {
        if !self.pending {
            return None;
        }
        let last = self.last_event?;
        // `as_millis` yields a u128; clamp instead of truncating with `as`.
        Some(u64::try_from(last.elapsed().as_millis()).unwrap_or(u64::MAX))
    }
}
428
#[cfg(test)]
mod tests {
    use super::*;
    use std::thread;
    use std::time::Duration;

    /// Interval long enough that it cannot elapse during a test, so the
    /// "not yet due" assertions do not depend on scheduler timing.
    const LONG_INTERVAL_MS: u64 = 60_000;

    #[test]
    fn debouncer_new_is_not_pending() {
        let mut d = Debouncer::new(500);
        assert!(!d.is_pending());
        assert!(!d.should_trigger());
    }

    #[test]
    fn debouncer_event_sets_pending() {
        let mut d = Debouncer::new(500);
        d.event();
        assert!(d.is_pending());
    }

    #[test]
    fn debouncer_does_not_trigger_immediately() {
        let mut d = Debouncer::new(LONG_INTERVAL_MS);
        d.event();
        // The interval has not elapsed, so the event stays pending.
        assert!(!d.should_trigger());
        assert!(d.is_pending());
    }

    #[test]
    fn debouncer_triggers_after_interval() {
        let mut d = Debouncer::new(50);
        d.event();
        thread::sleep(Duration::from_millis(60));
        assert!(d.should_trigger());
        // Triggering consumes the pending flag.
        assert!(!d.is_pending());
    }

    #[test]
    fn debouncer_resets_on_new_event() {
        let mut d = Debouncer::new(200);
        d.event();
        thread::sleep(Duration::from_millis(120));
        // A second event restarts the interval.
        d.event();
        thread::sleep(Duration::from_millis(120));
        // 240ms have passed since the first event but only 120ms since the
        // second, so this assertion holds only if the timer was reset.
        assert!(!d.should_trigger());
        thread::sleep(Duration::from_millis(100));
        assert!(d.should_trigger());
    }

    #[test]
    fn debouncer_remaining_ms_when_not_pending() {
        let d = Debouncer::new(500);
        assert_eq!(d.remaining_ms(), None);
    }

    #[test]
    fn debouncer_remaining_ms_when_pending() {
        let mut d = Debouncer::new(LONG_INTERVAL_MS);
        d.event();
        let remaining = d.remaining_ms().unwrap();
        // Never more than the interval, and still close to it right after the event.
        assert!(remaining <= LONG_INTERVAL_MS);
        assert!(remaining > LONG_INTERVAL_MS - 10_000);
    }

    #[test]
    fn debouncer_remaining_ms_after_elapsed() {
        let mut d = Debouncer::new(30);
        d.event();
        thread::sleep(Duration::from_millis(40));
        assert_eq!(d.remaining_ms(), Some(0));
    }

    #[test]
    fn trend_direction_stable_for_empty() {
        assert_eq!(compute_trend_direction(&[]), TrendDirection::Stable);
    }

    #[test]
    fn trend_direction_stable_for_single() {
        assert_eq!(compute_trend_direction(&[0.05]), TrendDirection::Stable);
    }

    #[test]
    fn trend_direction_degrading_for_positive() {
        assert_eq!(
            compute_trend_direction(&[0.05, 0.06, 0.07]),
            TrendDirection::Degrading
        );
    }

    #[test]
    fn trend_direction_improving_for_negative() {
        assert_eq!(
            compute_trend_direction(&[-0.05, -0.06, -0.07]),
            TrendDirection::Improving
        );
    }

    #[test]
    fn trend_direction_stable_for_small_values() {
        assert_eq!(
            compute_trend_direction(&[0.001, -0.002, 0.003]),
            TrendDirection::Stable
        );
    }

    #[test]
    fn trend_uses_last_three_entries() {
        // Older entries are degrading, but the three most recent are improving.
        let history = [0.10, 0.15, 0.20, -0.05, -0.06, -0.07];
        assert_eq!(compute_trend_direction(&history), TrendDirection::Improving);
    }

    #[test]
    fn watch_state_default_is_empty() {
        let state = WatchState::default();
        assert_eq!(state.iteration_count, 0);
        assert_eq!(state.pass_count, 0);
        assert_eq!(state.warn_count, 0);
        assert_eq!(state.fail_count, 0);
        assert!(state.last_result.is_none());
        assert!(state.last_run_time.is_none());
        assert!(state.trends.is_empty());
    }

    #[test]
    fn render_watch_display_waiting() {
        let state = WatchState::new();
        let lines = render_watch_display(&state, "my-bench", "idle");
        let shows = |needle: &str| lines.iter().any(|line| line.contains(needle));
        assert!(shows("my-bench"));
        assert!(shows("idle"));
        assert!(shows("waiting for first run"));
        assert!(shows("Ctrl+C"));
    }

    #[test]
    fn trend_arrow_formatting() {
        assert_eq!(trend_arrow(TrendDirection::Improving), ">> improving");
        assert_eq!(trend_arrow(TrendDirection::Degrading), ">> degrading");
        assert_eq!(trend_arrow(TrendDirection::Stable), ">> stable");
    }

    #[test]
    fn verdict_display_formatting() {
        assert_eq!(verdict_display(VerdictStatus::Pass), "PASS");
        assert_eq!(verdict_display(VerdictStatus::Warn), "WARN");
        assert_eq!(verdict_display(VerdictStatus::Fail), "FAIL");
        assert_eq!(verdict_display(VerdictStatus::Skip), "SKIP");
    }
}