Skip to main content

brainwires_autonomy/
metrics.rs

//! Metrics and reporting for autonomous improvement sessions.
2
3use chrono::{DateTime, Utc};
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6use std::time::Duration;
7
8use crate::safety::SafetyStop;
9
10/// Per-strategy task metrics tracking generation, attempt, and success counts.
11#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12pub struct StrategyMetrics {
13    /// Number of tasks generated by this strategy.
14    pub tasks_generated: u32,
15    /// Number of tasks attempted.
16    pub tasks_attempted: u32,
17    /// Number of tasks that succeeded.
18    pub tasks_succeeded: u32,
19}
20
21/// Comparison result between two execution paths.
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct ComparisonResult {
24    /// Whether both paths succeeded.
25    pub both_succeeded: bool,
26    /// Whether both paths failed.
27    pub both_failed: bool,
28    /// Whether the diffs match.
29    pub diffs_match: bool,
30    /// Difference in iteration counts.
31    pub iteration_delta: i32,
32    /// Errors specific to the bridge path.
33    pub bridge_specific_errors: Vec<String>,
34}
35
36/// Session-wide metrics tracking aggregate task counts, costs, and per-strategy breakdowns.
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct SessionMetrics {
39    /// Timestamp when the session started.
40    pub start_time: DateTime<Utc>,
41    /// Total number of tasks attempted.
42    pub tasks_attempted: u32,
43    /// Total number of tasks that succeeded.
44    pub tasks_succeeded: u32,
45    /// Total number of tasks that failed.
46    pub tasks_failed: u32,
47    /// Metrics broken down by strategy name.
48    pub per_strategy: HashMap<String, StrategyMetrics>,
49    /// Results of dual-path comparisons.
50    pub comparisons: Vec<ComparisonResult>,
51    /// Estimated total cost in USD.
52    pub total_cost: f64,
53    /// Total iteration count across all tasks.
54    pub total_iterations: u32,
55    /// Commit hashes produced during the session.
56    pub commits: Vec<String>,
57}
58
59impl SessionMetrics {
60    /// Create a new session metrics instance starting now.
61    pub fn new() -> Self {
62        Self {
63            start_time: Utc::now(),
64            tasks_attempted: 0,
65            tasks_succeeded: 0,
66            tasks_failed: 0,
67            per_strategy: HashMap::new(),
68            comparisons: Vec::new(),
69            total_cost: 0.0,
70            total_iterations: 0,
71            commits: Vec::new(),
72        }
73    }
74
75    /// Record a task attempt for the given strategy.
76    pub fn record_attempt(&mut self, strategy: &str) {
77        self.tasks_attempted += 1;
78        self.per_strategy
79            .entry(strategy.to_string())
80            .or_default()
81            .tasks_attempted += 1;
82    }
83
84    /// Record a successful task for the given strategy with its iteration count.
85    pub fn record_success(&mut self, strategy: &str, iterations: u32) {
86        self.tasks_succeeded += 1;
87        self.total_iterations += iterations;
88        self.per_strategy
89            .entry(strategy.to_string())
90            .or_default()
91            .tasks_succeeded += 1;
92    }
93
94    /// Record a failed task for the given strategy.
95    pub fn record_failure(&mut self, strategy: &str) {
96        self.tasks_failed += 1;
97        self.per_strategy
98            .entry(strategy.to_string())
99            .or_default()
100            .tasks_attempted += 1;
101    }
102
103    /// Record the number of tasks generated by a strategy.
104    pub fn record_generated(&mut self, strategy: &str, count: u32) {
105        self.per_strategy
106            .entry(strategy.to_string())
107            .or_default()
108            .tasks_generated += count;
109    }
110
111    /// Record a dual-path comparison result.
112    pub fn record_comparison(&mut self, comparison: ComparisonResult) {
113        self.comparisons.push(comparison);
114    }
115
116    /// Record a commit hash produced during the session.
117    pub fn record_commit(&mut self, hash: String) {
118        self.commits.push(hash);
119    }
120
121    /// Compute the success rate as a fraction between 0.0 and 1.0.
122    pub fn success_rate(&self) -> f64 {
123        if self.tasks_attempted == 0 {
124            0.0
125        } else {
126            self.tasks_succeeded as f64 / self.tasks_attempted as f64
127        }
128    }
129}
130
131impl Default for SessionMetrics {
132    fn default() -> Self {
133        Self::new()
134    }
135}
136
137/// Complete session report combining metrics, duration, and stop reason,
138/// with serialization to both JSON and Markdown formats.
139#[derive(Debug, Clone, Serialize, Deserialize)]
140pub struct SessionReport {
141    /// Aggregated session metrics.
142    pub metrics: SessionMetrics,
143    /// Total wall-clock duration of the session.
144    #[serde(with = "duration_serde")]
145    pub duration: Duration,
146    /// Human-readable reason the session stopped, if a safety stop occurred.
147    pub safety_stop_reason: Option<String>,
148}
149
150mod duration_serde {
151    use serde::{Deserialize, Deserializer, Serialize, Serializer};
152    use std::time::Duration;
153
154    pub fn serialize<S>(d: &Duration, s: S) -> Result<S::Ok, S::Error>
155    where
156        S: Serializer,
157    {
158        d.as_secs_f64().serialize(s)
159    }
160
161    pub fn deserialize<'de, D>(d: D) -> Result<Duration, D::Error>
162    where
163        D: Deserializer<'de>,
164    {
165        let secs = f64::deserialize(d)?;
166        Ok(Duration::from_secs_f64(secs))
167    }
168}
169
170impl SessionReport {
171    /// Create a new session report from metrics, duration, and optional stop reason.
172    pub fn new(
173        metrics: SessionMetrics,
174        duration: Duration,
175        stop_reason: Option<SafetyStop>,
176    ) -> Self {
177        Self {
178            metrics,
179            duration,
180            safety_stop_reason: stop_reason.map(|r| r.to_string()),
181        }
182    }
183
184    /// Serialize the report to pretty-printed JSON.
185    pub fn to_json(&self) -> Result<String, serde_json::Error> {
186        serde_json::to_string_pretty(self)
187    }
188
189    /// Render the report as a Markdown document.
190    pub fn to_markdown(&self) -> String {
191        let mut md = String::new();
192        md.push_str("# Self-Improvement Session Report\n\n");
193        md.push_str(&format!(
194            "**Date**: {}\n",
195            self.metrics.start_time.format("%Y-%m-%d %H:%M:%S UTC")
196        ));
197        md.push_str(&format!(
198            "**Duration**: {:.1}s\n",
199            self.duration.as_secs_f64()
200        ));
201        md.push_str(&format!(
202            "**Success Rate**: {:.1}%\n\n",
203            self.metrics.success_rate() * 100.0
204        ));
205
206        md.push_str("## Summary\n\n");
207        md.push_str("| Metric | Value |\n|--------|-------|\n");
208        md.push_str(&format!(
209            "| Tasks Attempted | {} |\n",
210            self.metrics.tasks_attempted
211        ));
212        md.push_str(&format!(
213            "| Tasks Succeeded | {} |\n",
214            self.metrics.tasks_succeeded
215        ));
216        md.push_str(&format!(
217            "| Tasks Failed | {} |\n",
218            self.metrics.tasks_failed
219        ));
220        md.push_str(&format!(
221            "| Total Iterations | {} |\n",
222            self.metrics.total_iterations
223        ));
224        md.push_str(&format!(
225            "| Estimated Cost | ${:.4} |\n",
226            self.metrics.total_cost
227        ));
228        md.push_str(&format!("| Commits | {} |\n", self.metrics.commits.len()));
229
230        if !self.metrics.per_strategy.is_empty() {
231            md.push_str("\n## Per-Strategy Breakdown\n\n");
232            md.push_str("| Strategy | Generated | Attempted | Succeeded |\n");
233            md.push_str("|----------|-----------|-----------|----------|\n");
234            for (name, stats) in &self.metrics.per_strategy {
235                md.push_str(&format!(
236                    "| {} | {} | {} | {} |\n",
237                    name, stats.tasks_generated, stats.tasks_attempted, stats.tasks_succeeded
238                ));
239            }
240        }
241
242        if !self.metrics.comparisons.is_empty() {
243            md.push_str("\n## Dual-Path Comparisons\n\n");
244            let both_ok = self
245                .metrics
246                .comparisons
247                .iter()
248                .filter(|c| c.both_succeeded)
249                .count();
250            let diffs_match = self
251                .metrics
252                .comparisons
253                .iter()
254                .filter(|c| c.diffs_match)
255                .count();
256            md.push_str(&format!(
257                "- Both paths succeeded: {}/{}\n",
258                both_ok,
259                self.metrics.comparisons.len()
260            ));
261            md.push_str(&format!("- Diffs matched: {}/{}\n", diffs_match, both_ok));
262        }
263
264        if let Some(ref reason) = self.safety_stop_reason {
265            md.push_str(&format!("\n## Stop Reason\n\n{reason}\n"));
266        }
267
268        if !self.metrics.commits.is_empty() {
269            md.push_str("\n## Commits\n\n");
270            for hash in &self.metrics.commits {
271                md.push_str(&format!("- `{hash}`\n"));
272            }
273        }
274
275        md
276    }
277
278    /// Save the report to both JSON and Markdown files in the given directory.
279    pub fn save(&self, output_dir: &str) -> anyhow::Result<()> {
280        std::fs::create_dir_all(output_dir)?;
281        let timestamp = self.metrics.start_time.format("%Y%m%d-%H%M%S").to_string();
282
283        let json_path = format!("{output_dir}/session-{timestamp}.json");
284        std::fs::write(&json_path, self.to_json()?)?;
285
286        let md_path = format!("{output_dir}/session-{timestamp}.md");
287        std::fs::write(&md_path, self.to_markdown())?;
288
289        tracing::info!("Session report saved to {json_path} and {md_path}");
290        Ok(())
291    }
292}
293
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created session has zeroed counters and empty collections.
    #[test]
    fn session_metrics_new_starts_empty() {
        let fresh = SessionMetrics::new();
        assert_eq!(fresh.tasks_attempted, 0);
        assert_eq!(fresh.tasks_succeeded, 0);
        assert_eq!(fresh.tasks_failed, 0);
        assert!(fresh.per_strategy.is_empty());
        assert!(fresh.commits.is_empty());
    }

    /// Attempts are counted session-wide and per strategy.
    #[test]
    fn record_attempt_increments_counters() {
        let mut session = SessionMetrics::new();
        for strategy in ["clippy", "clippy", "todo"] {
            session.record_attempt(strategy);
        }
        assert_eq!(session.tasks_attempted, 3);
        assert_eq!(session.per_strategy["clippy"].tasks_attempted, 2);
        assert_eq!(session.per_strategy["todo"].tasks_attempted, 1);
    }

    /// Successes are counted and their iteration counts are summed.
    #[test]
    fn record_success_increments_and_tracks_iterations() {
        let mut session = SessionMetrics::new();
        session.record_success("clippy", 5);
        session.record_success("clippy", 10);
        assert_eq!(session.tasks_succeeded, 2);
        assert_eq!(session.total_iterations, 15);
        assert_eq!(session.per_strategy["clippy"].tasks_succeeded, 2);
    }

    /// A failure bumps the session failure counter.
    #[test]
    fn record_failure_increments_counters() {
        let mut session = SessionMetrics::new();
        session.record_failure("clippy");
        assert_eq!(session.tasks_failed, 1);
        // record_failure also increments tasks_attempted on the strategy
        assert_eq!(session.per_strategy["clippy"].tasks_attempted, 1);
    }

    /// With no attempts recorded the success rate is zero.
    #[test]
    fn success_rate_zero_when_no_attempts() {
        let session = SessionMetrics::new();
        assert!(session.success_rate().abs() < f64::EPSILON);
    }

    /// Three successes out of four attempts give a rate of 0.75.
    #[test]
    fn success_rate_correct_value() {
        let mut session = SessionMetrics::new();
        for _ in 0..4 {
            session.record_attempt("a");
        }
        for _ in 0..3 {
            session.record_success("a", 1);
        }
        let rate = session.success_rate();
        assert!((rate - 0.75).abs() < f64::EPSILON);
    }

    /// The Markdown rendering contains the title, summary rows, duration and commits.
    #[test]
    fn session_report_to_markdown_produces_valid_output() {
        let mut session = SessionMetrics::new();
        session.record_attempt("clippy");
        session.record_success("clippy", 5);
        session.record_commit("abc123".to_string());

        let rendered = SessionReport::new(session, Duration::from_secs(42), None).to_markdown();

        let expected_fragments = [
            "# Self-Improvement Session Report",
            "Tasks Attempted",
            "Tasks Succeeded",
            "42.0s",
            "`abc123`",
        ];
        for fragment in expected_fragments {
            assert!(rendered.contains(fragment));
        }
    }

    /// The JSON rendering parses and exposes the top-level report fields.
    #[test]
    fn session_report_to_json_produces_valid_json() {
        let report = SessionReport::new(SessionMetrics::new(), Duration::from_secs(10), None);
        let text = report.to_json().expect("to_json should succeed");
        let value: serde_json::Value =
            serde_json::from_str(&text).expect("should be valid JSON");
        assert!(value.get("metrics").is_some());
        assert!(value.get("duration").is_some());
    }
}