Skip to main content

ralph/
execution_history.rs

1//! Historical execution duration tracking for ETA estimation.
2//!
3//! Responsibilities:
4//! - Record phase durations for completed task executions.
5//! - Provide weighted average calculations for ETA estimation.
6//! - Persist data to `.ralph/cache/execution_history.json`.
7//!
8//! Not handled here:
9//! - Real-time progress tracking (see `app_execution.rs`).
10//! - Actual rendering of progress indicators.
11//!
12//! Invariants/assumptions:
13//! - Historical data is keyed by (runner, model, phase_count) for accuracy.
14//! - Recent runs are weighted higher (exponential decay).
15//! - Maximum 100 entries per key to prevent unbounded growth.
16
17use crate::constants::versions::EXECUTION_HISTORY_VERSION;
18use crate::progress::ExecutionPhase;
19use anyhow::{Context, Result};
20use serde::{Deserialize, Serialize};
21use std::collections::HashMap;
22use std::fs;
23use std::io::Write;
24use std::path::Path;
25use std::time::Duration;
26
27/// Root execution history data structure.
/// Root execution history data structure.
///
/// Persisted as pretty-printed JSON at `.ralph/cache/execution_history.json`
/// via `load_execution_history` / `save_execution_history`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutionHistory {
    /// Schema version for migrations.
    /// `Default` stamps this with `EXECUTION_HISTORY_VERSION`.
    pub version: u32,
    /// Historical execution entries.
    /// Bounded to 100 entries by `prune_old_entries` during `record_execution`.
    pub entries: Vec<ExecutionEntry>,
}
35
36impl Default for ExecutionHistory {
37    fn default() -> Self {
38        Self {
39            version: EXECUTION_HISTORY_VERSION,
40            entries: Vec::new(),
41        }
42    }
43}
44
45/// A single execution entry recording phase durations.
/// A single execution entry recording phase durations.
///
/// One entry is appended per completed task execution by `record_execution`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutionEntry {
    /// When the execution completed (RFC3339).
    /// Produced by `crate::timeutil::now_utc_rfc3339_or_fallback`; also used
    /// as the sort key when pruning and for exponential age weighting.
    pub timestamp: String,
    /// Task ID that was executed.
    pub task_id: String,
    /// Runner used (e.g., "codex", "claude").
    pub runner: String,
    /// Model used (e.g., "sonnet", "gpt-4").
    pub model: String,
    /// Number of phases configured (1, 2, or 3).
    pub phase_count: u8,
    /// Duration for each completed phase.
    /// May be missing phases that did not run; lookups treat absence as
    /// "no data" rather than zero.
    pub phase_durations: HashMap<ExecutionPhase, Duration>,
    /// Total execution duration.
    pub total_duration: Duration,
}
63
64/// Load execution history from cache directory.
65pub fn load_execution_history(cache_dir: &Path) -> Result<ExecutionHistory> {
66    let path = cache_dir.join("execution_history.json");
67
68    if !path.exists() {
69        return Ok(ExecutionHistory::default());
70    }
71
72    let content = fs::read_to_string(&path)
73        .with_context(|| format!("Failed to read execution history from {}", path.display()))?;
74
75    let history: ExecutionHistory = serde_json::from_str(&content)
76        .with_context(|| format!("Failed to parse execution history from {}", path.display()))?;
77
78    Ok(history)
79}
80
81/// Save execution history to cache directory.
82pub fn save_execution_history(history: &ExecutionHistory, cache_dir: &Path) -> Result<()> {
83    let path = cache_dir.join("execution_history.json");
84
85    // Ensure cache directory exists
86    fs::create_dir_all(cache_dir)
87        .with_context(|| format!("Failed to create cache directory {}", cache_dir.display()))?;
88
89    let content =
90        serde_json::to_string_pretty(history).context("Failed to serialize execution history")?;
91
92    // Atomic write: write to temp file then rename
93    let temp_path = path.with_extension("tmp");
94    let mut file = fs::File::create(&temp_path)
95        .with_context(|| format!("Failed to create temp file {}", temp_path.display()))?;
96    file.write_all(content.as_bytes())
97        .with_context(|| format!("Failed to write to temp file {}", temp_path.display()))?;
98    file.flush()
99        .with_context(|| format!("Failed to flush temp file {}", temp_path.display()))?;
100    drop(file);
101
102    fs::rename(&temp_path, &path)
103        .with_context(|| format!("Failed to rename temp file to {}", path.display()))?;
104
105    Ok(())
106}
107
108/// Record a completed execution to history.
109pub fn record_execution(
110    task_id: &str,
111    runner: &str,
112    model: &str,
113    phase_count: u8,
114    phase_durations: HashMap<ExecutionPhase, Duration>,
115    total_duration: Duration,
116    cache_dir: &Path,
117) -> Result<()> {
118    let mut history = load_execution_history(cache_dir)?;
119
120    let entry = ExecutionEntry {
121        timestamp: crate::timeutil::now_utc_rfc3339_or_fallback(),
122        task_id: task_id.to_string(),
123        runner: runner.to_string(),
124        model: model.to_string(),
125        phase_count,
126        phase_durations,
127        total_duration,
128    };
129
130    history.entries.push(entry);
131
132    // Prune old entries if we exceed the limit
133    prune_old_entries(&mut history);
134
135    save_execution_history(&history, cache_dir)?;
136    Ok(())
137}
138
139/// Prune oldest entries to keep history bounded.
140fn prune_old_entries(history: &mut ExecutionHistory) {
141    const MAX_ENTRIES: usize = 100;
142
143    if history.entries.len() <= MAX_ENTRIES {
144        return;
145    }
146
147    // Sort by timestamp (newest first) and keep only MAX_ENTRIES
148    history
149        .entries
150        .sort_by(|a, b| b.timestamp.cmp(&a.timestamp));
151    history.entries.truncate(MAX_ENTRIES);
152}
153
154/// Calculate weighted average duration for a specific phase.
155///
156/// Uses exponential weighting where recent entries are weighted higher.
157/// weight = 0.9^(age_in_days)
158pub fn weighted_average_duration(
159    history: &ExecutionHistory,
160    runner: &str,
161    model: &str,
162    phase_count: u8,
163    phase: ExecutionPhase,
164) -> Option<Duration> {
165    let relevant_entries: Vec<_> = history
166        .entries
167        .iter()
168        .filter(|e| {
169            e.runner == runner
170                && e.model == model
171                && e.phase_count == phase_count
172                && e.phase_durations.contains_key(&phase)
173        })
174        .collect();
175
176    if relevant_entries.is_empty() {
177        return None;
178    }
179
180    let now = std::time::SystemTime::now()
181        .duration_since(std::time::UNIX_EPOCH)
182        .unwrap_or_default()
183        .as_secs() as f64;
184
185    let mut total_weight = 0.0;
186    let mut weighted_sum = 0.0;
187
188    for entry in relevant_entries {
189        let entry_secs = parse_timestamp_to_secs(&entry.timestamp).unwrap_or(now as u64) as f64;
190        let age_days = (now - entry_secs) / (24.0 * 3600.0);
191        let weight = 0.9_f64.powf(age_days);
192
193        if let Some(duration) = entry.phase_durations.get(&phase) {
194            weighted_sum += duration.as_secs_f64() * weight;
195            total_weight += weight;
196        }
197    }
198
199    if total_weight == 0.0 {
200        return None;
201    }
202
203    let avg_secs = weighted_sum / total_weight;
204    Some(Duration::from_secs_f64(avg_secs))
205}
206
207/// Get historical average durations for all phases.
208pub fn get_phase_averages(
209    history: &ExecutionHistory,
210    runner: &str,
211    model: &str,
212    phase_count: u8,
213) -> HashMap<ExecutionPhase, Duration> {
214    let mut averages = HashMap::new();
215
216    for phase in [
217        ExecutionPhase::Planning,
218        ExecutionPhase::Implementation,
219        ExecutionPhase::Review,
220    ] {
221        if let Some(avg) = weighted_average_duration(history, runner, model, phase_count, phase) {
222            averages.insert(phase, avg);
223        }
224    }
225
226    averages
227}
228
229/// Parse RFC3339 timestamp to Unix seconds using proper RFC3339 parsing.
230///
231/// Uses the timeutil module for accurate parsing that correctly handles:
232/// - Leap years
233/// - Variable month lengths
234/// - Timezone offsets
235fn parse_timestamp_to_secs(timestamp: &str) -> Option<u64> {
236    let dt = crate::timeutil::parse_rfc3339_opt(timestamp)?;
237    let ts = dt.unix_timestamp();
238    // Defensive: pre-epoch timestamps are not expected in execution history
239    // but we guard against overflow when casting negative i64 to u64
240    (ts >= 0).then_some(ts as u64)
241}
242
#[cfg(test)]
mod tests {
    //! Unit tests covering persistence round-trips, pruning bounds,
    //! weighted-average math, and timestamp parsing accuracy.
    use super::*;
    use tempfile::TempDir;

    // A missing cache file must yield an empty default history, not an error.
    #[test]
    fn test_load_empty_history() {
        let temp = TempDir::new().unwrap();
        let history = load_execution_history(temp.path()).unwrap();
        assert!(history.entries.is_empty());
        assert_eq!(history.version, EXECUTION_HISTORY_VERSION);
    }

    // Round-trip: what is saved to disk must load back identically.
    #[test]
    fn test_save_and_load_history() {
        let temp = TempDir::new().unwrap();
        let mut history = ExecutionHistory::default();

        history.entries.push(ExecutionEntry {
            timestamp: "2026-01-31T12:00:00Z".to_string(),
            task_id: "RQ-0001".to_string(),
            runner: "codex".to_string(),
            model: "sonnet".to_string(),
            phase_count: 3,
            phase_durations: {
                let mut d = HashMap::new();
                d.insert(ExecutionPhase::Planning, Duration::from_secs(60));
                d.insert(ExecutionPhase::Implementation, Duration::from_secs(120));
                d.insert(ExecutionPhase::Review, Duration::from_secs(30));
                d
            },
            total_duration: Duration::from_secs(210),
        });

        save_execution_history(&history, temp.path()).unwrap();
        let loaded = load_execution_history(temp.path()).unwrap();

        assert_eq!(loaded.entries.len(), 1);
        assert_eq!(loaded.entries[0].task_id, "RQ-0001");
        assert_eq!(loaded.entries[0].phase_count, 3);
    }

    // record_execution must persist a timestamped entry end-to-end.
    #[test]
    fn test_record_execution() {
        let temp = TempDir::new().unwrap();
        let mut durations = HashMap::new();
        durations.insert(ExecutionPhase::Planning, Duration::from_secs(60));

        record_execution(
            "RQ-0001",
            "codex",
            "sonnet",
            3,
            durations,
            Duration::from_secs(60),
            temp.path(),
        )
        .unwrap();

        let history = load_execution_history(temp.path()).unwrap();
        assert_eq!(history.entries.len(), 1);
        assert_eq!(history.entries[0].runner, "codex");
        // CRITICAL: Timestamp must never be empty (regression test for RQ-0636)
        assert!(
            !history.entries[0].timestamp.is_empty(),
            "Timestamp should never be empty"
        );
    }

    // Pruning must cap the history at exactly 100 entries.
    #[test]
    fn test_prune_old_entries() {
        let mut history = ExecutionHistory::default();

        // Add 150 entries
        for i in 0..150 {
            history.entries.push(ExecutionEntry {
                // Days cycle 01..30, so timestamps intentionally repeat.
                timestamp: format!("2026-01-{:02}T12:00:00Z", (i % 30) + 1),
                task_id: format!("RQ-{:04}", i),
                runner: "codex".to_string(),
                model: "sonnet".to_string(),
                phase_count: 3,
                phase_durations: HashMap::new(),
                total_duration: Duration::from_secs(60),
            });
        }

        prune_old_entries(&mut history);
        assert_eq!(history.entries.len(), 100);
    }

    // Exponential decay: a recent 100s run should pull the average below
    // the midpoint of (100, 200) against an older 200s run.
    #[test]
    fn test_weighted_average_duration() {
        let mut history = ExecutionHistory::default();

        // Add entries with different timestamps
        history.entries.push(ExecutionEntry {
            timestamp: "2026-01-31T12:00:00Z".to_string(), // Recent
            task_id: "RQ-0001".to_string(),
            runner: "codex".to_string(),
            model: "sonnet".to_string(),
            phase_count: 3,
            phase_durations: {
                let mut d = HashMap::new();
                d.insert(ExecutionPhase::Planning, Duration::from_secs(100));
                d
            },
            total_duration: Duration::from_secs(100),
        });

        history.entries.push(ExecutionEntry {
            timestamp: "2026-01-30T12:00:00Z".to_string(), // Older
            task_id: "RQ-0002".to_string(),
            runner: "codex".to_string(),
            model: "sonnet".to_string(),
            phase_count: 3,
            phase_durations: {
                let mut d = HashMap::new();
                d.insert(ExecutionPhase::Planning, Duration::from_secs(200));
                d
            },
            total_duration: Duration::from_secs(200),
        });

        let avg =
            weighted_average_duration(&history, "codex", "sonnet", 3, ExecutionPhase::Planning);
        assert!(avg.is_some());
        // Recent entry (100s) should be weighted higher than older (200s)
        let avg_secs = avg.unwrap().as_secs();
        assert!(
            avg_secs < 150,
            "Weighted average should favor recent entries"
        );
    }

    // No matching (runner, model, phase_count) entries => None, not zero.
    #[test]
    fn test_weighted_average_no_matching_entries() {
        let history = ExecutionHistory::default();
        let avg =
            weighted_average_duration(&history, "codex", "sonnet", 3, ExecutionPhase::Planning);
        assert!(avg.is_none());
    }

    // Only phases with recorded data appear in the averages map.
    #[test]
    fn test_get_phase_averages() {
        let mut history = ExecutionHistory::default();

        history.entries.push(ExecutionEntry {
            timestamp: "2026-01-31T12:00:00Z".to_string(),
            task_id: "RQ-0001".to_string(),
            runner: "codex".to_string(),
            model: "sonnet".to_string(),
            phase_count: 3,
            phase_durations: {
                let mut d = HashMap::new();
                d.insert(ExecutionPhase::Planning, Duration::from_secs(60));
                d.insert(ExecutionPhase::Implementation, Duration::from_secs(120));
                d
            },
            total_duration: Duration::from_secs(180),
        });

        let averages = get_phase_averages(&history, "codex", "sonnet", 3);
        // Review has no data, so only two phases should be present.
        assert_eq!(averages.len(), 2);
        assert_eq!(
            averages.get(&ExecutionPhase::Planning),
            Some(&Duration::from_secs(60))
        );
        assert_eq!(
            averages.get(&ExecutionPhase::Implementation),
            Some(&Duration::from_secs(120))
        );
    }

    // Valid RFC3339 (with and without subseconds) parses; garbage does not.
    #[test]
    fn test_parse_timestamp_to_secs() {
        let secs = parse_timestamp_to_secs("2026-01-31T12:00:00Z");
        assert!(secs.is_some());

        let secs_with_ms = parse_timestamp_to_secs("2026-01-31T12:00:00.123Z");
        assert!(secs_with_ms.is_some());

        let invalid = parse_timestamp_to_secs("invalid");
        assert!(invalid.is_none());
    }

    #[test]
    fn test_parse_timestamp_accuracy_vs_timeutil() {
        // Test that our parsing matches timeutil::parse_rfc3339 exactly
        let test_cases = [
            "2026-01-31T12:00:00Z",
            "2026-01-31T12:00:00.123Z",
            "2026-01-31T12:00:00.123456789Z",
            "2020-02-29T00:00:00Z", // Leap year
            "1970-01-01T00:00:00Z", // Unix epoch
            "2000-12-31T23:59:59Z",
        ];

        for ts in &test_cases {
            let parsed = parse_timestamp_to_secs(ts);
            let expected = crate::timeutil::parse_rfc3339(ts)
                .ok()
                .map(|dt| dt.unix_timestamp() as u64);

            assert_eq!(
                parsed, expected,
                "parse_timestamp_to_secs({}) should match timeutil::parse_rfc3339",
                ts
            );
        }
    }

    #[test]
    fn test_parse_timestamp_leap_year_accuracy() {
        // Leap day 2020 should be exactly 1 day after Feb 28
        let feb28 = parse_timestamp_to_secs("2020-02-28T00:00:00Z").unwrap();
        let feb29 = parse_timestamp_to_secs("2020-02-29T00:00:00Z").unwrap();
        let mar01 = parse_timestamp_to_secs("2020-03-01T00:00:00Z").unwrap();

        // Feb 29 is exactly 86400 seconds after Feb 28
        assert_eq!(
            feb29 - feb28,
            86400,
            "Leap day should be exactly 1 day after Feb 28"
        );
        // Mar 01 is exactly 86400 seconds after Feb 29
        assert_eq!(
            mar01 - feb29,
            86400,
            "Mar 01 should be exactly 1 day after Feb 29"
        );
    }

    #[test]
    fn test_weighted_average_monotonic_decay() {
        // Regression test: ensure weight decreases monotonically with age
        let mut history = ExecutionHistory::default();

        // Add entries at 5-day intervals (oldest first: Jan 11, 16, 21, 26, 31)
        for i in 0..5 {
            let day = 11 + i * 5; // 11, 16, 21, 26, 31
            let timestamp = format!("2026-01-{:02}T12:00:00Z", day);
            history.entries.push(ExecutionEntry {
                timestamp,
                task_id: format!("RQ-{}", i),
                runner: "codex".to_string(),
                model: "sonnet".to_string(),
                phase_count: 3,
                phase_durations: {
                    let mut d = HashMap::new();
                    d.insert(ExecutionPhase::Planning, Duration::from_secs(100));
                    d
                },
                total_duration: Duration::from_secs(100),
            });
        }

        // Calculate weighted average
        let avg =
            weighted_average_duration(&history, "codex", "sonnet", 3, ExecutionPhase::Planning);

        assert!(avg.is_some(), "Should have a weighted average");

        // Verify that older entries have smaller weights
        // The most recent entry (2026-01-31) should have highest weight
        // The oldest entry (2026-01-11) should have lowest weight
        let now = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs() as f64;

        // Recompute the same weight formula used by weighted_average_duration.
        let mut weights = vec![];
        for entry in &history.entries {
            let entry_secs = parse_timestamp_to_secs(&entry.timestamp).unwrap_or(now as u64) as f64;
            let age_days = (now - entry_secs) / (24.0 * 3600.0);
            let weight = 0.9_f64.powf(age_days);
            weights.push((entry.timestamp.clone(), weight));
        }

        // Entries are added oldest first (Jan 11 -> Jan 31), so weights should
        // increase as we go through the list (older = smaller weight)
        for i in 1..weights.len() {
            assert!(
                weights[i - 1].1 <= weights[i].1,
                "Weight should increase as entries get newer (older entries have lower weight): {:?} vs {:?}",
                weights[i - 1],
                weights[i]
            );
        }
    }

    #[test]
    fn test_parse_timestamp_with_subseconds() {
        // Subseconds should be parsed correctly (truncated to whole seconds)
        let without_ms = parse_timestamp_to_secs("2026-01-31T12:00:00Z").unwrap();
        let with_ms = parse_timestamp_to_secs("2026-01-31T12:00:00.500Z").unwrap();
        let with_many_ms = parse_timestamp_to_secs("2026-01-31T12:00:00.999999Z").unwrap();

        // Unix timestamp is whole seconds only
        assert_eq!(
            without_ms, with_ms,
            "Subseconds should not affect unix timestamp"
        );
        assert_eq!(
            without_ms, with_many_ms,
            "Subseconds should not affect unix timestamp"
        );
    }
}