Skip to main content

harness_rs_loop/
replay.rs

1//! Session record + replay (DESIGN.md §15 v0.2+).
2//!
3//! Two halves:
4//! - [`SessionRecorder`] is a [`Hook`] that captures every lifecycle event
5//!   to a JSONL file. Wire it via `AgentLoop::with_hook` and you get a
6//!   complete trace of what the agent did.
7//! - [`read_session`] + [`replay_as_mock`] reconstruct a deterministic
8//!   `MockModel` from a recorded log so you can replay the run offline,
9//!   verify changes, or debug failures without rerunning against a real LLM.
10
11use harness_core::{
12    Action, CompactionStage, Event, Hook, HookOutcome, ModelOutput, ToolResult, World,
13};
14use serde::{Deserialize, Serialize};
15use std::fs::OpenOptions;
16use std::io::Write;
17use std::path::Path;
18use std::sync::Mutex;
19
20/// One event in the recorded session. Owned (no borrows) so it round-trips
21/// through serde without lifetime gymnastics.
22#[derive(Debug, Clone, Serialize, Deserialize)]
23#[serde(tag = "kind", rename_all = "snake_case")]
24pub enum SessionEvent {
25    Start       { ts_ms: i64, source: String },
26    PreModel    { ts_ms: i64, history_len: usize, tools_count: usize },
27    PostModel   { ts_ms: i64, output: ModelOutput },
28    PreTool     { ts_ms: i64, action: Action },
29    PostTool    { ts_ms: i64, call_id: String, result: ToolResult },
30    Sensor      { ts_ms: i64, id: String, signals: usize },
31    PreCompact  { ts_ms: i64, stage: CompactionStage },
32    PostCompact { ts_ms: i64, stage: CompactionStage },
33    Heartbeat   { ts_ms: i64, iter: u32 },
34    End         { ts_ms: i64 },
35}
36
37/// Hook that serialises every relevant lifecycle event into a JSONL file.
38///
39/// Failures (locked mutex, I/O errors) are logged via `tracing::warn` but
40/// never panic — recording is a best-effort observability layer, not a
41/// correctness path.
42pub struct SessionRecorder {
43    file: Mutex<std::fs::File>,
44}
45
46impl SessionRecorder {
47    /// Open the file for append (creating it if needed).
48    pub fn new(path: &Path) -> std::io::Result<Self> {
49        if let Some(parent) = path.parent() {
50            std::fs::create_dir_all(parent)?;
51        }
52        let f = OpenOptions::new().create(true).append(true).open(path)?;
53        Ok(Self { file: Mutex::new(f) })
54    }
55
56    fn write(&self, ev: &SessionEvent) {
57        let Ok(mut f) = self.file.lock() else { return; };
58        match serde_json::to_string(ev) {
59            Ok(s) => {
60                if let Err(e) = writeln!(f, "{s}") {
61                    tracing::warn!(error=%e, "session recorder write failed");
62                }
63            }
64            Err(e) => tracing::warn!(error=%e, "session recorder serialize failed"),
65        }
66    }
67}
68
69impl Hook for SessionRecorder {
70    fn name(&self) -> &str { "session-recorder" }
71    fn matches(&self, _ev: &Event<'_>) -> bool { true }
72
73    fn fire(&self, ev: &Event<'_>, world: &mut World) -> HookOutcome {
74        let ts = world.clock.now_ms();
75        let session_ev = match ev {
76            Event::SessionStart { source } => Some(SessionEvent::Start {
77                ts_ms: ts,
78                source: format!("{source:?}"),
79            }),
80            Event::PreModel { ctx } => Some(SessionEvent::PreModel {
81                ts_ms: ts,
82                history_len: ctx.history.len(),
83                tools_count: ctx.tools.len(),
84            }),
85            Event::PostModel { out } => Some(SessionEvent::PostModel {
86                ts_ms: ts,
87                output: (*out).clone(),
88            }),
89            Event::PreToolUse { action } => Some(SessionEvent::PreTool {
90                ts_ms: ts,
91                action: (*action).clone(),
92            }),
93            Event::PostToolUse { action, result } => Some(SessionEvent::PostTool {
94                ts_ms: ts,
95                call_id: action.call_id.clone(),
96                result: (*result).clone(),
97            }),
98            Event::PostSensor { sensor, signals } => Some(SessionEvent::Sensor {
99                ts_ms: ts,
100                id: (*sensor).clone(),
101                signals: signals.len(),
102            }),
103            Event::PreCompact { stage } => Some(SessionEvent::PreCompact {
104                ts_ms: ts,
105                stage: *stage,
106            }),
107            Event::PostCompact { stage } => Some(SessionEvent::PostCompact {
108                ts_ms: ts,
109                stage: *stage,
110            }),
111            Event::Heartbeat { iter } => Some(SessionEvent::Heartbeat {
112                ts_ms: ts,
113                iter: *iter,
114            }),
115            Event::SessionEnd => Some(SessionEvent::End { ts_ms: ts }),
116            _ => None,
117        };
118        if let Some(e) = session_ev {
119            self.write(&e);
120        }
121        HookOutcome::Allow
122    }
123}
124
125/// Read a recorded JSONL session log back into memory.
126///
127/// Tolerates malformed lines (logged, skipped) so a partially-corrupted log
128/// still yields usable replay material.
129pub fn read_session(path: &Path) -> std::io::Result<Vec<SessionEvent>> {
130    let content = std::fs::read_to_string(path)?;
131    let mut events = Vec::new();
132    for (i, line) in content.lines().enumerate() {
133        let line = line.trim();
134        if line.is_empty() { continue; }
135        match serde_json::from_str(line) {
136            Ok(e) => events.push(e),
137            Err(err) => tracing::warn!(line=i+1, error=%err, "session log line skipped"),
138        }
139    }
140    Ok(events)
141}
142
143/// Build a [`harness_models::MockModel`] that returns each recorded
144/// `PostModel` output in order. Pair with a fresh `AgentLoop` to replay the
145/// run.
146pub fn replay_as_mock(events: &[SessionEvent]) -> harness_models::MockModel {
147    use harness_models::{MockModel, MockResponse};
148    let mut m = MockModel::new().with_name("replay");
149    for e in events {
150        if let SessionEvent::PostModel { output, .. } = e {
151            m = m.script(MockResponse {
152                text:        output.text.clone(),
153                tool_calls:  output.tool_calls.clone(),
154                stop_reason: output.stop_reason,
155                input_tokens:  output.usage.input_tokens,
156                output_tokens: output.usage.output_tokens,
157                reasoning: output.reasoning.clone(),
158            });
159        }
160    }
161    m
162}
163
164/// Backwards-compatible alias.
165pub fn replay_as_mock_via_events(events: &[SessionEvent]) -> harness_models::MockModel {
166    replay_as_mock(events)
167}
168
169/// Stats from a single session — handy summary for the `harness trace` CLI.
170#[derive(Debug, Clone, Default)]
171pub struct SessionStats {
172    pub events:      usize,
173    pub model_calls: usize,
174    pub tool_calls:  usize,
175    pub iters:       u32,
176    pub input_tokens:  u32,
177    pub output_tokens: u32,
178    pub stages_run:  usize,
179    pub duration_ms: i64,
180}
181
182impl SessionStats {
183    pub fn from(events: &[SessionEvent]) -> Self {
184        let mut s = Self { events: events.len(), ..Default::default() };
185        let mut first_ts: Option<i64> = None;
186        let mut last_ts: Option<i64> = None;
187        for e in events {
188            let ts = match e {
189                SessionEvent::Start { ts_ms, .. }
190                | SessionEvent::PreModel { ts_ms, .. }
191                | SessionEvent::PostModel { ts_ms, .. }
192                | SessionEvent::PreTool { ts_ms, .. }
193                | SessionEvent::PostTool { ts_ms, .. }
194                | SessionEvent::Sensor { ts_ms, .. }
195                | SessionEvent::PreCompact { ts_ms, .. }
196                | SessionEvent::PostCompact { ts_ms, .. }
197                | SessionEvent::Heartbeat { ts_ms, .. }
198                | SessionEvent::End { ts_ms } => *ts_ms,
199            };
200            if first_ts.is_none() { first_ts = Some(ts); }
201            last_ts = Some(ts);
202
203            match e {
204                SessionEvent::PostModel { output, .. } => {
205                    s.model_calls += 1;
206                    s.input_tokens  += output.usage.input_tokens;
207                    s.output_tokens += output.usage.output_tokens;
208                }
209                SessionEvent::PreTool { .. } => s.tool_calls += 1,
210                SessionEvent::PostCompact { .. } => s.stages_run += 1,
211                SessionEvent::Heartbeat { iter, .. } => s.iters = s.iters.max(*iter + 1),
212                _ => {}
213            }
214        }
215        s.duration_ms = match (first_ts, last_ts) {
216            (Some(a), Some(b)) => b - a,
217            _ => 0,
218        };
219        s
220    }
221}
222
#[cfg(test)]
mod tests {
    use super::*;

    /// Five-event fixture: start, one heartbeat, one model round-trip whose
    /// reply is the text "hi", then end. Timestamps span 0..=110.
    fn sample_log() -> Vec<SessionEvent> {
        let reply = ModelOutput {
            text: Some("hi".into()),
            tool_calls: Vec::new(),
            usage: Default::default(),
            stop_reason: harness_core::StopReason::EndTurn,
            reasoning: None,
        };
        vec![
            SessionEvent::Start { ts_ms: 0, source: "Startup".into() },
            SessionEvent::Heartbeat { ts_ms: 1, iter: 0 },
            SessionEvent::PreModel { ts_ms: 2, history_len: 1, tools_count: 3 },
            SessionEvent::PostModel { ts_ms: 100, output: reply },
            SessionEvent::End { ts_ms: 110 },
        ]
    }

    /// The summary counts events, model calls and iterations, and measures
    /// the span between first and last timestamp.
    #[test]
    fn stats_compute_correctly() {
        let stats = SessionStats::from(&sample_log());
        assert_eq!(stats.events, 5);
        assert_eq!(stats.model_calls, 1);
        assert_eq!(stats.iters, 1);
        assert_eq!(stats.duration_ms, 110);
    }

    /// Every event survives a JSON encode/decode cycle, and the model
    /// reply keeps its text.
    #[test]
    fn round_trip_via_serde() {
        let original = sample_log();

        let mut encoded: Vec<String> = Vec::with_capacity(original.len());
        for event in &original {
            encoded.push(serde_json::to_string(event).unwrap());
        }

        let mut parsed: Vec<SessionEvent> = Vec::with_capacity(encoded.len());
        for line in &encoded {
            parsed.push(serde_json::from_str::<SessionEvent>(line).unwrap());
        }

        assert_eq!(parsed.len(), original.len());
        let text_survived = matches!(
            parsed[3],
            SessionEvent::PostModel { ref output, .. } if output.text.as_deref() == Some("hi")
        );
        assert!(text_survived);
    }
}
267
268/// Tiny helper used by the CLI: convert a single event to a single line of
269/// pretty-printed text (does NOT include the timestamp prefix).
270pub fn format_event_short(e: &SessionEvent) -> String {
271    match e {
272        SessionEvent::Start { source, .. } => format!("session start ({source})"),
273        SessionEvent::Heartbeat { iter, .. } => format!("iter {iter}"),
274        SessionEvent::PreModel { history_len, tools_count, .. } => {
275            format!("→ model (history={history_len}, tools={tools_count})")
276        }
277        SessionEvent::PostModel { output, .. } => {
278            let calls = output.tool_calls.len();
279            let txt = output.text.as_deref().unwrap_or("").chars().take(60).collect::<String>();
280            if calls > 0 {
281                format!(
282                    "← model: {} tool_call(s) [{}/{} tok]",
283                    calls, output.usage.input_tokens, output.usage.output_tokens
284                )
285            } else {
286                format!(
287                    "← model: {:?} [{}/{} tok]",
288                    txt, output.usage.input_tokens, output.usage.output_tokens
289                )
290            }
291        }
292        SessionEvent::PreTool { action, .. } => format!("  → tool {} args={}", action.tool, action.args),
293        SessionEvent::PostTool { call_id, result, .. } => {
294            format!("  ← tool {} ok={}", call_id, result.ok)
295        }
296        SessionEvent::Sensor { id, signals, .. } => format!("  ⚑ sensor {id}: {signals} signal(s)"),
297        SessionEvent::PreCompact { stage, .. } => format!("  ⇩ pre-compact {stage:?}"),
298        SessionEvent::PostCompact { stage, .. } => format!("  ⇧ post-compact {stage:?}"),
299        SessionEvent::End { .. } => "session end".into(),
300    }
301}