Skip to main content

ai_memory/recover/parsers/
claude_code_jsonl.rs

1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3
4//! Claude Code transcript-JSONL parser. The transcript file is one
5//! JSON object per line; each object carries at least `timestamp`
6//! (ISO-8601 Z) and `type` (`user` / `assistant` / `tool_use` /
7//! `tool_result` / etc.) plus type-specific payload fields.
8//!
//! This parser swallows per-line errors: an unreadable or malformed
//! line is skipped rather than treated as fatal, and the partial
//! result is what `recover_from_transcript` writes. See the v0.7.0
//! #1389 implementation slice §C2 for the verbatim line-shape
//! reference and the surviving `f755c061-...jsonl` example dossier
//! path.
14
15use crate::models::field_names;
16use std::fs::File;
17use std::io::{BufRead, BufReader};
18use std::path::Path;
19
20use serde_json::Value;
21use sha2::{Digest, Sha256};
22
23use super::{ParseError, ParsedTurn, ToolCallSummary, TranscriptParser, TurnRole};
24
/// Zero-sized parser implementing [`TranscriptParser`] for the
/// Claude Code transcript format.
///
/// The type has no fields, so constructing it is just naming it
/// (`ClaudeCodeJsonlParser`); parsing is driven entirely by the
/// arguments passed to `parse`.
pub struct ClaudeCodeJsonlParser;
28
29impl TranscriptParser for ClaudeCodeJsonlParser {
30    fn parse(&self, path: &Path, since_iso: Option<&str>) -> Result<Vec<ParsedTurn>, ParseError> {
31        let f = File::open(path).map_err(|e| ParseError::Read(e.to_string()))?;
32        let reader = BufReader::new(f);
33        let mut turns = Vec::new();
34
35        for line_res in reader.lines() {
36            let Ok(line) = line_res else {
37                // Per the parser-trait contract, we swallow read
38                // errors and continue. SessionStart-hook integration
39                // can't tolerate a single bad line wedging recovery.
40                continue;
41            };
42            if line.trim().is_empty() {
43                continue;
44            }
45            let Ok(v) = serde_json::from_str::<Value>(&line) else {
46                continue;
47            };
48            let Some(parsed) = parse_one_turn(&v, &line) else {
49                continue;
50            };
51            if let Some(filter) = since_iso {
52                if parsed.timestamp_iso.as_str() < filter {
53                    continue;
54                }
55            }
56            turns.push(parsed);
57        }
58
59        Ok(turns)
60    }
61}
62
63/// Parse one JSONL line into a [`ParsedTurn`]. Returns `None` for
64/// line shapes we don't recognize (e.g., `permission-mode` toggles,
65/// `last-prompt` sentinels). The dedup-sha is computed from the
66/// verbatim line content so cross-version line-shape drift doesn't
67/// re-atomise already-stored turns.
68fn parse_one_turn(v: &Value, raw_line: &str) -> Option<ParsedTurn> {
69    let timestamp_iso = v.get("timestamp")?.as_str()?.to_string();
70    let type_tag = v.get("type")?.as_str()?;
71    let role = match type_tag {
72        "user" => TurnRole::User,
73        "assistant" => TurnRole::Assistant,
74        "tool_use" => TurnRole::ToolUse,
75        "tool_result" => TurnRole::ToolResult,
76        _ => TurnRole::Other,
77    };
78
79    let mut content_text = String::new();
80    let mut tool_calls = Vec::new();
81
82    // Claude Code transcripts carry the user/assistant text under
83    // `message.content`; that field is either a string (legacy) or
84    // an array of typed blocks (current).
85    if let Some(msg) = v.get("message") {
86        let content = msg.get("content");
87        match content {
88            Some(Value::String(s)) => content_text.push_str(s),
89            Some(Value::Array(blocks)) => {
90                for b in blocks {
91                    if let Some(t) = b.get("type").and_then(Value::as_str) {
92                        match t {
93                            "text" => {
94                                if let Some(s) = b.get("text").and_then(Value::as_str) {
95                                    if !content_text.is_empty() {
96                                        content_text.push('\n');
97                                    }
98                                    content_text.push_str(s);
99                                }
100                            }
101                            "tool_use" => {
102                                let tool = b
103                                    .get("name")
104                                    .and_then(Value::as_str)
105                                    .unwrap_or("?")
106                                    .to_string();
107                                let brief = tool_use_brief(b);
108                                tool_calls.push(ToolCallSummary { tool, brief });
109                            }
110                            _ => {}
111                        }
112                    }
113                }
114            }
115            _ => {}
116        }
117    }
118
119    // Some line shapes (esp. user-side wrapper events for
120    // tool_result) carry text directly under top-level `content`.
121    if content_text.is_empty() {
122        if let Some(s) = v.get("content").and_then(Value::as_str) {
123            content_text.push_str(s);
124        }
125    }
126
127    // If we have neither text nor tool calls, the line is not
128    // recovery-worthy (typically `last-prompt` or `permission-mode`
129    // sentinels). Return None to skip.
130    if content_text.is_empty() && tool_calls.is_empty() {
131        return None;
132    }
133
134    let line_sha256_hex = sha256_hex(raw_line);
135
136    // #1573 — surface the host session identifier so the dedup layer
137    // can key on `(host_session_id, host_turn_index)` when available.
138    // Claude Code JSONL carries `sessionId` per line but NO numeric
139    // turn counter, so `host_turn_index` stays `None` here (a line
140    // ordinal is not a substitute — see the `ParsedTurn` field doc).
141    let host_session_id = v
142        .get("sessionId")
143        .and_then(Value::as_str)
144        .map(ToString::to_string);
145
146    Some(ParsedTurn {
147        timestamp_iso,
148        role,
149        content_text,
150        tool_calls,
151        line_sha256_hex,
152        host_session_id,
153        host_turn_index: None,
154    })
155}
156
157/// Best-effort one-line brief for a tool-use payload. Picks the
158/// most informative field (`description` / `command` / `file_path`
159/// / first arg key) and truncates to 200 chars.
160fn tool_use_brief(b: &Value) -> String {
161    let input = b.get("input");
162    let pick = |key: &str| -> Option<String> {
163        input
164            .and_then(|i| i.get(key))
165            .and_then(Value::as_str)
166            .map(ToString::to_string)
167    };
168    let brief = pick(field_names::DESCRIPTION)
169        .or_else(|| pick("command"))
170        .or_else(|| pick("file_path"))
171        .or_else(|| pick("query"))
172        .or_else(|| {
173            input
174                .and_then(Value::as_object)
175                .and_then(|m| m.iter().next().map(|(k, v)| format!("{k}={v}")))
176        })
177        .unwrap_or_default();
178    truncate(&brief, 200)
179}
180
/// Limit `s` to at most `max` characters (Unicode scalar values).
///
/// Returns `s` unchanged when it has `max` characters or fewer.
/// Otherwise returns the first `max` characters followed by a
/// single `…`, so a truncated result is `max + 1` characters long.
///
/// Both the decision and the cut are made on character count. A
/// byte-length guard would append `…` to multi-byte strings that
/// fit within `max` characters and were never shortened.
fn truncate(s: &str, max: usize) -> String {
    // The character at index `max` exists only when `s` is longer
    // than `max` characters; its byte offset is where the kept
    // prefix ends, which is always a valid char boundary.
    match s.char_indices().nth(max) {
        None => s.to_string(),
        Some((cut, _)) => {
            let mut out = String::with_capacity(cut + '…'.len_utf8());
            out.push_str(&s[..cut]);
            out.push('…');
            out
        }
    }
}
190
191fn sha256_hex(input: &str) -> String {
192    let mut h = Sha256::new();
193    h.update(input.as_bytes());
194    format!("{:x}", h.finalize())
195}
196
// Unit tests for the Claude Code JSONL parser. Fixtures are inline
// raw-string JSON lines; file-level tests write them to a temp file
// and run the full `parse` entry point.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    // Current line shape: `message.content` is an array of typed
    // blocks containing a single `text` block.
    #[test]
    fn parses_typed_user_text_block() {
        let line = r#"{"timestamp":"2026-05-28T12:00:00Z","type":"user","message":{"content":[{"type":"text","text":"hello"}]}}"#;
        let v: Value = serde_json::from_str(line).unwrap();
        let p = parse_one_turn(&v, line).unwrap();
        assert_eq!(p.role, TurnRole::User);
        assert_eq!(p.content_text, "hello");
        assert_eq!(p.timestamp_iso, "2026-05-28T12:00:00Z");
        assert!(p.tool_calls.is_empty());
        assert_eq!(p.line_sha256_hex.len(), 64);
    }

    // A mixed assistant turn: text goes to `content_text`, the
    // `tool_use` block becomes a summary whose brief prefers
    // `description` over `command`.
    #[test]
    fn parses_assistant_with_tool_use_blocks() {
        let line = r#"{"timestamp":"2026-05-28T12:01:00Z","type":"assistant","message":{"content":[{"type":"text","text":"running command"},{"type":"tool_use","name":"Bash","input":{"command":"ls","description":"list files"}}]}}"#;
        let v: Value = serde_json::from_str(line).unwrap();
        let p = parse_one_turn(&v, line).unwrap();
        assert_eq!(p.role, TurnRole::Assistant);
        assert_eq!(p.content_text, "running command");
        assert_eq!(p.tool_calls.len(), 1);
        assert_eq!(p.tool_calls[0].tool, "Bash");
        assert_eq!(p.tool_calls[0].brief, "list files");
    }

    #[test]
    fn skips_sentinel_lines() {
        // The `last-prompt` and `permission-mode` lines have neither
        // text content nor tool_use blocks; recovery should skip
        // them.
        let line = r#"{"type":"last-prompt"}"#;
        let v: Value = serde_json::from_str(line).unwrap();
        assert!(parse_one_turn(&v, line).is_none());
    }

    // Two turns straddling the cutoff: only the later one survives
    // the `since_iso` filter.
    #[test]
    fn since_filter_excludes_earlier_lines() {
        let mut f = tempfile::NamedTempFile::new().unwrap();
        writeln!(
            f,
            r#"{{"timestamp":"2026-05-28T10:00:00Z","type":"user","message":{{"content":"a"}}}}"#
        )
        .unwrap();
        writeln!(
            f,
            r#"{{"timestamp":"2026-05-28T12:00:00Z","type":"user","message":{{"content":"b"}}}}"#
        )
        .unwrap();
        let parser = ClaudeCodeJsonlParser;
        let turns = parser
            .parse(f.path(), Some("2026-05-28T11:00:00Z"))
            .unwrap();
        assert_eq!(turns.len(), 1);
        assert_eq!(turns[0].content_text, "b");
    }

    // Hashing the same input twice gives the same 64-hex-char digest.
    #[test]
    fn sha256_dedup_is_stable_for_same_line() {
        let s = r#"{"timestamp":"2026-05-28T12:00:00Z","type":"user","message":{"content":"x"}}"#;
        let a = sha256_hex(s);
        let b = sha256_hex(s);
        assert_eq!(a, b);
        assert_eq!(a.len(), 64);
    }

    // Coverage uplift (2026-06-12): per-line skip branches, the
    // tool_use_brief field-picking ladder, the truncate over-max arm,
    // and the parse()-level malformed-line tolerance.

    // Either required key missing makes the line unparseable.
    #[test]
    fn parse_one_turn_requires_timestamp_and_type() {
        let v: Value = serde_json::from_str(r#"{"type":"user"}"#).unwrap();
        assert!(parse_one_turn(&v, "{}").is_none());
        let v2: Value = serde_json::from_str(r#"{"timestamp":"2026-05-28T12:00:00Z"}"#).unwrap();
        assert!(parse_one_turn(&v2, "{}").is_none());
    }

    // Role mapping for the non user/assistant tags, including the
    // catch-all `Other` arm.
    #[test]
    fn parse_one_turn_classifies_tool_roles_and_other() {
        for (tag, want) in [
            ("tool_use", TurnRole::ToolUse),
            ("tool_result", TurnRole::ToolResult),
            ("system", TurnRole::Other),
        ] {
            let line = format!(
                r#"{{"timestamp":"2026-05-28T12:00:00Z","type":"{tag}","message":{{"content":"body"}}}}"#
            );
            let v: Value = serde_json::from_str(&line).unwrap();
            let p = parse_one_turn(&v, &line).unwrap();
            assert_eq!(p.role, want, "tag {tag}");
        }
    }

    // Legacy string `message.content`, then the top-level `content`
    // fallback used when there is no `message` at all.
    #[test]
    fn parse_one_turn_legacy_string_content_and_top_level_content() {
        let line = r#"{"timestamp":"2026-05-28T12:00:00Z","type":"user","message":{"content":"legacy string"}}"#;
        let v: Value = serde_json::from_str(line).unwrap();
        assert_eq!(
            parse_one_turn(&v, line).unwrap().content_text,
            "legacy string"
        );

        let line2 = r#"{"timestamp":"2026-05-28T12:00:00Z","type":"tool_result","content":"top level body"}"#;
        let v2: Value = serde_json::from_str(line2).unwrap();
        assert_eq!(
            parse_one_turn(&v2, line2).unwrap().content_text,
            "top level body"
        );
    }

    // `sessionId` is surfaced; the turn index is never populated by
    // this parser.
    #[test]
    fn parse_one_turn_captures_session_id() {
        let line = r#"{"timestamp":"2026-05-28T12:00:00Z","type":"user","sessionId":"sess-xyz","message":{"content":"hi"}}"#;
        let v: Value = serde_json::from_str(line).unwrap();
        let p = parse_one_turn(&v, line).unwrap();
        assert_eq!(p.host_session_id.as_deref(), Some("sess-xyz"));
        assert!(p.host_turn_index.is_none());
    }

    // Walks the preference order: description, command, file_path,
    // query, first `key=value` (value JSON-rendered), then empty.
    #[test]
    fn tool_use_brief_field_picking_ladder() {
        let b = serde_json::json!({"name":"X","input":{"description":"d","command":"c"}});
        assert_eq!(tool_use_brief(&b), "d");
        let b = serde_json::json!({"name":"X","input":{"command":"ls -la"}});
        assert_eq!(tool_use_brief(&b), "ls -la");
        let b = serde_json::json!({"name":"Read","input":{"file_path":"/a/b.rs"}});
        assert_eq!(tool_use_brief(&b), "/a/b.rs");
        let b = serde_json::json!({"name":"Search","input":{"query":"needle"}});
        assert_eq!(tool_use_brief(&b), "needle");
        let b = serde_json::json!({"name":"Z","input":{"weird":"value"}});
        assert_eq!(tool_use_brief(&b), "weird=\"value\"");
        let b = serde_json::json!({"name":"Z"});
        assert_eq!(tool_use_brief(&b), "");
    }

    // Short input passes through; over-long input is cut to 200
    // chars plus the ellipsis (201 total).
    #[test]
    fn truncate_appends_ellipsis_over_max() {
        assert_eq!(truncate("abc", 200), "abc");
        let long: String = "x".repeat(250);
        let out = truncate(&long, 200);
        assert!(out.ends_with('…'));
        assert_eq!(out.chars().count(), 201);
    }

    // End-to-end tolerance: a blank line, a non-JSON line and a
    // sentinel are all skipped; the one good turn is returned.
    #[test]
    fn parse_skips_blank_and_malformed_lines_but_keeps_good_ones() {
        use std::io::Write;
        let mut f = tempfile::NamedTempFile::new().unwrap();
        writeln!(f).unwrap();
        writeln!(f, "not json at all").unwrap();
        writeln!(f, r#"{{"type":"last-prompt"}}"#).unwrap();
        writeln!(
            f,
            r#"{{"timestamp":"2026-05-28T12:00:00Z","type":"user","message":{{"content":"good"}}}}"#
        )
        .unwrap();
        f.flush().unwrap();
        let turns = ClaudeCodeJsonlParser.parse(f.path(), None).unwrap();
        assert_eq!(turns.len(), 1, "only the well-formed content turn survives");
        assert_eq!(turns[0].content_text, "good");
    }

    // Failing to open the file is the one error `parse` reports.
    #[test]
    fn parse_open_error_surfaces_read_error() {
        let missing = std::path::Path::new("/nonexistent/dir/does-not-exist.jsonl");
        let err = ClaudeCodeJsonlParser.parse(missing, None).unwrap_err();
        assert!(matches!(err, ParseError::Read(_)));
    }
}
369}