ai_memory/recover/parsers/
claude_code_jsonl.rs1use crate::models::field_names;
16use std::fs::File;
17use std::io::{BufRead, BufReader};
18use std::path::Path;
19
20use serde_json::Value;
21use sha2::{Digest, Sha256};
22
23use super::{ParseError, ParsedTurn, ToolCallSummary, TranscriptParser, TurnRole};
24
25pub struct ClaudeCodeJsonlParser;
28
29impl TranscriptParser for ClaudeCodeJsonlParser {
30 fn parse(&self, path: &Path, since_iso: Option<&str>) -> Result<Vec<ParsedTurn>, ParseError> {
31 let f = File::open(path).map_err(|e| ParseError::Read(e.to_string()))?;
32 let reader = BufReader::new(f);
33 let mut turns = Vec::new();
34
35 for line_res in reader.lines() {
36 let Ok(line) = line_res else {
37 continue;
41 };
42 if line.trim().is_empty() {
43 continue;
44 }
45 let Ok(v) = serde_json::from_str::<Value>(&line) else {
46 continue;
47 };
48 let Some(parsed) = parse_one_turn(&v, &line) else {
49 continue;
50 };
51 if let Some(filter) = since_iso {
52 if parsed.timestamp_iso.as_str() < filter {
53 continue;
54 }
55 }
56 turns.push(parsed);
57 }
58
59 Ok(turns)
60 }
61}
62
63fn parse_one_turn(v: &Value, raw_line: &str) -> Option<ParsedTurn> {
69 let timestamp_iso = v.get("timestamp")?.as_str()?.to_string();
70 let type_tag = v.get("type")?.as_str()?;
71 let role = match type_tag {
72 "user" => TurnRole::User,
73 "assistant" => TurnRole::Assistant,
74 "tool_use" => TurnRole::ToolUse,
75 "tool_result" => TurnRole::ToolResult,
76 _ => TurnRole::Other,
77 };
78
79 let mut content_text = String::new();
80 let mut tool_calls = Vec::new();
81
82 if let Some(msg) = v.get("message") {
86 let content = msg.get("content");
87 match content {
88 Some(Value::String(s)) => content_text.push_str(s),
89 Some(Value::Array(blocks)) => {
90 for b in blocks {
91 if let Some(t) = b.get("type").and_then(Value::as_str) {
92 match t {
93 "text" => {
94 if let Some(s) = b.get("text").and_then(Value::as_str) {
95 if !content_text.is_empty() {
96 content_text.push('\n');
97 }
98 content_text.push_str(s);
99 }
100 }
101 "tool_use" => {
102 let tool = b
103 .get("name")
104 .and_then(Value::as_str)
105 .unwrap_or("?")
106 .to_string();
107 let brief = tool_use_brief(b);
108 tool_calls.push(ToolCallSummary { tool, brief });
109 }
110 _ => {}
111 }
112 }
113 }
114 }
115 _ => {}
116 }
117 }
118
119 if content_text.is_empty() {
122 if let Some(s) = v.get("content").and_then(Value::as_str) {
123 content_text.push_str(s);
124 }
125 }
126
127 if content_text.is_empty() && tool_calls.is_empty() {
131 return None;
132 }
133
134 let line_sha256_hex = sha256_hex(raw_line);
135
136 let host_session_id = v
142 .get("sessionId")
143 .and_then(Value::as_str)
144 .map(ToString::to_string);
145
146 Some(ParsedTurn {
147 timestamp_iso,
148 role,
149 content_text,
150 tool_calls,
151 line_sha256_hex,
152 host_session_id,
153 host_turn_index: None,
154 })
155}
156
157fn tool_use_brief(b: &Value) -> String {
161 let input = b.get("input");
162 let pick = |key: &str| -> Option<String> {
163 input
164 .and_then(|i| i.get(key))
165 .and_then(Value::as_str)
166 .map(ToString::to_string)
167 };
168 let brief = pick(field_names::DESCRIPTION)
169 .or_else(|| pick("command"))
170 .or_else(|| pick("file_path"))
171 .or_else(|| pick("query"))
172 .or_else(|| {
173 input
174 .and_then(Value::as_object)
175 .and_then(|m| m.iter().next().map(|(k, v)| format!("{k}={v}")))
176 })
177 .unwrap_or_default();
178 truncate(&brief, 200)
179}
180
/// Returns `s` limited to at most `max` characters, appending `…` when anything was cut.
///
/// The limit is counted in `char`s rather than bytes, so multi-byte text that already
/// fits within `max` characters is returned unchanged and without an ellipsis.
fn truncate(s: &str, max: usize) -> String {
    // Byte offset of the first character past the limit; `None` means `s` already fits.
    match s.char_indices().nth(max) {
        None => s.to_string(),
        Some((cut, _)) => {
            let mut out = String::with_capacity(cut + '…'.len_utf8());
            // `cut` comes from `char_indices`, so it is always a char boundary.
            out.push_str(&s[..cut]);
            out.push('…');
            out
        }
    }
}
190
191fn sha256_hex(input: &str) -> String {
192 let mut h = Sha256::new();
193 h.update(input.as_bytes());
194 format!("{:x}", h.finalize())
195}
196
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Decodes `line` as JSON and feeds it to `parse_one_turn` with the same raw text.
    fn turn_from(line: &str) -> Option<ParsedTurn> {
        let value: Value = serde_json::from_str(line).unwrap();
        parse_one_turn(&value, line)
    }

    #[test]
    fn parses_typed_user_text_block() {
        let turn = turn_from(
            r#"{"timestamp":"2026-05-28T12:00:00Z","type":"user","message":{"content":[{"type":"text","text":"hello"}]}}"#,
        )
        .unwrap();
        assert_eq!(turn.role, TurnRole::User);
        assert_eq!(turn.content_text, "hello");
        assert_eq!(turn.timestamp_iso, "2026-05-28T12:00:00Z");
        assert!(turn.tool_calls.is_empty());
        assert_eq!(turn.line_sha256_hex.len(), 64);
    }

    #[test]
    fn parses_assistant_with_tool_use_blocks() {
        let turn = turn_from(
            r#"{"timestamp":"2026-05-28T12:01:00Z","type":"assistant","message":{"content":[{"type":"text","text":"running command"},{"type":"tool_use","name":"Bash","input":{"command":"ls","description":"list files"}}]}}"#,
        )
        .unwrap();
        assert_eq!(turn.role, TurnRole::Assistant);
        assert_eq!(turn.content_text, "running command");
        assert_eq!(turn.tool_calls.len(), 1);
        let call = &turn.tool_calls[0];
        assert_eq!(call.tool, "Bash");
        assert_eq!(call.brief, "list files");
    }

    #[test]
    fn skips_sentinel_lines() {
        // No timestamp and no content: the record must be rejected.
        assert!(turn_from(r#"{"type":"last-prompt"}"#).is_none());
    }

    #[test]
    fn since_filter_excludes_earlier_lines() {
        let mut transcript = tempfile::NamedTempFile::new().unwrap();
        writeln!(
            transcript,
            r#"{{"timestamp":"2026-05-28T10:00:00Z","type":"user","message":{{"content":"a"}}}}"#
        )
        .unwrap();
        writeln!(
            transcript,
            r#"{{"timestamp":"2026-05-28T12:00:00Z","type":"user","message":{{"content":"b"}}}}"#
        )
        .unwrap();

        let kept = ClaudeCodeJsonlParser
            .parse(transcript.path(), Some("2026-05-28T11:00:00Z"))
            .unwrap();
        assert_eq!(kept.len(), 1);
        assert_eq!(kept[0].content_text, "b");
    }

    #[test]
    fn sha256_dedup_is_stable_for_same_line() {
        let line = r#"{"timestamp":"2026-05-28T12:00:00Z","type":"user","message":{"content":"x"}}"#;
        let first = sha256_hex(line);
        let second = sha256_hex(line);
        assert_eq!(first, second);
        assert_eq!(first.len(), 64);
    }

    #[test]
    fn parse_one_turn_requires_timestamp_and_type() {
        let without_timestamp: Value = serde_json::from_str(r#"{"type":"user"}"#).unwrap();
        assert!(parse_one_turn(&without_timestamp, "{}").is_none());

        let without_type: Value =
            serde_json::from_str(r#"{"timestamp":"2026-05-28T12:00:00Z"}"#).unwrap();
        assert!(parse_one_turn(&without_type, "{}").is_none());
    }

    #[test]
    fn parse_one_turn_classifies_tool_roles_and_other() {
        let expectations = [
            ("tool_use", TurnRole::ToolUse),
            ("tool_result", TurnRole::ToolResult),
            ("system", TurnRole::Other),
        ];
        for (tag, want) in expectations {
            let line = format!(
                r#"{{"timestamp":"2026-05-28T12:00:00Z","type":"{tag}","message":{{"content":"body"}}}}"#
            );
            let turn = turn_from(&line).unwrap();
            assert_eq!(turn.role, want, "tag {tag}");
        }
    }

    #[test]
    fn parse_one_turn_legacy_string_content_and_top_level_content() {
        let nested = turn_from(
            r#"{"timestamp":"2026-05-28T12:00:00Z","type":"user","message":{"content":"legacy string"}}"#,
        )
        .unwrap();
        assert_eq!(nested.content_text, "legacy string");

        let top_level = turn_from(
            r#"{"timestamp":"2026-05-28T12:00:00Z","type":"tool_result","content":"top level body"}"#,
        )
        .unwrap();
        assert_eq!(top_level.content_text, "top level body");
    }

    #[test]
    fn parse_one_turn_captures_session_id() {
        let turn = turn_from(
            r#"{"timestamp":"2026-05-28T12:00:00Z","type":"user","sessionId":"sess-xyz","message":{"content":"hi"}}"#,
        )
        .unwrap();
        assert_eq!(turn.host_session_id.as_deref(), Some("sess-xyz"));
        assert!(turn.host_turn_index.is_none());
    }

    #[test]
    fn tool_use_brief_field_picking_ladder() {
        // Each row: a tool_use block and the brief it must produce.
        let ladder = [
            (
                serde_json::json!({"name":"X","input":{"description":"d","command":"c"}}),
                "d",
            ),
            (
                serde_json::json!({"name":"X","input":{"command":"ls -la"}}),
                "ls -la",
            ),
            (
                serde_json::json!({"name":"Read","input":{"file_path":"/a/b.rs"}}),
                "/a/b.rs",
            ),
            (
                serde_json::json!({"name":"Search","input":{"query":"needle"}}),
                "needle",
            ),
            (
                serde_json::json!({"name":"Z","input":{"weird":"value"}}),
                "weird=\"value\"",
            ),
            (serde_json::json!({"name":"Z"}), ""),
        ];
        for (block, expected) in ladder {
            assert_eq!(tool_use_brief(&block), expected);
        }
    }

    #[test]
    fn truncate_appends_ellipsis_over_max() {
        assert_eq!(truncate("abc", 200), "abc");

        let oversized = "x".repeat(250);
        let shortened = truncate(&oversized, 200);
        assert!(shortened.ends_with('…'));
        assert_eq!(shortened.chars().count(), 201);
    }

    #[test]
    fn parse_skips_blank_and_malformed_lines_but_keeps_good_ones() {
        let mut transcript = tempfile::NamedTempFile::new().unwrap();
        writeln!(transcript).unwrap();
        writeln!(transcript, "not json at all").unwrap();
        writeln!(transcript, r#"{{"type":"last-prompt"}}"#).unwrap();
        writeln!(
            transcript,
            r#"{{"timestamp":"2026-05-28T12:00:00Z","type":"user","message":{{"content":"good"}}}}"#
        )
        .unwrap();
        transcript.flush().unwrap();

        let turns = ClaudeCodeJsonlParser
            .parse(transcript.path(), None)
            .unwrap();
        assert_eq!(turns.len(), 1, "only the well-formed content turn survives");
        assert_eq!(turns[0].content_text, "good");
    }

    #[test]
    fn parse_open_error_surfaces_read_error() {
        let missing = Path::new("/nonexistent/dir/does-not-exist.jsonl");
        let outcome = ClaudeCodeJsonlParser.parse(missing, None);
        assert!(matches!(outcome.unwrap_err(), ParseError::Read(_)));
    }
}