use crate::models::field_names;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;
use serde_json::Value;
use sha2::{Digest, Sha256};
use super::{ParseError, ParsedTurn, ToolCallSummary, TranscriptParser, TurnRole};
/// Stateless [`TranscriptParser`] for line-delimited JSON (JSONL) transcript files.
///
/// NOTE(review): judging by the name, the expected input is a Claude Code session
/// transcript — confirm against the caller that selects this parser.
pub struct ClaudeCodeJsonlParser;
impl TranscriptParser for ClaudeCodeJsonlParser {
    /// Reads the JSONL transcript at `path` and returns one [`ParsedTurn`] per usable line.
    ///
    /// Parsing is best-effort per line: blank lines, lines that are not valid UTF-8,
    /// lines that are not valid JSON, and JSON records that `parse_one_turn` rejects
    /// are skipped without failing the whole file.
    ///
    /// When `since_iso` is given, turns whose timestamp string sorts before it are
    /// dropped. The comparison is a plain lexical string comparison, so it is only
    /// meaningful when both sides use the same timestamp layout.
    ///
    /// # Errors
    ///
    /// Returns [`ParseError::Read`] when the file cannot be opened or when reading
    /// from it fails with a genuine I/O error.
    fn parse(&self, path: &Path, since_iso: Option<&str>) -> Result<Vec<ParsedTurn>, ParseError> {
        let f = File::open(path).map_err(|e| ParseError::Read(e.to_string()))?;
        let reader = BufReader::new(f);
        let mut turns = Vec::new();
        for line_res in reader.lines() {
            let line = match line_res {
                Ok(line) => line,
                // A line that is not valid UTF-8 is a per-line problem: the reader has
                // already moved past it, so skipping keeps the best-effort behaviour.
                Err(e) if e.kind() == std::io::ErrorKind::InvalidData => continue,
                // Any other error may repeat on every subsequent read (for example when
                // `path` is a directory); skipping it would spin forever, so surface it.
                Err(e) => return Err(ParseError::Read(e.to_string())),
            };
            if line.trim().is_empty() {
                continue;
            }
            let Ok(v) = serde_json::from_str::<Value>(&line) else {
                continue;
            };
            let Some(parsed) = parse_one_turn(&v, &line) else {
                continue;
            };
            if let Some(filter) = since_iso {
                if parsed.timestamp_iso.as_str() < filter {
                    continue;
                }
            }
            turns.push(parsed);
        }
        Ok(turns)
    }
}
/// Converts one decoded transcript record into a [`ParsedTurn`].
///
/// `v` is the decoded JSON record and `raw_line` is the exact text it was decoded
/// from; the raw text is only used to compute the line's SHA-256 fingerprint.
///
/// Returns `None` when the record has no string `timestamp` or `type`, or when it
/// yields neither text nor any tool call.
fn parse_one_turn(v: &Value, raw_line: &str) -> Option<ParsedTurn> {
    let timestamp_iso = v.get("timestamp").and_then(Value::as_str)?.to_owned();
    let role = match v.get("type").and_then(Value::as_str)? {
        "user" => TurnRole::User,
        "assistant" => TurnRole::Assistant,
        "tool_use" => TurnRole::ToolUse,
        "tool_result" => TurnRole::ToolResult,
        _ => TurnRole::Other,
    };

    let mut content_text = String::new();
    let mut tool_calls = Vec::new();

    // `message.content` is either a bare string or an array of typed blocks.
    match v.get("message").and_then(|message| message.get("content")) {
        Some(Value::String(text)) => content_text.push_str(text),
        Some(Value::Array(blocks)) => {
            for block in blocks {
                match block.get("type").and_then(Value::as_str) {
                    Some("text") => {
                        let Some(text) = block.get("text").and_then(Value::as_str) else {
                            continue;
                        };
                        // Text blocks are joined with a newline between them.
                        if !content_text.is_empty() {
                            content_text.push('\n');
                        }
                        content_text.push_str(text);
                    }
                    Some("tool_use") => {
                        let tool = block
                            .get("name")
                            .and_then(Value::as_str)
                            .map_or_else(|| String::from("?"), String::from);
                        let brief = tool_use_brief(block);
                        tool_calls.push(ToolCallSummary { tool, brief });
                    }
                    _ => {}
                }
            }
        }
        _ => {}
    }

    // Fall back to a top-level string `content` when `message` produced no text.
    if content_text.is_empty() {
        if let Some(text) = v.get("content").and_then(Value::as_str) {
            content_text.push_str(text);
        }
    }

    // A record with neither text nor tool calls carries nothing worth keeping.
    if content_text.is_empty() && tool_calls.is_empty() {
        return None;
    }

    let host_session_id = v
        .get("sessionId")
        .and_then(Value::as_str)
        .map(str::to_owned);

    Some(ParsedTurn {
        timestamp_iso,
        role,
        content_text,
        tool_calls,
        line_sha256_hex: sha256_hex(raw_line),
        host_session_id,
        host_turn_index: None,
    })
}
/// Builds a short description of a `tool_use` block from its `input` object.
///
/// The first string-valued field among the description key, `command`, `file_path`
/// and `query` wins. If none is present, the first entry the input object yields is
/// rendered as `key=value` (the value in its JSON form). A missing or non-object
/// input gives an empty string. The result is passed through `truncate` with a
/// limit of 200.
fn tool_use_brief(b: &Value) -> String {
    let input = b.get("input");

    // Preferred fields, in priority order.
    let preferred_keys: [&str; 4] = [field_names::DESCRIPTION, "command", "file_path", "query"];
    let named = preferred_keys
        .iter()
        .find_map(|key| input?.get(*key)?.as_str());

    let brief = match named {
        Some(text) => text.to_string(),
        None => input
            .and_then(Value::as_object)
            .and_then(|fields| fields.iter().next())
            .map(|(k, v)| format!("{k}={v}"))
            .unwrap_or_default(),
    };
    truncate(&brief, 200)
}
/// Shortens `s` to at most `max` characters, appending `…` when anything was cut.
///
/// Length is measured in `char`s for both the "does it fit" check and the cut
/// itself, so a string made of multi-byte characters that already fits is returned
/// unchanged, without a spurious ellipsis. The cut always lands on a character
/// boundary.
fn truncate(s: &str, max: usize) -> String {
    // The byte offset of the character at index `max` is exactly where the kept
    // prefix ends; if there is no such character the whole string already fits.
    match s.char_indices().nth(max) {
        None => s.to_string(),
        Some((cut, _)) => {
            let mut out = String::with_capacity(cut + '…'.len_utf8());
            out.push_str(&s[..cut]);
            out.push('…');
            out
        }
    }
}
/// Returns the SHA-256 digest of `input`'s UTF-8 bytes as lowercase hexadecimal.
fn sha256_hex(input: &str) -> String {
    // One-shot form of new + update + finalize.
    let digest = Sha256::digest(input.as_bytes());
    format!("{:x}", digest)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Decodes `line` as JSON and feeds it to `parse_one_turn`, passing the same
    /// text as the raw line.
    fn turn_from(line: &str) -> Option<ParsedTurn> {
        let v: Value = serde_json::from_str(line).expect("test fixture must be valid JSON");
        parse_one_turn(&v, line)
    }

    /// Writes each fixture line (newline-terminated) to a fresh temp file and
    /// flushes it before handing it to the parser under test.
    fn transcript_file(lines: &[&str]) -> tempfile::NamedTempFile {
        let mut f = tempfile::NamedTempFile::new().unwrap();
        for line in lines {
            writeln!(f, "{line}").unwrap();
        }
        f.flush().unwrap();
        f
    }

    /// A single `text` block becomes the turn's content.
    #[test]
    fn parses_typed_user_text_block() {
        let p = turn_from(
            r#"{"timestamp":"2026-05-28T12:00:00Z","type":"user","message":{"content":[{"type":"text","text":"hello"}]}}"#,
        )
        .unwrap();
        assert_eq!(p.role, TurnRole::User);
        assert_eq!(p.content_text, "hello");
        assert_eq!(p.timestamp_iso, "2026-05-28T12:00:00Z");
        assert!(p.tool_calls.is_empty());
        assert_eq!(p.line_sha256_hex.len(), 64);
    }

    /// Text and `tool_use` blocks in one message are split into content and tool calls.
    #[test]
    fn parses_assistant_with_tool_use_blocks() {
        let p = turn_from(
            r#"{"timestamp":"2026-05-28T12:01:00Z","type":"assistant","message":{"content":[{"type":"text","text":"running command"},{"type":"tool_use","name":"Bash","input":{"command":"ls","description":"list files"}}]}}"#,
        )
        .unwrap();
        assert_eq!(p.role, TurnRole::Assistant);
        assert_eq!(p.content_text, "running command");
        assert_eq!(p.tool_calls.len(), 1);
        assert_eq!(p.tool_calls[0].tool, "Bash");
        assert_eq!(p.tool_calls[0].brief, "list files");
    }

    /// Records without a timestamp or any content are dropped.
    #[test]
    fn skips_sentinel_lines() {
        assert!(turn_from(r#"{"type":"last-prompt"}"#).is_none());
    }

    /// Turns whose timestamp sorts before the `since` filter are excluded.
    #[test]
    fn since_filter_excludes_earlier_lines() {
        let f = transcript_file(&[
            r#"{"timestamp":"2026-05-28T10:00:00Z","type":"user","message":{"content":"a"}}"#,
            r#"{"timestamp":"2026-05-28T12:00:00Z","type":"user","message":{"content":"b"}}"#,
        ]);
        let turns = ClaudeCodeJsonlParser
            .parse(f.path(), Some("2026-05-28T11:00:00Z"))
            .unwrap();
        assert_eq!(turns.len(), 1);
        assert_eq!(turns[0].content_text, "b");
    }

    /// Hashing the same line twice gives the same 64-character hex digest.
    #[test]
    fn sha256_dedup_is_stable_for_same_line() {
        let s = r#"{"timestamp":"2026-05-28T12:00:00Z","type":"user","message":{"content":"x"}}"#;
        let a = sha256_hex(s);
        let b = sha256_hex(s);
        assert_eq!(a, b);
        assert_eq!(a.len(), 64);
    }

    /// Both `timestamp` and `type` are mandatory.
    #[test]
    fn parse_one_turn_requires_timestamp_and_type() {
        assert!(turn_from(r#"{"type":"user"}"#).is_none());
        assert!(turn_from(r#"{"timestamp":"2026-05-28T12:00:00Z"}"#).is_none());
    }

    /// Tool tags map to their roles; unknown tags map to `Other`.
    #[test]
    fn parse_one_turn_classifies_tool_roles_and_other() {
        for (tag, want) in [
            ("tool_use", TurnRole::ToolUse),
            ("tool_result", TurnRole::ToolResult),
            ("system", TurnRole::Other),
        ] {
            let line = format!(
                r#"{{"timestamp":"2026-05-28T12:00:00Z","type":"{tag}","message":{{"content":"body"}}}}"#
            );
            let p = turn_from(&line).unwrap();
            assert_eq!(p.role, want, "tag {tag}");
        }
    }

    /// A bare-string `message.content` and a top-level `content` are both accepted.
    #[test]
    fn parse_one_turn_legacy_string_content_and_top_level_content() {
        let legacy = turn_from(
            r#"{"timestamp":"2026-05-28T12:00:00Z","type":"user","message":{"content":"legacy string"}}"#,
        )
        .unwrap();
        assert_eq!(legacy.content_text, "legacy string");

        let top_level = turn_from(
            r#"{"timestamp":"2026-05-28T12:00:00Z","type":"tool_result","content":"top level body"}"#,
        )
        .unwrap();
        assert_eq!(top_level.content_text, "top level body");
    }

    /// `sessionId` is copied through; the turn index is left unset.
    #[test]
    fn parse_one_turn_captures_session_id() {
        let p = turn_from(
            r#"{"timestamp":"2026-05-28T12:00:00Z","type":"user","sessionId":"sess-xyz","message":{"content":"hi"}}"#,
        )
        .unwrap();
        assert_eq!(p.host_session_id.as_deref(), Some("sess-xyz"));
        assert!(p.host_turn_index.is_none());
    }

    /// Walks the field priority order, the `key=value` fallback and the empty case.
    #[test]
    fn tool_use_brief_field_picking_ladder() {
        let b = serde_json::json!({"name":"X","input":{"description":"d","command":"c"}});
        assert_eq!(tool_use_brief(&b), "d");
        let b = serde_json::json!({"name":"X","input":{"command":"ls -la"}});
        assert_eq!(tool_use_brief(&b), "ls -la");
        let b = serde_json::json!({"name":"Read","input":{"file_path":"/a/b.rs"}});
        assert_eq!(tool_use_brief(&b), "/a/b.rs");
        let b = serde_json::json!({"name":"Search","input":{"query":"needle"}});
        assert_eq!(tool_use_brief(&b), "needle");
        let b = serde_json::json!({"name":"Z","input":{"weird":"value"}});
        assert_eq!(tool_use_brief(&b), "weird=\"value\"");
        let b = serde_json::json!({"name":"Z"});
        assert_eq!(tool_use_brief(&b), "");
    }

    /// Short input is untouched; over-long input is cut and gets an ellipsis.
    #[test]
    fn truncate_appends_ellipsis_over_max() {
        assert_eq!(truncate("abc", 200), "abc");
        let long: String = "x".repeat(250);
        let out = truncate(&long, 200);
        assert!(out.ends_with('…'));
        assert_eq!(out.chars().count(), 201);
    }

    /// Blank, non-JSON and content-free lines are skipped without losing good ones.
    #[test]
    fn parse_skips_blank_and_malformed_lines_but_keeps_good_ones() {
        let f = transcript_file(&[
            "",
            "not json at all",
            r#"{"type":"last-prompt"}"#,
            r#"{"timestamp":"2026-05-28T12:00:00Z","type":"user","message":{"content":"good"}}"#,
        ]);
        let turns = ClaudeCodeJsonlParser.parse(f.path(), None).unwrap();
        assert_eq!(turns.len(), 1, "only the well-formed content turn survives");
        assert_eq!(turns[0].content_text, "good");
    }

    /// A path that cannot be opened is reported as `ParseError::Read`.
    #[test]
    fn parse_open_error_surfaces_read_error() {
        let missing = std::path::Path::new("/nonexistent/dir/does-not-exist.jsonl");
        let err = ClaudeCodeJsonlParser.parse(missing, None).unwrap_err();
        assert!(matches!(err, ParseError::Read(_)));
    }
}