tmai_core/transcript/
parser.rs

1//! JSONL line parser for Claude Code transcripts.
2//!
3//! Each line in the transcript is a JSON object with a `type` field
4//! indicating whether it's a user or assistant message.
5
6use super::types::TranscriptRecord;
7
8/// Parse a single JSONL line into TranscriptRecords.
9///
10/// Returns a Vec because one JSONL line (e.g. a user message with tool_result
11/// blocks) can produce multiple records.
12pub fn parse_jsonl_line(line: &str) -> Vec<TranscriptRecord> {
13    let line = line.trim();
14    if line.is_empty() {
15        return Vec::new();
16    }
17
18    let value: serde_json::Value = match serde_json::from_str(line) {
19        Ok(v) => v,
20        Err(_) => return Vec::new(),
21    };
22
23    // Extract top-level uuid and timestamp present on all records
24    let uuid = value.get("uuid").and_then(|v| v.as_str()).map(String::from);
25    let timestamp = value
26        .get("timestamp")
27        .and_then(|v| v.as_str())
28        .map(String::from);
29
30    let msg_type = match value.get("type").and_then(|t| t.as_str()) {
31        Some(t) => t,
32        None => return Vec::new(),
33    };
34
35    match msg_type {
36        "user" => parse_user_message(&value, uuid, timestamp),
37        "assistant" => parse_assistant_message(&value, uuid, timestamp),
38        _ => Vec::new(),
39    }
40}
41
42/// Parse a user message record.
43///
44/// User messages contain text content and may also contain tool_result blocks
45/// (tool execution results are sent back as part of user messages in the API).
46fn parse_user_message(
47    value: &serde_json::Value,
48    uuid: Option<String>,
49    timestamp: Option<String>,
50) -> Vec<TranscriptRecord> {
51    let message = match value.get("message") {
52        Some(m) => m,
53        None => return Vec::new(),
54    };
55    let content = match message.get("content") {
56        Some(c) => c,
57        None => return Vec::new(),
58    };
59
60    // Simple string content
61    if let Some(s) = content.as_str() {
62        if s.is_empty() {
63            return Vec::new();
64        }
65        return vec![TranscriptRecord::User {
66            text: s.to_string(),
67            uuid,
68            timestamp,
69        }];
70    }
71
72    // Array content: extract text blocks and tool_result blocks
73    let arr = match content.as_array() {
74        Some(a) => a,
75        None => return Vec::new(),
76    };
77
78    let mut records = Vec::new();
79    let mut text_parts = Vec::new();
80
81    for block in arr {
82        let block_type = match block.get("type").and_then(|t| t.as_str()) {
83            Some(t) => t,
84            None => continue,
85        };
86
87        match block_type {
88            "text" => {
89                if let Some(text) = block.get("text").and_then(|t| t.as_str()) {
90                    if !text.is_empty() {
91                        text_parts.push(text.to_string());
92                    }
93                }
94            }
95            "tool_result" => {
96                let output = extract_tool_result_content(block);
97                if !output.is_empty() {
98                    let is_error = block.get("is_error").and_then(|v| v.as_bool());
99                    records.push(TranscriptRecord::ToolResult {
100                        output_summary: truncate_for_preview(&output, 200),
101                        is_error,
102                        uuid: uuid.clone(),
103                        timestamp: timestamp.clone(),
104                    });
105                }
106            }
107            _ => {}
108        }
109    }
110
111    if !text_parts.is_empty() {
112        // Insert user text before tool results
113        records.insert(
114            0,
115            TranscriptRecord::User {
116                text: text_parts.join("\n"),
117                uuid: uuid.clone(),
118                timestamp: timestamp.clone(),
119            },
120        );
121    }
122
123    records
124}
125
126/// Extract text content from a tool_result block
127fn extract_tool_result_content(block: &serde_json::Value) -> String {
128    let content = match block.get("content") {
129        Some(c) => c,
130        None => return String::new(),
131    };
132
133    if let Some(s) = content.as_str() {
134        return s.to_string();
135    }
136
137    if let Some(arr) = content.as_array() {
138        return arr
139            .iter()
140            .filter_map(|b| {
141                if b.get("type")?.as_str()? == "text" {
142                    b.get("text")?.as_str().map(|s| s.to_string())
143                } else {
144                    None
145                }
146            })
147            .collect::<Vec<_>>()
148            .join("\n");
149    }
150
151    String::new()
152}
153
154/// Parse an assistant message record.
155///
156/// Each content block becomes a separate TranscriptRecord:
157/// - "text" → AssistantText
158/// - "thinking" → Thinking
159/// - "tool_use" → ToolUse (with input_summary and input_full)
160fn parse_assistant_message(
161    value: &serde_json::Value,
162    uuid: Option<String>,
163    timestamp: Option<String>,
164) -> Vec<TranscriptRecord> {
165    let message = match value.get("message") {
166        Some(m) => m,
167        None => return Vec::new(),
168    };
169    let content = match message.get("content").and_then(|c| c.as_array()) {
170        Some(a) => a,
171        None => return Vec::new(),
172    };
173
174    let mut records = Vec::new();
175
176    for block in content {
177        let block_type = match block.get("type").and_then(|t| t.as_str()) {
178            Some(t) => t,
179            None => continue,
180        };
181
182        match block_type {
183            "text" => {
184                if let Some(text) = block.get("text").and_then(|t| t.as_str()) {
185                    if !text.is_empty() {
186                        records.push(TranscriptRecord::AssistantText {
187                            text: text.to_string(),
188                            uuid: uuid.clone(),
189                            timestamp: timestamp.clone(),
190                        });
191                    }
192                }
193            }
194            "thinking" => {
195                if let Some(text) = block.get("thinking").and_then(|t| t.as_str()) {
196                    if !text.is_empty() {
197                        records.push(TranscriptRecord::Thinking {
198                            text: text.to_string(),
199                            uuid: uuid.clone(),
200                            timestamp: timestamp.clone(),
201                        });
202                    }
203                }
204            }
205            "tool_use" => {
206                let tool_name = block
207                    .get("name")
208                    .and_then(|n| n.as_str())
209                    .unwrap_or("Unknown")
210                    .to_string();
211                let input = block.get("input");
212                let input_summary = summarize_tool_input_json(&tool_name, input);
213                let input_full = input.cloned();
214                records.push(TranscriptRecord::ToolUse {
215                    tool_name,
216                    input_summary,
217                    input_full,
218                    uuid: uuid.clone(),
219                    timestamp: timestamp.clone(),
220                });
221            }
222            _ => {}
223        }
224    }
225
226    records
227}
228
229/// Summarize tool input JSON for display
230fn summarize_tool_input_json(tool_name: &str, input: Option<&serde_json::Value>) -> String {
231    let input = match input {
232        Some(v) => v,
233        None => return String::new(),
234    };
235
236    let key = match tool_name {
237        "Bash" => "command",
238        "Edit" | "Read" | "Write" => "file_path",
239        "Grep" => "pattern",
240        "Glob" => "pattern",
241        "Agent" => "description",
242        _ => "command",
243    };
244
245    input
246        .get(key)
247        .and_then(|v| v.as_str())
248        .map(|s| truncate_for_preview(s, 80))
249        .unwrap_or_default()
250}
251
/// Produce a short preview of `s`: its first line, cut to `max_len` characters.
///
/// When the first line is longer than `max_len` characters, the first
/// `max_len` characters are kept and `...` is appended; otherwise the first
/// line is returned unchanged. Lengths are measured in `char`s, so the cut
/// always lands on a UTF-8 character boundary.
fn truncate_for_preview(s: &str, max_len: usize) -> String {
    let head = s.lines().next().unwrap_or(s);
    // A character exists at index `max_len` exactly when the line is longer
    // than `max_len` characters; its byte offset is where the preview ends.
    match head.char_indices().nth(max_len) {
        Some((cut, _)) => format!("{}...", &head[..cut]),
        None => head.to_string(),
    }
}
264
265/// Extract model ID from a transcript JSONL file.
266///
267/// Reads the first few lines looking for an assistant message with `message.model`.
268/// Returns the model ID string (e.g., "claude-opus-4-6").
269pub fn extract_model_id(path: &str) -> Option<String> {
270    let file = std::fs::File::open(path).ok()?;
271    let reader = std::io::BufReader::new(file);
272    // Only scan first 20 lines — model appears in first assistant message
273    for line in std::io::BufRead::lines(reader).take(20) {
274        let line = line.ok()?;
275        let value: serde_json::Value = serde_json::from_str(line.trim()).ok()?;
276        if value.get("type")?.as_str()? == "assistant" {
277            if let Some(model) = value
278                .get("message")
279                .and_then(|m| m.get("model"))
280                .and_then(|m| m.as_str())
281            {
282                return Some(model.to_string());
283            }
284        }
285    }
286    None
287}
288
/// Map a model ID to a short display name (e.g., "claude-opus-4-6" → "Opus 4.6").
///
/// The ID is checked for a known family marker ("opus", then "sonnet", then
/// "haiku"); within the first matching family, the first matching version
/// marker adds a version suffix, otherwise the bare family name is returned.
/// IDs with no known family fall back to the text after the last `/` or `-`.
pub fn model_display_name(model_id: &str) -> String {
    // (substrings identifying a version, version label), checked in order.
    type VersionRule = (&'static [&'static str], &'static str);
    // (family marker, family label, version rules), checked in order.
    const FAMILIES: &[(&str, &str, &[VersionRule])] = &[
        ("opus", "Opus", &[(&["4-6"], "4.6"), (&["4-5"], "4.5")]),
        (
            "sonnet",
            "Sonnet",
            &[
                (&["4-6"], "4.6"),
                (&["4-5"], "4.5"),
                (&["3-5", "3.5"], "3.5"),
            ],
        ),
        ("haiku", "Haiku", &[(&["4-5"], "4.5")]),
    ];

    for &(marker, label, versions) in FAMILIES {
        if !model_id.contains(marker) {
            continue;
        }
        let version = versions
            .iter()
            .find(|(needles, _)| needles.iter().any(|needle| model_id.contains(*needle)));
        return match version {
            Some((_, number)) => format!("{} {}", label, number),
            None => label.to_string(),
        };
    }

    // Unknown family: keep only the trailing segment of the ID.
    model_id
        .rsplit(['/', '-'])
        .next()
        .unwrap_or(model_id)
        .to_string()
}
325
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `line`, assert it produced exactly one record, and return it.
    fn parse_single(line: &str) -> TranscriptRecord {
        let mut records = parse_jsonl_line(line);
        assert_eq!(records.len(), 1);
        records.remove(0)
    }

    /// A user message with plain string content keeps its text, uuid and timestamp.
    #[test]
    fn test_parse_user_message_string() {
        let record = parse_single(
            r#"{"type":"user","uuid":"abc-123","timestamp":"2026-04-02T12:00:00Z","message":{"content":"Hello world"}}"#,
        );
        let TranscriptRecord::User {
            text,
            uuid,
            timestamp,
        } = record
        else {
            panic!("Expected User record");
        };
        assert_eq!(text, "Hello world");
        assert_eq!(uuid.as_deref(), Some("abc-123"));
        assert_eq!(timestamp.as_deref(), Some("2026-04-02T12:00:00Z"));
    }

    /// Multiple text blocks in one user message are joined with a newline.
    #[test]
    fn test_parse_user_message_array() {
        let record = parse_single(
            r#"{"type":"user","message":{"content":[{"type":"text","text":"Hello"},{"type":"text","text":"World"}]}}"#,
        );
        let TranscriptRecord::User { text, .. } = record else {
            panic!("Expected User record");
        };
        assert_eq!(text, "Hello\nWorld");
    }

    /// A tool_result block inside a user message becomes a ToolResult record.
    #[test]
    fn test_parse_user_message_with_tool_result() {
        let record = parse_single(
            r#"{"type":"user","uuid":"u1","message":{"content":[{"type":"tool_result","tool_use_id":"tu1","content":"test output","is_error":false}]}}"#,
        );
        let TranscriptRecord::ToolResult {
            output_summary,
            is_error,
            ..
        } = record
        else {
            panic!("Expected ToolResult record");
        };
        assert_eq!(output_summary, "test output");
        assert_eq!(is_error, Some(false));
    }

    /// An assistant text block becomes an AssistantText record.
    #[test]
    fn test_parse_assistant_text() {
        let record = parse_single(
            r#"{"type":"assistant","uuid":"a1","timestamp":"2026-04-02T12:01:00Z","message":{"content":[{"type":"text","text":"I'll help you."}]}}"#,
        );
        let TranscriptRecord::AssistantText {
            text,
            uuid,
            timestamp,
        } = record
        else {
            panic!("Expected AssistantText record");
        };
        assert_eq!(text, "I'll help you.");
        assert_eq!(uuid.as_deref(), Some("a1"));
        assert_eq!(timestamp.as_deref(), Some("2026-04-02T12:01:00Z"));
    }

    /// A tool_use block becomes a ToolUse record with summary and full input.
    #[test]
    fn test_parse_assistant_tool_use() {
        let record = parse_single(
            r#"{"type":"assistant","uuid":"a2","message":{"content":[{"type":"tool_use","name":"Bash","input":{"command":"ls -la"}}]}}"#,
        );
        let TranscriptRecord::ToolUse {
            tool_name,
            input_summary,
            input_full,
            ..
        } = record
        else {
            panic!("Expected ToolUse record");
        };
        assert_eq!(tool_name, "Bash");
        assert_eq!(input_summary, "ls -la");
        let input = input_full.as_ref().expect("input_full should be present");
        assert_eq!(
            input.get("command").and_then(|v| v.as_str()),
            Some("ls -la")
        );
    }

    /// A thinking block becomes a Thinking record.
    #[test]
    fn test_parse_thinking() {
        let record = parse_single(
            r#"{"type":"assistant","uuid":"a3","message":{"content":[{"type":"thinking","thinking":"Let me analyze this..."}]}}"#,
        );
        let TranscriptRecord::Thinking { text, uuid, .. } = record else {
            panic!("Expected Thinking record");
        };
        assert_eq!(text, "Let me analyze this...");
        assert_eq!(uuid.as_deref(), Some("a3"));
    }

    /// Empty and whitespace-only lines produce no records.
    #[test]
    fn test_parse_empty_line() {
        for blank in ["", "  "] {
            assert!(parse_jsonl_line(blank).is_empty());
        }
    }

    /// Lines that are not valid JSON produce no records.
    #[test]
    fn test_parse_invalid_json() {
        assert!(parse_jsonl_line("not json").is_empty());
    }

    /// Record types other than user/assistant produce no records.
    #[test]
    fn test_parse_unknown_type() {
        let records = parse_jsonl_line(r#"{"type":"system","data":"info"}"#);
        assert!(records.is_empty());
    }

    /// Missing uuid/timestamp fields are reported as `None`.
    #[test]
    fn test_parse_no_uuid_timestamp() {
        let record = parse_single(r#"{"type":"user","message":{"content":"Hi"}}"#);
        let TranscriptRecord::User {
            uuid, timestamp, ..
        } = record
        else {
            panic!("Expected User record");
        };
        assert!(uuid.is_none());
        assert!(timestamp.is_none());
    }

    /// Known model IDs map to their short names; unknown IDs use the last segment.
    #[test]
    fn test_model_display_name() {
        let cases = [
            ("claude-opus-4-6", "Opus 4.6"),
            ("claude-sonnet-4-6", "Sonnet 4.6"),
            ("claude-sonnet-4-5-20250514", "Sonnet 4.5"),
            ("claude-haiku-4-5-20251001", "Haiku 4.5"),
            ("claude-opus-4-5-20250918", "Opus 4.5"),
            ("claude-3-5-sonnet-20241022", "Sonnet 3.5"),
            ("gpt-4o", "4o"),
        ];
        for (model_id, expected) in cases {
            assert_eq!(model_display_name(model_id), expected);
        }
    }

    /// The model ID is read from the first assistant record in a transcript file.
    #[test]
    fn test_extract_model_id_from_file() {
        let tmp = tempfile::NamedTempFile::new().unwrap();
        let path = tmp.path().to_str().unwrap().to_string();
        let transcript = concat!(
            r#"{"type":"user","message":{"content":"hi"}}"#,
            "\n",
            r#"{"type":"assistant","message":{"model":"claude-opus-4-6","content":[{"type":"text","text":"hello"}]}}"#,
            "\n",
        );
        std::fs::write(&path, transcript).unwrap();
        assert_eq!(extract_model_id(&path), Some("claude-opus-4-6".to_string()));
    }
}
tmai_core/transcript/parser.rs

tmai_core/transcript/
parser.rs