Skip to main content

tmai_core/transcript/
parser.rs

//! JSONL line parser for Claude Code transcripts.
//!
//! Each line in the transcript is a JSON object whose `type` field identifies
//! the record kind. Only `user`, `assistant` and `result` records are parsed;
//! every other type is ignored.
5
6use super::types::TranscriptRecord;
7
8/// Parse a single JSONL line into a TranscriptRecord
9pub fn parse_jsonl_line(line: &str) -> Option<TranscriptRecord> {
10    let line = line.trim();
11    if line.is_empty() {
12        return None;
13    }
14
15    let value: serde_json::Value = serde_json::from_str(line).ok()?;
16    let msg_type = value.get("type")?.as_str()?;
17
18    match msg_type {
19        "user" => parse_user_message(&value),
20        "assistant" => parse_assistant_message(&value),
21        "result" => parse_result_message(&value),
22        _ => None,
23    }
24}
25
26/// Parse a user message record
27fn parse_user_message(value: &serde_json::Value) -> Option<TranscriptRecord> {
28    // User messages have message.content as a string or array
29    let message = value.get("message")?;
30    let content = message.get("content")?;
31
32    let text = if let Some(s) = content.as_str() {
33        s.to_string()
34    } else if let Some(arr) = content.as_array() {
35        // Extract text from content blocks
36        arr.iter()
37            .filter_map(|block| {
38                if block.get("type")?.as_str()? == "text" {
39                    block.get("text")?.as_str().map(|s| s.to_string())
40                } else {
41                    None
42                }
43            })
44            .collect::<Vec<_>>()
45            .join("\n")
46    } else {
47        return None;
48    };
49
50    if text.is_empty() {
51        return None;
52    }
53
54    Some(TranscriptRecord::User { text })
55}
56
57/// Parse an assistant message record
58///
59/// Returns multiple records: text blocks and tool_use blocks
60fn parse_assistant_message(value: &serde_json::Value) -> Option<TranscriptRecord> {
61    let message = value.get("message")?;
62    let content = message.get("content")?;
63
64    if let Some(arr) = content.as_array() {
65        let mut text_parts = Vec::new();
66
67        for block in arr {
68            let block_type = match block.get("type").and_then(|t| t.as_str()) {
69                Some(t) => t,
70                None => continue,
71            };
72
73            match block_type {
74                "text" => {
75                    if let Some(text) = block.get("text").and_then(|t| t.as_str()) {
76                        if !text.is_empty() {
77                            text_parts.push(text.to_string());
78                        }
79                    }
80                }
81                "tool_use" => {
82                    // We could emit ToolUse records here but for simplicity
83                    // we'll capture them in the text representation
84                    let tool_name = block
85                        .get("name")
86                        .and_then(|n| n.as_str())
87                        .unwrap_or("Unknown");
88                    let input_summary = summarize_tool_input_json(tool_name, block.get("input"));
89                    text_parts.push(format!("[⚙ {}: {}]", tool_name, input_summary));
90                }
91                "thinking" => {
92                    // Skip thinking blocks
93                }
94                _ => {}
95            }
96        }
97
98        if text_parts.is_empty() {
99            return None;
100        }
101
102        Some(TranscriptRecord::AssistantText {
103            text: text_parts.join("\n"),
104        })
105    } else {
106        None
107    }
108}
109
110/// Parse a result message (tool result)
111fn parse_result_message(value: &serde_json::Value) -> Option<TranscriptRecord> {
112    let result = value.get("result")?;
113    let output = if let Some(s) = result.as_str() {
114        s.to_string()
115    } else if let Some(arr) = result.as_array() {
116        arr.iter()
117            .filter_map(|block| {
118                if block.get("type")?.as_str()? == "text" {
119                    block.get("text")?.as_str().map(|s| s.to_string())
120                } else {
121                    None
122                }
123            })
124            .collect::<Vec<_>>()
125            .join("\n")
126    } else {
127        return None;
128    };
129
130    if output.is_empty() {
131        return None;
132    }
133
134    Some(TranscriptRecord::ToolResult {
135        output_summary: truncate_for_preview(&output, 200),
136    })
137}
138
139/// Summarize tool input JSON for display
140fn summarize_tool_input_json(tool_name: &str, input: Option<&serde_json::Value>) -> String {
141    let input = match input {
142        Some(v) => v,
143        None => return String::new(),
144    };
145
146    let key = match tool_name {
147        "Bash" => "command",
148        "Edit" | "Read" | "Write" => "file_path",
149        "Grep" => "pattern",
150        "Glob" => "pattern",
151        "Agent" => "description",
152        _ => "command",
153    };
154
155    input
156        .get(key)
157        .and_then(|v| v.as_str())
158        .map(|s| truncate_for_preview(s, 80))
159        .unwrap_or_default()
160}
161
/// Reduce `s` to a one-line preview of at most `max_len` characters.
///
/// Only the first line of `s` is kept. If that line is longer than `max_len`
/// characters, it is cut after `max_len` characters and `"..."` is appended.
/// Lengths are measured in `char`s, and the cut is made at a character
/// boundary, so multi-byte UTF-8 text never causes a slicing panic.
fn truncate_for_preview(s: &str, max_len: usize) -> String {
    let head = s.lines().next().unwrap_or(s);

    // The character at index `max_len` exists only when the line is too long;
    // its byte offset is exactly where the preview must end.
    match head.char_indices().nth(max_len) {
        Some((cut, _)) => format!("{}...", &head[..cut]),
        None => head.to_string(),
    }
}
174
175/// Extract model ID from a transcript JSONL file.
176///
177/// Reads the first few lines looking for an assistant message with `message.model`.
178/// Returns the model ID string (e.g., "claude-opus-4-6").
179pub fn extract_model_id(path: &str) -> Option<String> {
180    let file = std::fs::File::open(path).ok()?;
181    let reader = std::io::BufReader::new(file);
182    // Only scan first 20 lines — model appears in first assistant message
183    for line in std::io::BufRead::lines(reader).take(20) {
184        let line = line.ok()?;
185        let value: serde_json::Value = serde_json::from_str(line.trim()).ok()?;
186        if value.get("type")?.as_str()? == "assistant" {
187            if let Some(model) = value
188                .get("message")
189                .and_then(|m| m.get("model"))
190                .and_then(|m| m.as_str())
191            {
192                return Some(model.to_string());
193            }
194        }
195    }
196    None
197}
198
/// Convert a model ID to a short display name
/// (e.g., "claude-opus-4-6" → "Opus 4.6").
///
/// The ID is matched by substring against the known families in the order
/// opus, sonnet, haiku. Within the matching family, the first recognised
/// version marker found in the ID is appended; if none is found, the bare
/// family name is returned. IDs that match no family fall back to the text
/// after the last `/` or `-`.
pub fn model_display_name(model_id: &str) -> String {
    // (family substring, display name, [(version marker, display version)]).
    // Order matters on both levels: the first match wins.
    const FAMILIES: &[(&str, &str, &[(&str, &str)])] = &[
        ("opus", "Opus", &[("4-6", "4.6"), ("4-5", "4.5")]),
        (
            "sonnet",
            "Sonnet",
            &[("4-6", "4.6"), ("4-5", "4.5"), ("3-5", "3.5"), ("3.5", "3.5")],
        ),
        ("haiku", "Haiku", &[("4-5", "4.5")]),
    ];

    for &(needle, family, versions) in FAMILIES {
        if !model_id.contains(needle) {
            continue;
        }
        let version = versions
            .iter()
            .find(|&&(marker, _)| model_id.contains(marker))
            .map(|&(_, label)| label);
        return match version {
            Some(v) => format!("{} {}", family, v),
            None => family.to_string(),
        };
    }

    // Fallback: use last meaningful segment
    let last_segment = model_id.split(['/', '-']).next_back();
    last_segment.unwrap_or(model_id).to_string()
}
235
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `line` and return the text of the resulting `User` record.
    fn user_text(line: &str) -> String {
        match parse_jsonl_line(line).unwrap() {
            TranscriptRecord::User { text } => text,
            _ => panic!("Expected User record"),
        }
    }

    /// Parse `line` and return the text of the resulting `AssistantText` record.
    fn assistant_text(line: &str) -> String {
        match parse_jsonl_line(line).unwrap() {
            TranscriptRecord::AssistantText { text } => text,
            _ => panic!("Expected AssistantText record"),
        }
    }

    /// Parse `line` and return the summary of the resulting `ToolResult` record.
    fn tool_result_summary(line: &str) -> String {
        match parse_jsonl_line(line).unwrap() {
            TranscriptRecord::ToolResult { output_summary } => output_summary,
            _ => panic!("Expected ToolResult record"),
        }
    }

    // User content given as a bare string is returned verbatim.
    #[test]
    fn test_parse_user_message_string() {
        let line = r#"{"type":"user","message":{"content":"Hello world"}}"#;
        assert_eq!(user_text(line), "Hello world");
    }

    // User content given as text blocks is joined with newlines.
    #[test]
    fn test_parse_user_message_array() {
        let line = r#"{"type":"user","message":{"content":[{"type":"text","text":"Hello"},{"type":"text","text":"World"}]}}"#;
        assert_eq!(user_text(line), "Hello\nWorld");
    }

    // A lone assistant text block becomes the record text.
    #[test]
    fn test_parse_assistant_text() {
        let line = r#"{"type":"assistant","message":{"content":[{"type":"text","text":"I'll help you."}]}}"#;
        assert_eq!(assistant_text(line), "I'll help you.");
    }

    // Tool calls are rendered inline next to the assistant's text.
    #[test]
    fn test_parse_assistant_with_tool_use() {
        let line = r#"{"type":"assistant","message":{"content":[{"type":"text","text":"Let me check."},{"type":"tool_use","name":"Bash","input":{"command":"ls"}}]}}"#;
        let text = assistant_text(line);
        assert!(text.contains("Let me check."));
        assert!(text.contains("⚙ Bash: ls"));
    }

    // Thinking blocks never reach the rendered text.
    #[test]
    fn test_parse_thinking_skipped() {
        let line = r#"{"type":"assistant","message":{"content":[{"type":"thinking","text":"Hmm..."},{"type":"text","text":"Result"}]}}"#;
        let text = assistant_text(line);
        assert!(!text.contains("Hmm"));
        assert_eq!(text, "Result");
    }

    // A string `result` is carried through as the output summary.
    #[test]
    fn test_parse_result_message() {
        let line = r#"{"type":"result","result":"test output here"}"#;
        assert_eq!(tool_result_summary(line), "test output here");
    }

    // Empty and whitespace-only lines yield no record.
    #[test]
    fn test_parse_empty_line() {
        for blank in &["", "  "] {
            assert!(parse_jsonl_line(blank).is_none());
        }
    }

    // Non-JSON input yields no record.
    #[test]
    fn test_parse_invalid_json() {
        assert!(parse_jsonl_line("not json").is_none());
    }

    // Record types other than user/assistant/result yield no record.
    #[test]
    fn test_parse_unknown_type() {
        let line = r#"{"type":"system","data":"info"}"#;
        assert!(parse_jsonl_line(line).is_none());
    }

    // Known families map to friendly names; unknown IDs use the last segment.
    #[test]
    fn test_model_display_name() {
        let cases = [
            ("claude-opus-4-6", "Opus 4.6"),
            ("claude-sonnet-4-6", "Sonnet 4.6"),
            ("claude-sonnet-4-5-20250514", "Sonnet 4.5"),
            ("claude-haiku-4-5-20251001", "Haiku 4.5"),
            ("claude-opus-4-5-20250918", "Opus 4.5"),
            ("claude-3-5-sonnet-20241022", "Sonnet 3.5"),
            ("gpt-4o", "4o"),
        ];
        for (model_id, expected) in cases.iter() {
            assert_eq!(model_display_name(model_id), *expected);
        }
    }

    // The model is read from the first assistant record in the file.
    #[test]
    fn test_extract_model_id_from_file() {
        use std::io::Write;
        let tmp = tempfile::NamedTempFile::new().unwrap();
        let path = tmp.path().to_str().unwrap().to_string();
        {
            let entries = [
                r#"{"type":"user","message":{"content":"hi"}}"#,
                r#"{"type":"assistant","message":{"model":"claude-opus-4-6","content":[{"type":"text","text":"hello"}]}}"#,
            ];
            let mut f = std::fs::File::create(&path).unwrap();
            for entry in entries.iter() {
                writeln!(f, "{}", entry).unwrap();
            }
        }
        assert_eq!(extract_model_id(&path), Some("claude-opus-4-6".to_string()));
    }
}