hindsight/parser/
transcript.rs

1//! JSONL transcript parser implementation
2//!
3//! Parses Claude Code transcript files (JSONL format) into structured data.
4//! Follows Rust best practices:
5//! - Uses Result<T, E> for error handling
6//! - Borrows where possible to avoid clones
7//! - Iterator-based for memory efficiency
8
9use super::models::{ContentBlock, ExecutionNode, MessageContent, NodeType, Session, TokenUsage};
10use crate::error::{HindsightError, Result};
11use std::fs::File;
12use std::io::{BufRead, BufReader};
13use std::path::Path;
14
15/// Parse a Claude Code JSONL transcript file into a Session
16///
17/// # Arguments
18///
19/// * `path` - Path to the .jsonl transcript file
20///
21/// # Returns
22///
23/// Returns a `Session` containing all parsed execution nodes with metadata.
24///
25/// # Errors
26///
27/// Returns `HindsightError` if:
28/// - File cannot be read
29/// - JSONL format is invalid
30/// - JSON parsing fails
31///
32/// # Example
33///
34/// ```ignore
35/// use hindsight::parser::parse_session;
36/// use std::path::Path;
37///
38/// let session = parse_session(Path::new("session.jsonl"))?;
39/// println!("Found {} tools", session.total_tools);
40/// ```
41/// Parse all subagent JSONL files associated with a session.
42///
43/// Subagent files live at `<parent>/<session_stem>/subagents/*.jsonl`.
44/// Each subagent may use a different model (e.g., haiku spawned from a sonnet session).
45pub fn parse_subagents(session_path: &Path) -> Vec<super::models::Session> {
46    let session_id = session_path.file_stem().unwrap_or_default().to_string_lossy();
47    let subagent_dir = session_path
48        .parent()
49        .map(|p| p.join(session_id.as_ref()).join("subagents"));
50
51    let Some(dir) = subagent_dir else {
52        return vec![];
53    };
54    if !dir.exists() {
55        return vec![];
56    }
57
58    std::fs::read_dir(&dir)
59        .into_iter()
60        .flatten()
61        .filter_map(|e| e.ok())
62        .filter(|e| e.path().extension().is_some_and(|x| x == "jsonl"))
63        .filter_map(|e| parse_session(&e.path()).ok())
64        .collect()
65}
66
67pub fn parse_session(path: &Path) -> Result<Session> {
68    let file = File::open(path)?;
69
70    // Estimate initial capacity from file size to reduce reallocations
71    let file_metadata = file.metadata()?;
72    let file_size = file_metadata.len();
73    let estimated_lines = (file_size / 500).max(100) as usize; // ~500 bytes per line
74
75    let reader = BufReader::new(file);
76    let mut raw_nodes = Vec::with_capacity(estimated_lines);
77
78    // Parse JSONL line by line
79    for (line_num, line_result) in reader.lines().enumerate() {
80        let line = line_result?;
81
82        // Skip empty lines
83        if line.trim().is_empty() {
84            continue;
85        }
86
87        // Parse JSON line into ExecutionNode
88        match serde_json::from_str::<ExecutionNode>(&line) {
89            Ok(node) => {
90                raw_nodes.push(node);
91            }
92            Err(e) => {
93                return Err(HindsightError::JsonParse {
94                    line: line_num + 1,
95                    message: e.to_string(),
96                });
97            }
98        }
99    }
100
101    // ── SSE message merging ────────────────────────────────────────────────────
102    // Claude Code writes each content block of a response as a separate JSONL
103    // line, all sharing the same message.id. Blocks are distinct and ordered:
104    //   thinking → text → tool_use (one or more)
105    // We accumulate all blocks for a given message.id into a single node, and
106    // take the last token-usage record (which has cumulative counts).
107    let mut merged: Vec<ExecutionNode> = Vec::with_capacity(raw_nodes.len());
108    let mut current_id: Option<String> = None;
109    let mut current_base: Option<ExecutionNode> = None;
110    let mut current_content: Vec<ContentBlock> = Vec::new();
111    let mut current_usage: Option<TokenUsage> = None;
112
113    for node in raw_nodes {
114        match extract_message_id(&node) {
115            Some(id) if current_id.as_deref() == Some(id) => {
116                // Same message — ACCUMULATE blocks (each line is a distinct block)
117                // and keep the last token-usage (cumulative counts)
118                let new_blocks = extract_blocks(&node);
119                if !new_blocks.is_empty() {
120                    current_content.extend(new_blocks);
121                }
122                if let Some(tu) = node.effective_token_usage() {
123                    match current_usage.as_mut() {
124                        Some(existing) => existing.merge_last(tu),
125                        None => current_usage = Some(tu.clone()),
126                    }
127                }
128            }
129            Some(id) => {
130                // New message.id — flush previous accumulator
131                if let Some(base) = current_base.take() {
132                    merged.push(finalize_sse(base, current_content, current_usage));
133                }
134                current_id = Some(id.to_string());
135                current_content = extract_blocks(&node);
136                current_usage = node.effective_token_usage().cloned();
137                current_base = Some(node);
138            }
139            None => {
140                // No message.id (tool results, progress, system) — flush and pass through
141                if let Some(base) = current_base.take() {
142                    merged.push(finalize_sse(base, current_content, current_usage));
143                    current_id = None;
144                    current_content = Vec::new();
145                    current_usage = None;
146                }
147                merged.push(node);
148            }
149        }
150    }
151    // Flush final accumulator
152    if let Some(base) = current_base.take() {
153        merged.push(finalize_sse(base, current_content, current_usage));
154    }
155
156    let merged = merged;
157    // ── end SSE deduplication ─────────────────────────────────────────────────
158
159    // ── Progress node deduplication ───────────────────────────────────────────
160    // Claude Code emits one progress frame per SSE tick for each running tool
161    // (agent, bash, hook). All frames for the same invocation share the same
162    // `toolUseID` but differ only by uuid/timestamp. Keep only the last frame
163    // per toolUseID — it has the most complete output/elapsed time.
164    let nodes = dedup_progress_by_tool_use_id(merged);
165    // ── end progress deduplication ────────────────────────────────────────────
166
167    // Extract session ID from filename or first node
168    let session_id = extract_session_id(path)?;
169
170    // Get absolute path for display - try canonicalize, fallback to as-is
171    let file_path = path
172        .canonicalize()
173        .ok()
174        .and_then(|p| p.to_str().map(String::from))
175        .or_else(|| path.to_str().map(String::from));
176
177    Ok(Session::new(session_id, file_path, nodes))
178}
179
180/// Extract session ID from file path or content
181fn extract_session_id(path: &Path) -> Result<String> {
182    // Try to get session ID from filename
183    if let Some(file_name) = path.file_stem() {
184        if let Some(name) = file_name.to_str() {
185            return Ok(name.to_string());
186        }
187    }
188
189    Err(HindsightError::InvalidSession(
190        "Could not extract session ID from path".to_string(),
191    ))
192}
193
194// ── SSE helpers ───────────────────────────────────────────────────────────────
195
196fn extract_message_id(node: &ExecutionNode) -> Option<&str> {
197    node.message.as_ref()?.id.as_deref()
198}
199
200fn extract_blocks(node: &ExecutionNode) -> Vec<ContentBlock> {
201    node.message
202        .as_ref()
203        .and_then(|m| m.content.as_ref())
204        .map(|c| match c {
205            MessageContent::Blocks(b) => b.clone(),
206            MessageContent::Text(_) => vec![],
207        })
208        .unwrap_or_default()
209}
210
211fn finalize_sse(
212    mut base: ExecutionNode,
213    content: Vec<ContentBlock>,
214    token_usage: Option<TokenUsage>,
215) -> ExecutionNode {
216    if let Some(ref mut msg) = base.message {
217        if !content.is_empty() {
218            msg.content = Some(MessageContent::Blocks(content));
219        }
220    }
221    base.token_usage = token_usage;
222    base
223}
224
225/// Extract the top-level `toolUseID` field from a node's flattened extra map.
226fn extract_tool_use_id(node: &ExecutionNode) -> Option<String> {
227    node.extra
228        .as_ref()
229        .and_then(|e| e.get("toolUseID"))
230        .and_then(|v| v.as_str())
231        .map(str::to_string)
232}
233
234/// Deduplicate progress nodes by `toolUseID`, keeping the LAST frame.
235///
236/// For each running tool (agent, bash, hook), Claude Code writes one progress
237/// node per SSE tick. All frames share the same `toolUseID` but differ by uuid.
238/// We keep the LAST frame because it carries the most complete data (highest
239/// elapsed time, fullest output, final exit code).
240///
241/// **Why LAST?** SSE progress frames are cumulative — each tick overwrites
242/// the previous output. The final frame has the complete picture.
243///
244/// See also: `dedup_agent_progress_by_agent_id` in `simple_tree.rs` which
245/// does the opposite (keeps FIRST) for a different reason.
246fn dedup_progress_by_tool_use_id(nodes: Vec<ExecutionNode>) -> Vec<ExecutionNode> {
247    use std::collections::HashMap;
248
249    // Record the index of the last occurrence of each toolUseID in a progress node.
250    let mut last_idx: HashMap<String, usize> = HashMap::new();
251    for (i, node) in nodes.iter().enumerate() {
252        if node.node_type == NodeType::Progress {
253            if let Some(id) = extract_tool_use_id(node) {
254                last_idx.insert(id, i);
255            }
256        }
257    }
258
259    // Retain non-progress nodes unchanged; for progress nodes keep only the last frame.
260    nodes
261        .into_iter()
262        .enumerate()
263        .filter(|(i, node)| {
264            if node.node_type == NodeType::Progress {
265                if let Some(id) = extract_tool_use_id(node) {
266                    return last_idx.get(&id) == Some(i);
267                }
268            }
269            true
270        })
271        .map(|(_, node)| node)
272        .collect()
273}
274
275#[cfg(test)]
276mod tests {
277    use super::*;
278    use crate::parser::models::{Message, NodeType, TokenUsage};
279    use std::collections::HashMap;
280    use std::io::Write;
281    use tempfile::NamedTempFile;
282
283    #[test]
284    fn test_parse_empty_file() {
285        let file = NamedTempFile::new().unwrap();
286
287        let session = parse_session(file.path()).unwrap();
288        assert_eq!(session.nodes.len(), 0);
289    }
290
291    #[test]
292    fn test_parse_user_message() {
293        let mut file = NamedTempFile::new().unwrap();
294        writeln!(file, r#"{{"type":"user","message":{{"content":"Hello"}}}}"#).unwrap();
295
296        let session = parse_session(file.path()).unwrap();
297        assert_eq!(session.nodes.len(), 1);
298        assert_eq!(session.nodes[0].node_type, NodeType::User);
299    }
300
301    #[test]
302    fn test_parse_tool_use() {
303        // Real Claude Code format: tool calls are ContentBlock::ToolUse inside
304        // an assistant message, not a top-level tool_use field.
305        let mut file = NamedTempFile::new().unwrap();
306        writeln!(
307            file,
308            r#"{{"type":"assistant","message":{{"role":"assistant","id":"msg-1","content":[{{"type":"tool_use","id":"tu-1","name":"Read","input":{{"file_path":"test.txt"}}}}]}}}}"#
309        )
310        .unwrap();
311
312        let session = parse_session(file.path()).unwrap();
313        assert_eq!(session.nodes.len(), 1);
314        // Tool use is in message content blocks
315        let blocks = session.nodes[0].message.as_ref().unwrap().content_blocks();
316        assert_eq!(blocks.len(), 1);
317        assert!(matches!(blocks[0], super::super::models::ContentBlock::ToolUse { ref name, .. } if name == "Read"));
318        assert_eq!(session.total_tools, 1);
319    }
320
321    #[test]
322    fn test_invalid_json() {
323        let mut file = NamedTempFile::new().unwrap();
324        writeln!(file, "{{invalid json").unwrap();
325
326        let result = parse_session(file.path());
327        assert!(result.is_err());
328    }
329
330    // ── SSE deduplication tests ───────────────────────────────────────────────
331
332    fn make_assistant_node(id: &str, text: &str, tokens_out: i64) -> ExecutionNode {
333        ExecutionNode {
334            uuid: Some(format!("uuid-{}", id)),
335            parent_uuid: None,
336            timestamp: Some(1000),
337            node_type: NodeType::Assistant,
338            is_sidechain: None,
339            session_id: None,
340            cwd: None,
341            message: Some(Message {
342                id: Some(id.to_string()),
343                role: Some("assistant".to_string()),
344                model: None,
345                content: Some(MessageContent::Blocks(vec![ContentBlock::Text {
346                    text: text.to_string(),
347                }])),
348                usage: None,
349                extra: HashMap::new(),
350            }),
351            tool_use: None,
352            tool_result: None,
353            tool_use_result: None,
354            thinking: None,
355            progress: None,
356            token_usage: Some(TokenUsage {
357                input_tokens: Some(100),
358                output_tokens: Some(tokens_out),
359                cache_creation_input_tokens: None,
360                cache_read_input_tokens: None,
361            }),
362            extra: None,
363        }
364    }
365
366    fn make_tool_node() -> ExecutionNode {
367        ExecutionNode {
368            uuid: Some("uuid-tool".to_string()),
369            parent_uuid: None,
370            timestamp: Some(2000),
371            node_type: NodeType::Unknown,
372            is_sidechain: None,
373            session_id: None,
374            cwd: None,
375            message: None,
376            tool_use: None,
377            tool_result: None,
378            tool_use_result: None,
379            thinking: None,
380            progress: None,
381            token_usage: None,
382            extra: None,
383        }
384    }
385
386    #[test]
387    fn test_sse_deduplication_single_message_id_produces_one_node() {
388        let mut file = NamedTempFile::new().unwrap();
389
390        // Two SSE frames with same message id
391        let node1 = make_assistant_node("msg-abc", "partial", 10);
392        let node2 = make_assistant_node("msg-abc", "full text here", 20);
393
394        writeln!(file, "{}", serde_json::to_string(&node1).unwrap()).unwrap();
395        writeln!(file, "{}", serde_json::to_string(&node2).unwrap()).unwrap();
396
397        let session = parse_session(file.path()).unwrap();
398
399        // Should be deduplicated to 1 node
400        assert_eq!(session.nodes.len(), 1);
401    }
402
403    #[test]
404    fn test_sse_deduplication_two_message_ids_produce_two_nodes() {
405        let mut file = NamedTempFile::new().unwrap();
406
407        let node1 = make_assistant_node("msg-aaa", "first message", 10);
408        let node2 = make_assistant_node("msg-bbb", "second message", 20);
409
410        writeln!(file, "{}", serde_json::to_string(&node1).unwrap()).unwrap();
411        writeln!(file, "{}", serde_json::to_string(&node2).unwrap()).unwrap();
412
413        let session = parse_session(file.path()).unwrap();
414        assert_eq!(session.nodes.len(), 2);
415    }
416
417    #[test]
418    fn test_sse_deduplication_token_usage_takes_last_cumulative_value() {
419        let mut file = NamedTempFile::new().unwrap();
420
421        // SSE sends cumulative tokens — last frame has the final total
422        let node1 = make_assistant_node("msg-xyz", "partial", 10);
423        let node2 = make_assistant_node("msg-xyz", "complete response", 50);
424
425        writeln!(file, "{}", serde_json::to_string(&node1).unwrap()).unwrap();
426        writeln!(file, "{}", serde_json::to_string(&node2).unwrap()).unwrap();
427
428        let session = parse_session(file.path()).unwrap();
429        assert_eq!(session.nodes.len(), 1);
430
431        // Token usage should reflect the LAST SSE frame
432        let usage = session.nodes[0].token_usage.as_ref().unwrap();
433        assert_eq!(usage.output_tokens, Some(50));
434    }
435
436    #[test]
437    fn test_sse_deduplication_non_assistant_nodes_pass_through_unchanged() {
438        let mut file = NamedTempFile::new().unwrap();
439
440        let tool = make_tool_node();
441        writeln!(file, "{}", serde_json::to_string(&tool).unwrap()).unwrap();
442
443        let session = parse_session(file.path()).unwrap();
444        assert_eq!(session.nodes.len(), 1);
445        assert_eq!(session.nodes[0].node_type, NodeType::Unknown);
446    }
447
448    // ── Progress deduplication tests ──────────────────────────────────────────
449
450    fn make_progress_node(tool_use_id: &str, uuid: &str) -> ExecutionNode {
451        let mut extra = HashMap::new();
452        extra.insert("toolUseID".to_string(), serde_json::json!(tool_use_id));
453        extra.insert(
454            "data".to_string(),
455            serde_json::json!({
456                "type": "agent_progress",
457                "agentId": "abc123",
458                "prompt": "do something",
459                "message": {},
460                "normalizedMessages": []
461            }),
462        );
463        ExecutionNode {
464            uuid: Some(uuid.to_string()),
465            parent_uuid: None,
466            timestamp: Some(1000),
467            node_type: NodeType::Progress,
468            is_sidechain: None,
469            session_id: None,
470            cwd: None,
471            message: None,
472            tool_use: None,
473            tool_result: None,
474            tool_use_result: None,
475            thinking: None,
476            progress: None,
477            token_usage: None,
478            extra: Some(extra),
479        }
480    }
481
482    #[test]
483    fn test_progress_dedup_keeps_only_last_frame_per_tool_use_id() {
484        let mut file = NamedTempFile::new().unwrap();
485
486        // 3 frames for the same toolUseID
487        let n1 = make_progress_node("tool-abc", "uuid-1");
488        let n2 = make_progress_node("tool-abc", "uuid-2");
489        let n3 = make_progress_node("tool-abc", "uuid-3");
490
491        writeln!(file, "{}", serde_json::to_string(&n1).unwrap()).unwrap();
492        writeln!(file, "{}", serde_json::to_string(&n2).unwrap()).unwrap();
493        writeln!(file, "{}", serde_json::to_string(&n3).unwrap()).unwrap();
494
495        let session = parse_session(file.path()).unwrap();
496        assert_eq!(session.nodes.len(), 1, "3 frames should collapse to 1");
497        assert_eq!(
498            session.nodes[0].uuid,
499            Some("uuid-3".to_string()),
500            "last frame kept"
501        );
502    }
503
504    #[test]
505    fn test_progress_dedup_preserves_distinct_tool_use_ids() {
506        let mut file = NamedTempFile::new().unwrap();
507
508        // 2 frames for tool-A, 2 for tool-B
509        let a1 = make_progress_node("tool-A", "uuid-a1");
510        let a2 = make_progress_node("tool-A", "uuid-a2");
511        let b1 = make_progress_node("tool-B", "uuid-b1");
512        let b2 = make_progress_node("tool-B", "uuid-b2");
513
514        writeln!(file, "{}", serde_json::to_string(&a1).unwrap()).unwrap();
515        writeln!(file, "{}", serde_json::to_string(&a2).unwrap()).unwrap();
516        writeln!(file, "{}", serde_json::to_string(&b1).unwrap()).unwrap();
517        writeln!(file, "{}", serde_json::to_string(&b2).unwrap()).unwrap();
518
519        let session = parse_session(file.path()).unwrap();
520        assert_eq!(session.nodes.len(), 2, "two distinct tool IDs → 2 nodes");
521        assert_eq!(session.nodes[0].uuid, Some("uuid-a2".to_string()));
522        assert_eq!(session.nodes[1].uuid, Some("uuid-b2".to_string()));
523    }
524
525    // ── Fixture-based tests (real Claude Code JSONL shapes) ───────────────────
526    //
527    // Helper: parse the (possibly multi-line) JSON string and write it as a
528    // single compact line. JSONL requires one JSON object per line — if the
529    // raw fixture string contains newlines the parser would reject it.
530    fn write_jsonl_fixture(file: &mut impl std::io::Write, json: &str) {
531        let value: serde_json::Value = serde_json::from_str(json)
532            .expect("fixture JSON must be valid");
533        writeln!(file, "{}", value).unwrap();
534    }
535    //
536    // These fixtures match the exact JSON structure emitted by Claude Code in
537    // production. They verify that every field that matters is deserialized
538    // correctly — especially parentUuid (camelCase) which drives tree building.
539
540    /// Real `user` node shape including parentUuid, sessionId, isSidechain, cwd.
541    #[test]
542    fn fixture_user_node_deserializes_parent_uuid() {
543        let json = r#"{
544            "parentUuid": "79b0d470-84e4-42ab-8c38-bcff7a0aa24a",
545            "isSidechain": false,
546            "userType": "external",
547            "cwd": "/home/user/project",
548            "sessionId": "a5134111-4445-460d-9848-3652e0364cc3",
549            "version": "2.1.71",
550            "type": "user",
551            "message": {
552                "role": "user",
553                "content": "Hello world"
554            },
555            "uuid": "c9f738fe-c3ee-4a41-a3da-a70ea43149f5",
556            "timestamp": "2026-03-09T20:36:13.828Z",
557            "permissionMode": "default"
558        }"#;
559
560        let mut file = NamedTempFile::new().unwrap();
561        write_jsonl_fixture(&mut file, json);
562        let session = parse_session(file.path()).unwrap();
563
564        assert_eq!(session.nodes.len(), 1);
565        let node = &session.nodes[0];
566        assert_eq!(node.node_type, NodeType::User);
567        assert_eq!(node.uuid.as_deref(), Some("c9f738fe-c3ee-4a41-a3da-a70ea43149f5"));
568        // THE critical field — tree hierarchy depends on this
569        assert_eq!(
570            node.parent_uuid.as_deref(),
571            Some("79b0d470-84e4-42ab-8c38-bcff7a0aa24a"),
572            "parentUuid (camelCase) must deserialize into parent_uuid"
573        );
574        assert_eq!(node.is_sidechain, Some(false));
575        assert_eq!(node.session_id.as_deref(), Some("a5134111-4445-460d-9848-3652e0364cc3"));
576        assert_eq!(node.cwd.as_deref(), Some("/home/user/project"));
577    }
578
579    /// Real `assistant` node with thinking + tool_use content blocks and token usage.
580    #[test]
581    fn fixture_assistant_node_with_thinking_and_tool_use() {
582        let json = r#"{
583            "parentUuid": "c9f738fe-c3ee-4a41-a3da-a70ea43149f5",
584            "isSidechain": false,
585            "sessionId": "a5134111-4445-460d-9848-3652e0364cc3",
586            "version": "2.1.71",
587            "message": {
588                "model": "claude-opus-4-6-20260101",
589                "id": "msg_01E7qN343ih6AF31ZohVDNC4",
590                "type": "message",
591                "role": "assistant",
592                "content": [
593                    {
594                        "type": "thinking",
595                        "thinking": "Let me plan this carefully.",
596                        "signature": "sig_abc123"
597                    },
598                    {
599                        "type": "tool_use",
600                        "id": "toolu_01MXE8tThu2BW3FvknemYN26",
601                        "name": "Bash",
602                        "input": { "command": "ls -la" }
603                    }
604                ],
605                "stop_reason": "tool_use",
606                "usage": {
607                    "input_tokens": 1500,
608                    "output_tokens": 80,
609                    "cache_creation_input_tokens": 0,
610                    "cache_read_input_tokens": 1200
611                }
612            },
613            "requestId": "req_abc",
614            "type": "assistant",
615            "uuid": "87eefa21-2674-4cd8-a892-7dd6c18a0f85",
616            "timestamp": "2026-03-09T20:36:22.317Z"
617        }"#;
618
619        let mut file = NamedTempFile::new().unwrap();
620        write_jsonl_fixture(&mut file, json);
621        let session = parse_session(file.path()).unwrap();
622
623        let node = &session.nodes[0];
624        assert_eq!(node.node_type, NodeType::Assistant);
625        assert_eq!(
626            node.parent_uuid.as_deref(),
627            Some("c9f738fe-c3ee-4a41-a3da-a70ea43149f5")
628        );
629
630        let msg = node.message.as_ref().unwrap();
631        assert_eq!(msg.model_short(), Some("claude-opus-4-6"));
632        assert_eq!(msg.id.as_deref(), Some("msg_01E7qN343ih6AF31ZohVDNC4"));
633
634        let blocks = msg.content_blocks();
635        assert_eq!(blocks.len(), 2);
636        assert!(matches!(blocks[0], crate::parser::models::ContentBlock::Thinking { .. }));
637        assert!(matches!(blocks[1], crate::parser::models::ContentBlock::ToolUse { ref name, .. } if name == "Bash"));
638
639        // Token usage from message.usage
640        let usage = node.effective_token_usage().unwrap();
641        assert_eq!(usage.input_tokens, Some(1500));
642        assert_eq!(usage.output_tokens, Some(80));
643        assert_eq!(usage.cache_read_input_tokens, Some(1200));
644
645        // total_tools counts ContentBlock::ToolUse
646        assert_eq!(session.total_tools, 1);
647    }
648
649    /// Real `user` node containing a `tool_result` content block (tool response).
650    #[test]
651    fn fixture_user_node_with_tool_result_block() {
652        let json = r#"{
653            "parentUuid": "87eefa21-2674-4cd8-a892-7dd6c18a0f85",
654            "isSidechain": false,
655            "sessionId": "a5134111-4445-460d-9848-3652e0364cc3",
656            "type": "user",
657            "message": {
658                "role": "user",
659                "content": [
660                    {
661                        "type": "tool_result",
662                        "tool_use_id": "toolu_01MXE8tThu2BW3FvknemYN26",
663                        "content": [{ "type": "text", "text": "file1.rs\nfile2.rs" }]
664                    }
665                ]
666            },
667            "uuid": "f1b2c3d4-0000-0000-0000-000000000001",
668            "timestamp": "2026-03-09T20:36:23.000Z"
669        }"#;
670
671        let mut file = NamedTempFile::new().unwrap();
672        write_jsonl_fixture(&mut file, json);
673        let session = parse_session(file.path()).unwrap();
674
675        let node = &session.nodes[0];
676        assert_eq!(node.node_type, NodeType::User);
677        assert_eq!(
678            node.parent_uuid.as_deref(),
679            Some("87eefa21-2674-4cd8-a892-7dd6c18a0f85")
680        );
681
682        let blocks = node.message.as_ref().unwrap().content_blocks();
683        assert_eq!(blocks.len(), 1);
684        assert!(
685            matches!(blocks[0], crate::parser::models::ContentBlock::ToolResult { ref tool_use_id, .. }
686                if tool_use_id == "toolu_01MXE8tThu2BW3FvknemYN26")
687        );
688    }
689
690    /// Real `progress` node with `bash_progress` data — verifies data reaches `extra`.
691    #[test]
692    fn fixture_progress_bash_progress_data_accessible_via_extra() {
693        let json = r#"{
694            "parentUuid": "59880d07-e97a-462a-b93e-af460e5f8608",
695            "isSidechain": false,
696            "sessionId": "a5134111-4445-460d-9848-3652e0364cc3",
697            "type": "progress",
698            "data": {
699                "type": "bash_progress",
700                "output": "",
701                "fullOutput": "hello",
702                "elapsedTimeSeconds": 3,
703                "totalLines": 0,
704                "totalBytes": 0,
705                "taskId": "bso2475ff",
706                "timeoutMs": 120000
707            },
708            "toolUseID": "bash-progress-0",
709            "parentToolUseID": "toolu_01MXE8tThu2BW3FvknemYN26",
710            "uuid": "c5efb084-fd8d-46c2-9961-7d32017013fb",
711            "timestamp": "2026-03-09T20:43:55.501Z"
712        }"#;
713
714        let mut file = NamedTempFile::new().unwrap();
715        write_jsonl_fixture(&mut file, json);
716        let session = parse_session(file.path()).unwrap();
717
718        let node = &session.nodes[0];
719        assert_eq!(node.node_type, NodeType::Progress);
720        assert_eq!(node.parent_uuid.as_deref(), Some("59880d07-e97a-462a-b93e-af460e5f8608"));
721
722        // Progress data lives in extra["data"]
723        let data = node.extra.as_ref()
724            .and_then(|e| e.get("data"))
725            .expect("progress data must be in extra[\"data\"]");
726        assert_eq!(data.get("type").and_then(|t| t.as_str()), Some("bash_progress"));
727        assert_eq!(data.get("elapsedTimeSeconds").and_then(|v| v.as_f64()), Some(3.0));
728        assert_eq!(data.get("fullOutput").and_then(|v| v.as_str()), Some("hello"));
729    }
730
731    /// Real `progress` node with `agent_progress` data — verifies agent ID extraction.
732    #[test]
733    fn fixture_progress_agent_progress_data_accessible_via_extra() {
734        let json = r#"{
735            "parentUuid": "3df6921d-08eb-4ebd-b938-9ef91842a22a",
736            "isSidechain": false,
737            "sessionId": "a5134111-4445-460d-9848-3652e0364cc3",
738            "type": "progress",
739            "data": {
740                "type": "agent_progress",
741                "agentId": "agent-abc123",
742                "prompt": "Explore the codebase.",
743                "message": {}
744            },
745            "toolUseID": "toolu_agent_01",
746            "parentToolUseID": "toolu_agent_01",
747            "uuid": "1e3d2e80-2fed-4876-a3eb-6d049630107b",
748            "timestamp": "2026-03-09T20:36:22.317Z"
749        }"#;
750
751        let mut file = NamedTempFile::new().unwrap();
752        write_jsonl_fixture(&mut file, json);
753        let session = parse_session(file.path()).unwrap();
754
755        let node = &session.nodes[0];
756        assert_eq!(node.node_type, NodeType::Progress);
757
758        let data = node.extra.as_ref()
759            .and_then(|e| e.get("data"))
760            .expect("agent_progress data must be in extra[\"data\"]");
761        assert_eq!(data.get("type").and_then(|t| t.as_str()), Some("agent_progress"));
762        assert_eq!(data.get("agentId").and_then(|v| v.as_str()), Some("agent-abc123"));
763    }
764
765    /// Real `progress` node with `hook_progress` data.
766    #[test]
767    fn fixture_progress_hook_progress_data_accessible_via_extra() {
768        let json = r#"{
769            "parentUuid": null,
770            "isSidechain": false,
771            "sessionId": "a5134111-4445-460d-9848-3652e0364cc3",
772            "type": "progress",
773            "data": {
774                "type": "hook_progress",
775                "hookEvent": "SessionStart",
776                "hookName": "SessionStart:startup",
777                "command": "/usr/local/bin/claude-hindsight hook session-start"
778            },
779            "parentToolUseID": "4a0f7372-12b8-41fe-9ba0-adb589b649ac",
780            "toolUseID": "4a0f7372-12b8-41fe-9ba0-adb589b649ac",
781            "timestamp": "2026-03-09T20:32:19.071Z",
782            "uuid": "79b0d470-84e4-42ab-8c38-bcff7a0aa24a"
783        }"#;
784
785        let mut file = NamedTempFile::new().unwrap();
786        write_jsonl_fixture(&mut file, json);
787        let session = parse_session(file.path()).unwrap();
788
789        let node = &session.nodes[0];
790        assert_eq!(node.node_type, NodeType::Progress);
791        // parentUuid is null → no parent
792        assert_eq!(node.parent_uuid, None);
793
794        let data = node.extra.as_ref()
795            .and_then(|e| e.get("data"))
796            .expect("hook_progress data must be in extra[\"data\"]");
797        assert_eq!(data.get("type").and_then(|t| t.as_str()), Some("hook_progress"));
798        assert_eq!(data.get("hookEvent").and_then(|v| v.as_str()), Some("SessionStart"));
799    }
800
801    /// Real `system` node — stop_hook_summary subtype.
802    #[test]
803    fn fixture_system_stop_hook_summary_parses() {
804        let json = r#"{
805            "parentUuid": "b566be18-5b56-472c-936b-bf8856c055b3",
806            "isSidechain": false,
807            "sessionId": "a5134111-4445-460d-9848-3652e0364cc3",
808            "slug": "bubbly-zooming-quokka",
809            "type": "system",
810            "subtype": "stop_hook_summary",
811            "hookCount": 1,
812            "hookInfos": [{ "command": "/usr/local/bin/claude-hindsight hook stop" }],
813            "hookErrors": [],
814            "preventedContinuation": false,
815            "stopReason": "",
816            "hasOutput": false,
817            "level": "suggestion",
818            "timestamp": "2026-03-09T20:44:20.015Z",
819            "uuid": "185d0e88-785f-413d-a825-ecbe6d44ce7e",
820            "toolUseID": "70eef7b4-fb90-4e73-a5b9-578eb55924f1"
821        }"#;
822
823        let mut file = NamedTempFile::new().unwrap();
824        write_jsonl_fixture(&mut file, json);
825        let session = parse_session(file.path()).unwrap();
826
827        let node = &session.nodes[0];
828        assert_eq!(node.node_type, NodeType::System);
829        assert_eq!(
830            node.parent_uuid.as_deref(),
831            Some("b566be18-5b56-472c-936b-bf8856c055b3")
832        );
833
834        let subtype = node.extra.as_ref()
835            .and_then(|e| e.get("subtype"))
836            .and_then(|v| v.as_str());
837        assert_eq!(subtype, Some("stop_hook_summary"));
838    }
839
840    /// Real `file-history-snapshot` node.
841    #[test]
842    fn fixture_file_history_snapshot_parses() {
843        let json = r#"{
844            "type": "file-history-snapshot",
845            "messageId": "c9f738fe-c3ee-4a41-a3da-a70ea43149f5",
846            "snapshot": {
847                "messageId": "c9f738fe-c3ee-4a41-a3da-a70ea43149f5",
848                "trackedFileBackups": {},
849                "timestamp": "2026-03-09T20:36:13.828Z"
850            },
851            "isSnapshotUpdate": false
852        }"#;
853
854        let mut file = NamedTempFile::new().unwrap();
855        write_jsonl_fixture(&mut file, json);
856        let session = parse_session(file.path()).unwrap();
857
858        let node = &session.nodes[0];
859        assert_eq!(node.node_type, NodeType::FileHistorySnapshot);
860        // These nodes have no uuid/parentUuid — they don't participate in the tree
861        assert_eq!(node.uuid, None);
862        assert_eq!(node.parent_uuid, None);
863
864        // snapshot data is accessible via extra
865        assert!(node.extra.as_ref().and_then(|e| e.get("snapshot")).is_some());
866    }
867
868    /// Real `last-prompt` node — stores the last user message text.
869    #[test]
870    fn fixture_last_prompt_node_parses() {
871        let json = r#"{
872            "type": "last-prompt",
873            "lastPrompt": "can you push it",
874            "sessionId": "a5134111-4445-460d-9848-3652e0364cc3"
875        }"#;
876
877        let mut file = NamedTempFile::new().unwrap();
878        write_jsonl_fixture(&mut file, json);
879        let session = parse_session(file.path()).unwrap();
880
881        let node = &session.nodes[0];
882        assert_eq!(node.node_type, NodeType::LastPrompt);
883        let prompt = node.extra.as_ref()
884            .and_then(|e| e.get("lastPrompt"))
885            .and_then(|v| v.as_str());
886        assert_eq!(prompt, Some("can you push it"));
887    }
888
889    /// Real `pr-link` node — created when Claude Code opens a PR.
890    #[test]
891    fn fixture_pr_link_node_parses() {
892        let json = r#"{
893            "type": "pr-link",
894            "sessionId": "48317b72-a9e4-4c5d-87a4-9f8a6f912e27",
895            "prNumber": 1,
896            "prUrl": "https://github.com/example/repo/pull/1",
897            "prRepository": "example/repo",
898            "timestamp": "2026-03-02T01:20:49.326Z"
899        }"#;
900
901        let mut file = NamedTempFile::new().unwrap();
902        write_jsonl_fixture(&mut file, json);
903        let session = parse_session(file.path()).unwrap();
904
905        let node = &session.nodes[0];
906        assert_eq!(node.node_type, NodeType::PrLink);
907        let pr_url = node.extra.as_ref()
908            .and_then(|e| e.get("prUrl"))
909            .and_then(|v| v.as_str());
910        assert_eq!(pr_url, Some("https://github.com/example/repo/pull/1"));
911    }
912
913    /// Real `queue-operation` node.
914    #[test]
915    fn fixture_queue_operation_node_parses() {
916        let json = r#"{
917            "type": "queue-operation",
918            "operation": "enqueue",
919            "timestamp": "2026-03-02T01:02:04.245Z",
920            "sessionId": "48317b72-a9e4-4c5d-87a4-9f8a6f912e27",
921            "content": "i see it http://localhost:3000"
922        }"#;
923
924        let mut file = NamedTempFile::new().unwrap();
925        write_jsonl_fixture(&mut file, json);
926        let session = parse_session(file.path()).unwrap();
927
928        let node = &session.nodes[0];
929        assert_eq!(node.node_type, NodeType::QueueOperation);
930        let op = node.extra.as_ref()
931            .and_then(|e| e.get("operation"))
932            .and_then(|v| v.as_str());
933        assert_eq!(op, Some("enqueue"));
934    }
935
936    /// Multi-node fixture: verifies parentUuid links are correct across a full
937    /// user → assistant(tool_use) → user(tool_result) chain.
938    #[test]
939    fn fixture_parent_uuid_links_across_full_tool_chain() {
940        let json = concat!(
941            r#"{"type":"user","uuid":"node-1","parentUuid":null,"sessionId":"s1","message":{"role":"user","content":"do something"},"timestamp":1000}"#, "\n",
942            r#"{"type":"assistant","uuid":"node-2","parentUuid":"node-1","sessionId":"s1","message":{"role":"assistant","id":"msg-1","content":[{"type":"tool_use","id":"toolu-1","name":"Bash","input":{"command":"ls"}}]},"timestamp":2000}"#, "\n",
943            r#"{"type":"user","uuid":"node-3","parentUuid":"node-2","sessionId":"s1","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu-1","content":[{"type":"text","text":"ok"}]}]},"timestamp":3000}"#, "\n"
944        );
945
946        let mut file = NamedTempFile::new().unwrap();
947        write!(file, "{json}").unwrap();
948        let session = parse_session(file.path()).unwrap();
949
950        assert_eq!(session.nodes.len(), 3);
951
952        // Verify parent links
953        assert_eq!(session.nodes[0].parent_uuid, None);
954        assert_eq!(session.nodes[1].parent_uuid.as_deref(), Some("node-1"));
955        assert_eq!(session.nodes[2].parent_uuid.as_deref(), Some("node-2"));
956
957        // Verify tool_use is counted from content blocks
958        assert_eq!(session.total_tools, 1);
959
960        // Verify the tool result block is recognized
961        let result_blocks = session.nodes[2].message.as_ref().unwrap().content_blocks();
962        assert!(matches!(
963            result_blocks[0],
964            crate::parser::models::ContentBlock::ToolResult { ref tool_use_id, .. }
965                if tool_use_id == "toolu-1"
966        ));
967    }
968
969    /// Sidechain node (isSidechain: true) parses correctly.
970    #[test]
971    fn fixture_sidechain_node_parses_is_sidechain_field() {
972        let json = r#"{
973            "type": "user",
974            "uuid": "side-node-1",
975            "parentUuid": "main-node-1",
976            "isSidechain": true,
977            "sessionId": "s1",
978            "message": { "role": "user", "content": "subagent prompt" },
979            "timestamp": 1000
980        }"#;
981
982        let mut file = NamedTempFile::new().unwrap();
983        write_jsonl_fixture(&mut file, json);
984        let session = parse_session(file.path()).unwrap();
985
986        let node = &session.nodes[0];
987        assert_eq!(node.is_sidechain, Some(true));
988        assert_eq!(node.parent_uuid.as_deref(), Some("main-node-1"));
989    }
990
991    /// Agent tool call in assistant message — verifies Agent invocations are
992    /// counted in total_tools just like Bash, Read, etc.
993    #[test]
994    fn fixture_agent_tool_call_counted_in_total_tools() {
995        let json = r#"{
996            "type": "assistant",
997            "uuid": "node-agent",
998            "parentUuid": "node-user",
999            "sessionId": "s1",
1000            "message": {
1001                "role": "assistant",
1002                "id": "msg-agent-1",
1003                "content": [
1004                    {
1005                        "type": "tool_use",
1006                        "id": "toolu_agent_01",
1007                        "name": "Agent",
1008                        "input": {
1009                            "prompt": "Explore the project structure thoroughly.",
1010                            "subagent_type": "general-purpose"
1011                        }
1012                    }
1013                ]
1014            },
1015            "timestamp": 2000
1016        }"#;
1017
1018        let mut file = NamedTempFile::new().unwrap();
1019        write_jsonl_fixture(&mut file, json);
1020        let session = parse_session(file.path()).unwrap();
1021
1022        assert_eq!(session.total_tools, 1, "Agent tool call must be counted");
1023
1024        let blocks = session.nodes[0].message.as_ref().unwrap().content_blocks();
1025        assert!(
1026            matches!(blocks[0], crate::parser::models::ContentBlock::ToolUse { ref name, .. } if name == "Agent")
1027        );
1028    }
1029}
hindsight/parser/transcript.rs

hindsight/parser/
transcript.rs