// chub_core/team/tracking/transcript.rs
1//! Transcript linking and token extraction.
2//!
3//! Reads Claude Code transcript files (JSONL), extracts token usage
4//! (deduplicated by message.id), modified files, and spawned agent IDs.
5
6use std::collections::{HashMap, HashSet};
7use std::fs;
8use std::path::{Path, PathBuf};
9
10use serde::Deserialize;
11
12use super::types::TokenUsage;
13
14// ---------------------------------------------------------------------------
15// Transcript path resolution
16// ---------------------------------------------------------------------------
17
/// Sanitize a repo path for Claude Code's project directory naming.
/// Replaces non-alphanumeric chars with dashes (matches entire.io's SanitizePathForClaude).
fn sanitize_path_for_claude(path: &str) -> String {
    let mut sanitized = String::with_capacity(path.len());
    for c in path.chars() {
        sanitized.push(if c.is_ascii_alphanumeric() { c } else { '-' });
    }
    sanitized
}
25
26/// Find the Claude Code projects directory for a given repo path.
27/// Returns `~/.claude/projects/<sanitized-repo>/`.
28pub fn claude_projects_dir(repo_path: &str) -> Option<PathBuf> {
29    let home = dirs::home_dir()?;
30    let sanitized = sanitize_path_for_claude(repo_path);
31    Some(home.join(".claude").join("projects").join(sanitized))
32}
33
34/// Find a Claude Code transcript file for a session.
35pub fn find_transcript(repo_path: &str, session_id: &str) -> Option<PathBuf> {
36    let dir = claude_projects_dir(repo_path)?;
37    let path = dir.join(format!("{}.jsonl", session_id));
38    if path.exists() {
39        Some(path)
40    } else {
41        // Try listing files to find a matching transcript
42        find_transcript_by_scan(&dir, session_id)
43    }
44}
45
/// Scan a directory for a transcript matching the session ID.
///
/// Prefers a `.jsonl` file whose name contains `session_id`; if none
/// matches (Claude Code names transcripts by conversation UUID, which may
/// differ from the session ID we were given), falls back to the most
/// recently modified transcript. The previous implementation ignored the
/// session ID entirely and always returned the newest file.
fn find_transcript_by_scan(dir: &Path, session_id: &str) -> Option<PathBuf> {
    if !dir.is_dir() {
        return None;
    }

    // Collect (path, mtime) for every .jsonl entry we can stat.
    let transcripts: Vec<(PathBuf, std::time::SystemTime)> = fs::read_dir(dir)
        .ok()?
        .flatten()
        .filter_map(|e| {
            let p = e.path();
            let is_jsonl = p.extension().map(|ext| ext == "jsonl").unwrap_or(false);
            if !is_jsonl {
                return None;
            }
            let modified = e.metadata().ok()?.modified().ok()?;
            Some((p, modified))
        })
        .collect();

    // First choice: a filename that actually references the session ID
    // (newest such file if several match).
    if !session_id.is_empty() {
        if let Some((p, _)) = transcripts
            .iter()
            .filter(|(p, _)| {
                p.file_stem()
                    .and_then(|s| s.to_str())
                    .map(|s| s.contains(session_id))
                    .unwrap_or(false)
            })
            .max_by_key(|(_, m)| *m)
        {
            return Some(p.clone());
        }
    }

    // Fallback: the most recently modified transcript, as before.
    transcripts
        .into_iter()
        .max_by_key(|(_, m)| *m)
        .map(|(p, _)| p)
}
71
72// ---------------------------------------------------------------------------
73// Transcript line parsing (Claude Code JSONL format)
74// ---------------------------------------------------------------------------
75
/// A line from a Claude Code transcript JSONL file.
///
/// Only the fields needed for analysis are modeled; any other keys in the
/// JSON object are ignored by serde.
#[derive(Debug, Deserialize)]
struct TranscriptLine {
    /// Entry kind, e.g. "user" or "assistant"; defaults to "" when absent.
    #[serde(default)]
    r#type: String,
    /// Unique ID of this transcript entry; used to deduplicate lines.
    #[serde(default)]
    uuid: Option<String>,
    /// Raw message payload; kept as a `Value` because its shape varies.
    #[serde(default)]
    message: Option<serde_json::Value>,
}
86
/// Message-level token usage from Claude API response.
///
/// Aliases accept the raw Claude API field names
/// (`cache_creation_input_tokens` / `cache_read_input_tokens`);
/// every count defaults to 0 when the field is absent.
#[derive(Debug, Deserialize)]
struct MessageUsage {
    /// Prompt tokens billed as fresh (non-cached) input.
    #[serde(default)]
    input_tokens: i64,
    /// Tokens written into the prompt cache.
    #[serde(default, alias = "cache_creation_input_tokens")]
    cache_creation_tokens: i64,
    /// Tokens served from the prompt cache.
    #[serde(default, alias = "cache_read_input_tokens")]
    cache_read_tokens: i64,
    /// Completion tokens produced by the model.
    #[serde(default)]
    output_tokens: i64,
    /// Extended thinking / reasoning tokens (Claude, o1/o3).
    /// Claude API field: absent or 0 when thinking is off.
    #[serde(default)]
    reasoning_tokens: i64,
}
103
/// A captured thinking/reasoning block from an assistant message.
///
/// Serialize-only: transcripts are read as raw `serde_json::Value`, so this
/// type is never deserialized from them.
#[derive(Debug, Clone, serde::Serialize)]
pub struct ThinkingBlock {
    /// The thinking text content.
    pub content: String,
    /// Which message (by index) this thinking block came from.
    pub message_index: i32,
}
112
/// Results from parsing a transcript.
#[derive(Debug, Default)]
pub struct TranscriptAnalysis {
    /// Summed token usage, deduplicated by `message.id`.
    pub token_usage: TokenUsage,
    /// File paths from Write/Edit/NotebookEdit tool calls
    /// (passed through `relativize_path`), sorted.
    pub modified_files: Vec<String>,
    /// IDs of sub-agents reported in tool results ("agentId: ..." lines).
    pub spawned_agent_ids: Vec<String>,
    /// Number of real user prompts (system/hook messages excluded).
    pub turn_count: i32,
    /// Total number of parseable JSONL entries seen.
    pub message_count: i32,
    /// First non-empty model name reported by an assistant message.
    pub model: Option<String>,
    /// Captured thinking/reasoning blocks from the conversation.
    pub thinking_blocks: Vec<ThinkingBlock>,
    /// Total number of thinking blocks encountered.
    pub thinking_block_count: i32,
    /// Whether extended thinking was used in this session.
    pub has_extended_thinking: bool,
}
129
/// Parse a Claude Code transcript file and extract token usage and metadata.
/// Deduplicates by message.id (Claude streams may create multiple entries).
///
/// Convenience wrapper over [`parse_transcript_range`] that reads the whole
/// file (offset 0, no line limit).
pub fn parse_transcript(path: &Path) -> TranscriptAnalysis {
    parse_transcript_range(path, 0, None)
}
135
136/// Parse a range of a transcript (from byte offset, optionally limited lines).
137pub fn parse_transcript_range(
138    path: &Path,
139    start_byte: i64,
140    _max_lines: Option<usize>,
141) -> TranscriptAnalysis {
142    let mut analysis = TranscriptAnalysis::default();
143
144    let content = match fs::read_to_string(path) {
145        Ok(c) => c,
146        Err(_) => return analysis,
147    };
148
149    // Skip to start_byte offset
150    let content = if start_byte > 0 && (start_byte as usize) < content.len() {
151        &content[start_byte as usize..]
152    } else {
153        &content
154    };
155
156    let mut seen_ids: HashSet<String> = HashSet::new();
157    let mut token_map: HashMap<String, MessageUsage> = HashMap::new();
158    let mut files: HashSet<String> = HashSet::new();
159    let mut agent_ids: HashSet<String> = HashSet::new();
160    let mut user_turns = 0i32;
161    let mut model: Option<String> = None;
162
163    for line in content.lines() {
164        let line = line.trim();
165        if line.is_empty() {
166            continue;
167        }
168
169        let tl: TranscriptLine = match serde_json::from_str(line) {
170            Ok(t) => t,
171            Err(_) => continue,
172        };
173
174        // Count user turns — only real user prompts, not system/hook messages
175        if tl.r#type == "user" {
176            if let Some(ref msg) = tl.message {
177                let is_real_user = is_real_user_turn(msg);
178                if is_real_user {
179                    user_turns += 1;
180                }
181            }
182        }
183
184        // Track message count
185        analysis.message_count += 1;
186
187        // Deduplicate by UUID
188        if let Some(ref uuid) = tl.uuid {
189            if !seen_ids.insert(uuid.clone()) {
190                // Already seen — update token usage (take latest streaming value)
191            }
192        }
193
194        // Extract from assistant messages
195        if tl.r#type == "assistant" {
196            if let Some(ref msg) = tl.message {
197                // Extract model name (take first non-empty model seen)
198                if model.is_none() {
199                    if let Some(m) = msg.get("model").and_then(|v| v.as_str()) {
200                        if !m.is_empty() {
201                            model = Some(m.to_string());
202                        }
203                    }
204                }
205
206                // Extract token usage from message.usage
207                if let Some(usage) = msg.get("usage") {
208                    if let Ok(mu) = serde_json::from_value::<MessageUsage>(usage.clone()) {
209                        let msg_id = msg
210                            .get("id")
211                            .and_then(|v| v.as_str())
212                            .unwrap_or("")
213                            .to_string();
214                        // Dedup by message.id — keep latest (streaming updates)
215                        token_map.insert(msg_id, mu);
216                    }
217                }
218
219                // Extract thinking/reasoning content blocks
220                extract_thinking_from_message(msg, analysis.message_count, &mut analysis);
221
222                // Extract modified files from tool_use content blocks
223                extract_files_from_message(msg, &mut files);
224
225                // Extract spawned agent IDs
226                extract_agent_ids_from_message(msg, &mut agent_ids);
227            }
228        }
229    }
230
231    // Sum token usage across all deduplicated messages
232    for mu in token_map.values() {
233        analysis.token_usage.input_tokens += mu.input_tokens;
234        analysis.token_usage.output_tokens += mu.output_tokens;
235        analysis.token_usage.cache_read_tokens += mu.cache_read_tokens;
236        analysis.token_usage.cache_creation_tokens += mu.cache_creation_tokens;
237        analysis.token_usage.reasoning_tokens += mu.reasoning_tokens;
238    }
239    analysis.token_usage.api_call_count = token_map.len() as i64;
240    // Mark extended thinking if any reasoning tokens were found
241    if analysis.token_usage.reasoning_tokens > 0 || !analysis.thinking_blocks.is_empty() {
242        analysis.has_extended_thinking = true;
243    }
244    analysis.turn_count = user_turns;
245    analysis.model = model;
246    analysis.modified_files = files.into_iter().collect();
247    analysis.modified_files.sort();
248    analysis.spawned_agent_ids = agent_ids.into_iter().collect();
249
250    analysis
251}
252
253/// Check if a user message is a real user turn (not a system message, hook output,
254/// or subagent notification). Claude Code logs system-reminders and hook results as
255/// "user" type messages in the transcript.
256fn is_real_user_turn(msg: &serde_json::Value) -> bool {
257    let content = match msg.get("content") {
258        Some(c) => c,
259        None => return false,
260    };
261
262    // String content
263    if let Some(text) = content.as_str() {
264        return !is_system_content(text);
265    }
266
267    // Array content — check text blocks
268    if let Some(blocks) = content.as_array() {
269        for block in blocks {
270            if let Some(text) = block.get("text").and_then(|t| t.as_str()) {
271                if !is_system_content(text) {
272                    return true;
273                }
274            }
275            // tool_result blocks are internal
276            if block.get("type").and_then(|t| t.as_str()) == Some("tool_result") {
277                return false;
278            }
279        }
280        return false;
281    }
282
283    false
284}
285
/// Check if text content is a system/internal message rather than user input.
///
/// A message counts as "system" when it begins with one of the known internal
/// tags, or mentions `<system-reminder>` anywhere in its body.
fn is_system_content(text: &str) -> bool {
    // Tags that only ever appear at the start of internal messages.
    const PREFIX_TAGS: [&str; 3] = [
        "<task-notification>",
        "<user-prompt-submit-hook>",
        "<available-deferred-tools>",
    ];
    let trimmed = text.trim();
    // `contains` subsumes the former redundant starts_with("<system-reminder>").
    trimmed.contains("<system-reminder>")
        || PREFIX_TAGS.iter().any(|tag| trimmed.starts_with(tag))
}
295
296/// Extract thinking/reasoning blocks from an assistant message.
297/// Claude uses `type: "thinking"` content blocks with a `thinking` field.
298/// OpenAI o1/o3 may use `type: "reasoning"` or include in `reasoning_content`.
299fn extract_thinking_from_message(
300    msg: &serde_json::Value,
301    message_index: i32,
302    analysis: &mut TranscriptAnalysis,
303) {
304    // Check content[] array for thinking blocks (Claude extended thinking)
305    if let Some(content) = msg.get("content").and_then(|c| c.as_array()) {
306        for block in content {
307            let block_type = block.get("type").and_then(|t| t.as_str()).unwrap_or("");
308            match block_type {
309                "thinking" => {
310                    // Claude extended thinking: { "type": "thinking", "thinking": "..." }
311                    if let Some(text) = block.get("thinking").and_then(|t| t.as_str()) {
312                        if !text.trim().is_empty() {
313                            analysis.thinking_blocks.push(ThinkingBlock {
314                                content: text.to_string(),
315                                message_index,
316                            });
317                            analysis.thinking_block_count += 1;
318                        }
319                    }
320                }
321                "reasoning" => {
322                    // o1/o3 style reasoning blocks
323                    if let Some(text) = block
324                        .get("content")
325                        .or_else(|| block.get("text"))
326                        .and_then(|t| t.as_str())
327                    {
328                        if !text.trim().is_empty() {
329                            analysis.thinking_blocks.push(ThinkingBlock {
330                                content: text.to_string(),
331                                message_index,
332                            });
333                            analysis.thinking_block_count += 1;
334                        }
335                    }
336                }
337                _ => {}
338            }
339        }
340    }
341
342    // Check top-level reasoning_content (some providers put reasoning here)
343    if let Some(reasoning) = msg.get("reasoning_content").and_then(|r| r.as_str()) {
344        if !reasoning.trim().is_empty() {
345            analysis.thinking_blocks.push(ThinkingBlock {
346                content: reasoning.to_string(),
347                message_index,
348            });
349            analysis.thinking_block_count += 1;
350        }
351    }
352}
353
354/// Extract file paths from tool_use blocks in a message.
355fn extract_files_from_message(msg: &serde_json::Value, files: &mut HashSet<String>) {
356    // Claude Code messages have content[] array with tool_use blocks
357    if let Some(content) = msg.get("content").and_then(|c| c.as_array()) {
358        for block in content {
359            let block_type = block.get("type").and_then(|t| t.as_str()).unwrap_or("");
360            if block_type == "tool_use" {
361                let tool_name = block.get("name").and_then(|n| n.as_str()).unwrap_or("");
362                if let Some(input) = block.get("input") {
363                    match tool_name {
364                        "Write" | "Edit" | "file_write_tool" | "edit_tool" => {
365                            if let Some(fp) = input
366                                .get("file_path")
367                                .or_else(|| input.get("path"))
368                                .and_then(|v| v.as_str())
369                            {
370                                files.insert(crate::team::hooks::relativize_path(fp));
371                            }
372                        }
373                        "NotebookEdit" => {
374                            if let Some(fp) = input.get("notebook_path").and_then(|v| v.as_str()) {
375                                files.insert(crate::team::hooks::relativize_path(fp));
376                            }
377                        }
378                        _ => {}
379                    }
380                }
381            }
382        }
383    }
384}
385
386/// Extract spawned agent IDs from tool results (Agent tool).
387fn extract_agent_ids_from_message(msg: &serde_json::Value, ids: &mut HashSet<String>) {
388    if let Some(content) = msg.get("content").and_then(|c| c.as_array()) {
389        for block in content {
390            let block_type = block.get("type").and_then(|t| t.as_str()).unwrap_or("");
391            if block_type == "tool_result" || block_type == "text" {
392                if let Some(text) = block.get("text").and_then(|t| t.as_str()) {
393                    // Pattern: "agentId: <hex>" in tool result content
394                    for line in text.lines() {
395                        if let Some(rest) = line.strip_prefix("agentId: ") {
396                            let id = rest.trim();
397                            if !id.is_empty() {
398                                ids.insert(id.to_string());
399                            }
400                        }
401                    }
402                }
403            }
404        }
405    }
406}
407
/// A conversation message suitable for display.
///
/// `role` is "user", "assistant", or "tool"; optional fields are omitted
/// from the serialized output when `None`.
#[derive(Debug, serde::Serialize)]
pub struct ConversationMessage {
    /// "user", "assistant", or "tool".
    pub role: String,
    /// Text content; empty string for tool messages.
    pub content: String,
    /// For tool_use: the tool name
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool: Option<String>,
    /// For tool_use: the file path if applicable
    #[serde(skip_serializing_if = "Option::is_none")]
    pub file: Option<String>,
    /// Thinking/reasoning content from extended thinking
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking: Option<String>,
}
423
/// Parse a transcript into displayable conversation messages.
/// Filters out system messages and deduplicates by UUID.
///
/// Returns an empty Vec when the file cannot be read. Each assistant entry
/// may yield one text message (with any thinking attached) plus one "tool"
/// message per tool_use block.
pub fn parse_conversation(path: &Path) -> Vec<ConversationMessage> {
    let content = match fs::read_to_string(path) {
        Ok(c) => c,
        Err(_) => return Vec::new(),
    };

    let mut messages = Vec::new();
    let mut seen_uuids: HashSet<String> = HashSet::new();

    for line in content.lines() {
        let line = line.trim();
        if line.is_empty() {
            continue;
        }

        // Lines that are not valid transcript JSON are skipped silently.
        let tl: TranscriptLine = match serde_json::from_str(line) {
            Ok(t) => t,
            Err(_) => continue,
        };

        // Deduplicate by UUID — keep only the first occurrence
        if let Some(ref uuid) = tl.uuid {
            if !seen_uuids.insert(uuid.clone()) {
                continue;
            }
        }

        let msg = match tl.message {
            Some(ref m) => m,
            None => continue,
        };

        if tl.r#type == "user" {
            // Skip system-reminder / hook noise logged as "user" entries.
            if !is_real_user_turn(msg) {
                continue;
            }
            let text = extract_text_content(msg);
            if !text.is_empty() {
                messages.push(ConversationMessage {
                    role: "user".to_string(),
                    content: text,
                    tool: None,
                    file: None,
                    thinking: None,
                });
            }
        } else if tl.r#type == "assistant" {
            // Extract thinking, text, and tool_use blocks
            if let Some(content_arr) = msg.get("content").and_then(|c| c.as_array()) {
                let mut thinking_parts = Vec::new();
                let mut text_parts = Vec::new();
                let mut tool_uses = Vec::new();

                for block in content_arr {
                    let block_type = block.get("type").and_then(|t| t.as_str()).unwrap_or("");
                    match block_type {
                        "thinking" => {
                            // Claude extended thinking text lives in "thinking".
                            if let Some(t) = block.get("thinking").and_then(|t| t.as_str()) {
                                let trimmed = t.trim();
                                if !trimmed.is_empty() {
                                    thinking_parts.push(trimmed.to_string());
                                }
                            }
                        }
                        "reasoning" => {
                            // o1/o3-style reasoning: "content" or "text".
                            if let Some(t) = block
                                .get("content")
                                .or_else(|| block.get("text"))
                                .and_then(|t| t.as_str())
                            {
                                let trimmed = t.trim();
                                if !trimmed.is_empty() {
                                    thinking_parts.push(trimmed.to_string());
                                }
                            }
                        }
                        "text" => {
                            if let Some(t) = block.get("text").and_then(|t| t.as_str()) {
                                let trimmed = t.trim();
                                if !trimmed.is_empty() {
                                    text_parts.push(trimmed.to_string());
                                }
                            }
                        }
                        "tool_use" => {
                            let name = block
                                .get("name")
                                .and_then(|n| n.as_str())
                                .unwrap_or("unknown");
                            // Best-effort "file" label: path-like input, or the
                            // command for shell tools, truncated for display.
                            let file = block
                                .get("input")
                                .and_then(|i| {
                                    i.get("file_path")
                                        .or_else(|| i.get("path"))
                                        .or_else(|| i.get("command"))
                                })
                                .and_then(|v| v.as_str())
                                .map(|s| truncate_str(s, 200));
                            tool_uses.push((name.to_string(), file));
                        }
                        _ => {}
                    }
                }

                // Also check top-level reasoning_content
                if let Some(reasoning) = msg.get("reasoning_content").and_then(|r| r.as_str()) {
                    let trimmed = reasoning.trim();
                    if !trimmed.is_empty() {
                        thinking_parts.push(trimmed.to_string());
                    }
                }

                // Combine thinking into a single field
                let thinking = if thinking_parts.is_empty() {
                    None
                } else {
                    Some(thinking_parts.join("\n\n"))
                };

                // Add text message if any (with thinking attached)
                if !text_parts.is_empty() || thinking.is_some() {
                    messages.push(ConversationMessage {
                        role: "assistant".to_string(),
                        content: text_parts.join("\n\n"),
                        tool: None,
                        file: None,
                        thinking,
                    });
                }

                // Add tool uses
                for (tool_name, file) in tool_uses {
                    messages.push(ConversationMessage {
                        role: "tool".to_string(),
                        content: String::new(),
                        tool: Some(tool_name),
                        file,
                        thinking: None,
                    });
                }
            }
        }
    }

    messages
}
572
573/// Extract text content from a message value.
574fn extract_text_content(msg: &serde_json::Value) -> String {
575    if let Some(text) = msg.get("content").and_then(|c| c.as_str()) {
576        return text.to_string();
577    }
578    if let Some(blocks) = msg.get("content").and_then(|c| c.as_array()) {
579        let parts: Vec<&str> = blocks
580            .iter()
581            .filter_map(|b| {
582                if b.get("type").and_then(|t| t.as_str()) == Some("text") {
583                    b.get("text").and_then(|t| t.as_str())
584                } else {
585                    None
586                }
587            })
588            .collect();
589        return parts.join("\n");
590    }
591    String::new()
592}
593
/// Truncate `s` to at most `max_len` bytes, appending "..." when cut.
///
/// The cut point is snapped back to a UTF-8 char boundary: the previous
/// `&s[..max_len]` panicked when `max_len` landed inside a multi-byte
/// character (e.g. a non-ASCII path or shell command).
fn truncate_str(s: &str, max_len: usize) -> String {
    if s.len() <= max_len {
        return s.to_string();
    }
    let mut cut = max_len;
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}...", &s[..cut])
}
601
602/// Copy a transcript file into `.git/chub/transcripts/` for local review.
603/// The copy is stored as `<session_id>.jsonl` inside the repo's `.git` directory
604/// so it won't be committed but is available for LLM review.
605pub fn archive_transcript_to_git(transcript_path: &Path, session_id: &str) -> Option<PathBuf> {
606    // Find the .git directory
607    let git_dir = find_git_dir()?;
608    let dest_dir = git_dir.join("chub").join("transcripts");
609    fs::create_dir_all(&dest_dir).ok()?;
610
611    let dest = dest_dir.join(format!("{}.jsonl", session_id));
612    fs::copy(transcript_path, &dest).ok()?;
613    Some(dest)
614}
615
/// Find the `.git` directory for the current repo.
///
/// Runs `git rev-parse --git-dir`; returns `None` when git is unavailable,
/// the command fails, or the reported path is not a directory.
fn find_git_dir() -> Option<PathBuf> {
    let output = std::process::Command::new("git")
        .args(["rev-parse", "--git-dir"])
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }

    let raw = String::from_utf8_lossy(&output.stdout).trim().to_string();
    let path = PathBuf::from(raw);
    if !path.is_dir() {
        return None;
    }
    // git may print a relative path; anchor it to the current directory.
    if path.is_absolute() {
        Some(path)
    } else {
        Some(std::env::current_dir().ok()?.join(path))
    }
}
637
/// Get the byte size of a transcript file.
/// Returns 0 when the file cannot be stat'ed.
pub fn transcript_size(path: &Path) -> i64 {
    match fs::metadata(path) {
        Ok(meta) => meta.len() as i64,
        Err(_) => 0,
    }
}
642
/// Count lines in a transcript file.
/// Returns 0 when the file cannot be read as UTF-8 text.
pub fn transcript_line_count(path: &Path) -> i64 {
    match fs::read_to_string(path) {
        Ok(content) => content.lines().count() as i64,
        Err(_) => 0,
    }
}
649
650// ---------------------------------------------------------------------------
651// Diff tracking
652// ---------------------------------------------------------------------------
653
654/// Get files modified since a given commit using git diff-tree.
655pub fn get_diff_files(base_commit: &str) -> DiffResult {
656    let mut result = DiffResult::default();
657
658    // Get diff against base commit
659    let output = std::process::Command::new("git")
660        .args(["diff", "--name-status", base_commit, "HEAD"])
661        .output();
662
663    if let Ok(output) = output {
664        let text = String::from_utf8_lossy(&output.stdout);
665        for line in text.lines() {
666            let parts: Vec<&str> = line.split('\t').collect();
667            if parts.len() >= 2 {
668                let status = parts[0];
669                let file = parts[1].to_string();
670                match status {
671                    "A" => result.new_files.push(file),
672                    "D" => result.deleted_files.push(file),
673                    _ => result.modified_files.push(file), // M, R, C, etc.
674                }
675            }
676        }
677    }
678
679    result
680}
681
/// Calculate line attribution between two commits.
///
/// Runs `git diff --numstat <base> HEAD` and sums added/removed line counts.
/// Returns `None` when git cannot be run or the command fails to spawn.
///
/// NOTE(review): `total` is set to `agent_added`, so `agent_percentage` is
/// always 100.0 whenever any lines were added (and 0.0 otherwise) — confirm
/// whether the denominator should include human-attributed lines.
/// NOTE(review): removed lines from the same agent diff are stored in
/// `human_removed` — verify the intended field semantics against the
/// `InitialAttribution` definition.
pub fn calculate_attribution(base_commit: &str) -> Option<super::types::InitialAttribution> {
    // Get total diff stats
    let output = std::process::Command::new("git")
        .args(["diff", "--numstat", base_commit, "HEAD"])
        .output()
        .ok()?;

    let text = String::from_utf8_lossy(&output.stdout);
    let mut agent_added: i64 = 0;
    let mut agent_removed: i64 = 0;

    // numstat lines are "<added>\t<removed>\t<path>"; binary files report "-",
    // which fails the integer parse and is therefore skipped.
    for line in text.lines() {
        let parts: Vec<&str> = line.split('\t').collect();
        if parts.len() >= 3 {
            if let (Ok(added), Ok(removed)) = (parts[0].parse::<i64>(), parts[1].parse::<i64>()) {
                agent_added += added;
                agent_removed += removed;
            }
        }
    }

    let total = agent_added;
    let percentage = if total > 0 {
        (agent_added as f64 / total as f64) * 100.0
    } else {
        0.0
    };

    Some(super::types::InitialAttribution {
        calculated_at: crate::util::now_iso8601(),
        agent_lines: agent_added,
        human_added: 0,
        human_modified: 0,
        human_removed: agent_removed,
        total_committed: total,
        agent_percentage: percentage,
    })
}
721
/// Files touched between a base commit and HEAD, grouped by git status
/// (see `get_diff_files`).
#[derive(Debug, Default)]
pub struct DiffResult {
    /// Files with any status other than added or deleted (M, R, C, ...).
    pub modified_files: Vec<String>,
    /// Newly added files (status "A").
    pub new_files: Vec<String>,
    /// Deleted files (status "D").
    pub deleted_files: Vec<String>,
}
728
729impl DiffResult {
730    pub fn all_files(&self) -> Vec<String> {
731        let mut all = Vec::new();
732        all.extend(self.modified_files.iter().cloned());
733        all.extend(self.new_files.iter().cloned());
734        all.extend(self.deleted_files.iter().cloned());
735        all
736    }
737}
738
#[cfg(test)]
mod tests {
    use super::*;

    /// Every non-alphanumeric character (separators, colons, backslashes)
    /// becomes a dash.
    #[test]
    fn sanitize_path() {
        assert_eq!(
            sanitize_path_for_claude("/home/user/my-project"),
            "-home-user-my-project"
        );
        assert_eq!(
            sanitize_path_for_claude("D:\\PWorkspaces\\Context\\chub"),
            "D--PWorkspaces-Context-chub"
        );
    }

    /// An empty transcript file parses to an all-zero analysis.
    #[test]
    fn parse_empty_transcript() {
        let dir = std::env::temp_dir().join("chub-test-transcript");
        let _ = fs::create_dir_all(&dir);
        let path = dir.join("empty.jsonl");
        let _ = fs::write(&path, "");
        let analysis = parse_transcript(&path);
        assert!(analysis.token_usage.is_empty());
        assert_eq!(analysis.turn_count, 0);
        let _ = fs::remove_dir_all(&dir);
    }

    /// Token usage sums across messages; Write tool inputs populate
    /// modified_files; both user prompts count as turns.
    #[test]
    fn parse_transcript_with_usage() {
        let dir = std::env::temp_dir().join("chub-test-transcript2");
        let _ = fs::create_dir_all(&dir);
        let path = dir.join("test.jsonl");

        let content = r#"{"type":"user","uuid":"u1","message":{"role":"user","content":"hello"}}
{"type":"assistant","uuid":"a1","message":{"id":"msg_1","role":"assistant","content":[{"type":"text","text":"hi"}],"usage":{"input_tokens":100,"output_tokens":50,"cache_read_input_tokens":10,"cache_creation_input_tokens":5}}}
{"type":"user","uuid":"u2","message":{"role":"user","content":"edit file"}}
{"type":"assistant","uuid":"a2","message":{"id":"msg_2","role":"assistant","content":[{"type":"tool_use","name":"Write","input":{"file_path":"/src/main.rs","content":"fn main()"}}],"usage":{"input_tokens":200,"output_tokens":100}}}
"#;
        let _ = fs::write(&path, content);
        let analysis = parse_transcript(&path);
        assert_eq!(analysis.token_usage.input_tokens, 300);
        assert_eq!(analysis.token_usage.output_tokens, 150);
        assert_eq!(analysis.token_usage.cache_read_tokens, 10);
        assert_eq!(analysis.token_usage.api_call_count, 2);
        assert_eq!(analysis.turn_count, 2);
        assert!(analysis
            .modified_files
            .contains(&"/src/main.rs".to_string()));
        let _ = fs::remove_dir_all(&dir);
    }

    /// Thinking blocks and reasoning tokens are captured per assistant
    /// message; the first model name seen wins.
    #[test]
    fn parse_thinking_blocks() {
        let dir = std::env::temp_dir().join("chub-test-thinking");
        let _ = fs::create_dir_all(&dir);
        let path = dir.join("thinking.jsonl");

        let content = r#"{"type":"user","uuid":"u1","message":{"role":"user","content":"analyze this"}}
{"type":"assistant","uuid":"a1","message":{"id":"msg_1","role":"assistant","model":"claude-opus-4-6","content":[{"type":"thinking","thinking":"Let me analyze the code structure first."},{"type":"text","text":"I'll analyze this for you."},{"type":"tool_use","name":"Read","input":{"file_path":"src/main.rs"}}],"usage":{"input_tokens":5000,"output_tokens":2000,"reasoning_tokens":3500}}}
{"type":"user","uuid":"u2","message":{"role":"user","content":"refactor it"}}
{"type":"assistant","uuid":"a2","message":{"id":"msg_2","role":"assistant","content":[{"type":"thinking","thinking":"The user wants a refactor."},{"type":"text","text":"Refactoring now."}],"usage":{"input_tokens":8000,"output_tokens":3000,"reasoning_tokens":2000}}}
"#;
        let _ = fs::write(&path, content);
        let analysis = parse_transcript(&path);

        // Verify reasoning tokens extracted
        assert_eq!(analysis.token_usage.reasoning_tokens, 5500);
        assert!(analysis.has_extended_thinking);

        // Verify thinking blocks captured
        assert_eq!(analysis.thinking_block_count, 2);
        assert_eq!(analysis.thinking_blocks.len(), 2);
        assert!(analysis.thinking_blocks[0]
            .content
            .contains("analyze the code"));
        assert!(analysis.thinking_blocks[1]
            .content
            .contains("wants a refactor"));

        // Verify other tokens still correct
        assert_eq!(analysis.token_usage.input_tokens, 13000);
        assert_eq!(analysis.token_usage.output_tokens, 5000);
        assert_eq!(analysis.model, Some("claude-opus-4-6".to_string()));

        let _ = fs::remove_dir_all(&dir);
    }

    /// Conversation view attaches thinking to the assistant message and
    /// keeps the visible text separate.
    #[test]
    fn parse_conversation_with_thinking() {
        let dir = std::env::temp_dir().join("chub-test-conv-thinking");
        let _ = fs::create_dir_all(&dir);
        let path = dir.join("conv.jsonl");

        let content = r#"{"type":"user","uuid":"u1","message":{"role":"user","content":"hello"}}
{"type":"assistant","uuid":"a1","message":{"id":"msg_1","role":"assistant","content":[{"type":"thinking","thinking":"User said hello, I should greet back."},{"type":"text","text":"Hi there!"}],"usage":{"input_tokens":100,"output_tokens":50}}}
"#;
        let _ = fs::write(&path, content);
        let messages = parse_conversation(&path);

        assert_eq!(messages.len(), 2);
        // User message has no thinking
        assert!(messages[0].thinking.is_none());
        // Assistant message has thinking
        assert!(messages[1].thinking.is_some());
        assert!(messages[1]
            .thinking
            .as_ref()
            .unwrap()
            .contains("greet back"));
        assert_eq!(messages[1].content, "Hi there!");

        let _ = fs::remove_dir_all(&dir);
    }

    /// Two streamed entries sharing a message.id collapse to one API call,
    /// with the later usage values winning.
    #[test]
    fn dedup_by_message_id() {
        let dir = std::env::temp_dir().join("chub-test-dedup");
        let _ = fs::create_dir_all(&dir);
        let path = dir.join("dedup.jsonl");

        // Same message.id appears twice (streaming update)
        let content = r#"{"type":"assistant","uuid":"a1","message":{"id":"msg_1","role":"assistant","content":[],"usage":{"input_tokens":100,"output_tokens":50}}}
{"type":"assistant","uuid":"a1-update","message":{"id":"msg_1","role":"assistant","content":[],"usage":{"input_tokens":100,"output_tokens":80}}}
"#;
        let _ = fs::write(&path, content);
        let analysis = parse_transcript(&path);
        // Should use the LAST value for msg_1 (80, not 50)
        assert_eq!(analysis.token_usage.output_tokens, 80);
        assert_eq!(analysis.token_usage.api_call_count, 1); // deduplicated
        let _ = fs::remove_dir_all(&dir);
    }
}