Skip to main content

normalize_chat_sessions/formats/
claude_code.rs

1//! Claude Code JSONL format parser.
2
3use super::{
4    LogFormat, ParseError, SessionFile, list_jsonl_sessions, list_subagent_sessions, peek_lines,
5};
6use crate::{ContentBlock, Message, Role, Session, TokenUsage, Turn};
7use serde_json::Value;
8use std::collections::HashMap;
9use std::fs::File;
10use std::io::{BufRead, BufReader};
11use std::path::{Path, PathBuf};
12
/// Claude Code session log format (JSONL).
///
/// This is a stateless marker type: it carries no data, and everything it can
/// do is provided by its `LogFormat` implementation. Because it has no fields,
/// the usual value-type traits are derived so callers can copy it freely,
/// print it in diagnostics, and obtain one through `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct ClaudeCodeFormat;
15
16impl LogFormat for ClaudeCodeFormat {
17    fn name(&self) -> &'static str {
18        "claude"
19    }
20
21    fn sessions_dir(&self, project: Option<&Path>) -> PathBuf {
22        let claude_dir = if let Ok(dir) = std::env::var("CLAUDE_SESSIONS_DIR") {
23            PathBuf::from(dir)
24        } else {
25            let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".into());
26            PathBuf::from(home).join(".claude/projects")
27        };
28
29        // Claude encodes project paths - check which encoding variant exists
30        let path_to_claude_dir = |path: &Path| -> PathBuf {
31            let raw = path.to_string_lossy();
32            let path_str = raw.trim_end_matches('/').replace('/', "-");
33            // Try with leading dash first (Claude's format)
34            let proj_dir = claude_dir.join(format!("-{}", path_str.trim_start_matches('-')));
35            if proj_dir.exists() {
36                return proj_dir;
37            }
38            // Try without leading dash
39            let proj_dir = claude_dir.join(&path_str);
40            if proj_dir.exists() {
41                return proj_dir;
42            }
43            // Return primary format even if it doesn't exist yet
44            claude_dir.join(format!("-{}", path_str.trim_start_matches('-')))
45        };
46
47        if let Some(proj) = project {
48            return path_to_claude_dir(proj);
49        }
50
51        // Discover git root via gix (no PATH dependency on git binary)
52        if let Ok(cwd) = std::env::current_dir()
53            && let Ok(repo) = gix::discover(&cwd)
54            && let Some(worktree) = repo.workdir()
55        {
56            return path_to_claude_dir(worktree);
57        }
58
59        if let Ok(cwd) = std::env::current_dir() {
60            return path_to_claude_dir(&cwd);
61        }
62
63        claude_dir
64    }
65
66    fn list_sessions(&self, project: Option<&Path>) -> Vec<SessionFile> {
67        list_jsonl_sessions(&self.sessions_dir(project))
68    }
69
70    fn list_subagent_sessions(&self, project: Option<&Path>) -> Vec<SessionFile> {
71        list_subagent_sessions(&self.sessions_dir(project))
72    }
73
74    fn detect(&self, path: &Path) -> f64 {
75        // Check extension
76        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
77        if ext != "jsonl" {
78            return 0.0;
79        }
80
81        // Peek at first few lines
82        for line in peek_lines(path, 5) {
83            if let Ok(entry) = serde_json::from_str::<Value>(&line) {
84                // Claude Code has type field with specific values
85                if let Some(t) = entry.get("type").and_then(|v| v.as_str())
86                    && matches!(
87                        t,
88                        "user" | "assistant" | "summary" | "file-history-snapshot"
89                    )
90                {
91                    return 1.0;
92                }
93            }
94        }
95        0.0
96    }
97
98    fn parse(&self, path: &Path) -> Result<Session, ParseError> {
99        let file = File::open(path).map_err(|e| ParseError::Io {
100            path: path.to_path_buf(),
101            source: e,
102        })?;
103        let reader = BufReader::new(file);
104
105        let mut session = Session::new(path.to_path_buf(), self.name());
106        let mut current_turn = Turn::default();
107        let mut request_tokens: HashMap<String, TokenUsage> = HashMap::new();
108        // All requestIds seen in the current turn (one per API call; multi-round turns
109        // have multiple calls: tool-call round 1, tool-call round 2, ..., final answer).
110        let mut turn_request_ids: Vec<String> = Vec::new();
111
112        for line in reader.lines() {
113            let line = line.map_err(|e| ParseError::Io {
114                path: path.to_path_buf(),
115                source: e,
116            })?;
117            if line.trim().is_empty() {
118                continue;
119            }
120
121            let Ok(entry) = serde_json::from_str::<Value>(&line) else {
122                continue;
123            };
124
125            let Some(entry_type) = entry.get("type").and_then(|v| v.as_str()) else {
126                continue;
127            };
128
129            match entry_type {
130                "user" => {
131                    // isMeta: true = caveat/context injections by Claude Code itself (not human input)
132                    let is_meta = entry
133                        .get("isMeta")
134                        .and_then(|v| v.as_bool())
135                        .unwrap_or(false);
136                    // Compaction summary injections: bare string starting with the continuation prefix
137                    let content_str = entry
138                        .get("message")
139                        .and_then(|m| m.get("content"))
140                        .and_then(|c| c.as_str());
141                    let is_compaction_summary = content_str
142                        .is_some_and(|s| s.starts_with("This session is being continued"));
143                    // Treat these as system-role messages so they don't appear in user output
144                    let role = if is_meta || is_compaction_summary {
145                        Role::System
146                    } else {
147                        Role::User
148                    };
149                    let message = parse_message(&entry, role);
150                    // Tool result messages are structurally "user" in Claude Code's format
151                    // but semantically they are tool responses, not human input.
152                    let is_tool_result = !message.content.is_empty()
153                        && message
154                            .content
155                            .iter()
156                            .all(|b| matches!(b, ContentBlock::ToolResult { .. }));
157
158                    if is_tool_result {
159                        // Tool results belong to the current turn, not a new one
160                        let mut tool_msg = message;
161                        tool_msg.role = Role::Tool;
162                        current_turn.messages.push(tool_msg);
163                    } else {
164                        // Flush previous turn if we have messages
165                        if !current_turn.messages.is_empty() {
166                            current_turn.token_usage =
167                                sum_turn_tokens(&turn_request_ids, &mut request_tokens);
168                            turn_request_ids.clear();
169                            session.turns.push(std::mem::take(&mut current_turn));
170                        }
171                        current_turn.messages.push(message);
172                    }
173                }
174                "assistant" => {
175                    let request_id = entry
176                        .get("requestId")
177                        .and_then(|v| v.as_str())
178                        .map(String::from);
179
180                    // Extract per-turn model
181                    let turn_model = entry
182                        .get("message")
183                        .and_then(|m| m.get("model"))
184                        .and_then(|v| v.as_str())
185                        .map(String::from);
186
187                    // Extract token usage (take max per request due to streaming)
188                    if let Some(usage) = entry.get("message").and_then(|m| m.get("usage")) {
189                        let tokens = TokenUsage {
190                            input: usage
191                                .get("input_tokens")
192                                .and_then(|v| v.as_u64())
193                                .unwrap_or(0),
194                            output: usage
195                                .get("output_tokens")
196                                .and_then(|v| v.as_u64())
197                                .unwrap_or(0),
198                            cache_read: usage
199                                .get("cache_read_input_tokens")
200                                .and_then(|v| v.as_u64()),
201                            cache_create: usage
202                                .get("cache_creation_input_tokens")
203                                .and_then(|v| v.as_u64()),
204                            model: turn_model.clone(),
205                        };
206                        if let Some(ref req_id) = request_id {
207                            let existing = request_tokens.entry(req_id.clone()).or_default();
208                            existing.input = existing.input.max(tokens.input);
209                            existing.output = existing.output.max(tokens.output);
210                            if let Some(cr) = tokens.cache_read {
211                                *existing.cache_read.get_or_insert(0) =
212                                    existing.cache_read.unwrap_or(0).max(cr);
213                            }
214                            if let Some(cc) = tokens.cache_create {
215                                *existing.cache_create.get_or_insert(0) =
216                                    existing.cache_create.unwrap_or(0).max(cc);
217                            }
218                            if tokens.model.is_some() {
219                                existing.model = tokens.model;
220                            }
221                        }
222                    }
223
224                    // Extract model from first assistant message
225                    if session.metadata.model.is_none() {
226                        session.metadata.model = entry
227                            .get("message")
228                            .and_then(|m| m.get("model"))
229                            .and_then(|v| v.as_str())
230                            .map(String::from);
231                    }
232
233                    let message = parse_message(&entry, Role::Assistant);
234                    current_turn.messages.push(message);
235                    if let Some(req_id) = request_id
236                        && !turn_request_ids.contains(&req_id)
237                    {
238                        turn_request_ids.push(req_id);
239                    }
240                }
241                "summary" => {
242                    // Extract session metadata from summary
243                    if session.metadata.session_id.is_none() {
244                        session.metadata.session_id = entry
245                            .get("sessionId")
246                            .and_then(|v| v.as_str())
247                            .map(String::from);
248                    }
249                    // Extract timestamp
250                    if session.metadata.timestamp.is_none() {
251                        session.metadata.timestamp = entry
252                            .get("timestamp")
253                            .and_then(|v| v.as_str())
254                            .map(String::from);
255                    }
256                }
257                _ => {}
258            }
259        }
260
261        // Flush final turn
262        if !current_turn.messages.is_empty() {
263            current_turn.token_usage = sum_turn_tokens(&turn_request_ids, &mut request_tokens);
264            session.turns.push(current_turn);
265        }
266
267        // Set provider
268        session.metadata.provider = Some("anthropic".to_string());
269
270        // Detect subagent metadata from the file path and first entry's fields.
271        // Subagent files live at <session-uuid>/subagents/agent-<id>.jsonl
272        if let Some(stem) = path.file_stem().and_then(|s| s.to_str())
273            && stem.starts_with("agent-")
274        {
275            session.agent_id = Some(stem.to_string());
276            // Parent ID from the grandparent directory name (the session UUID)
277            if let Some(parent_dir) = path.parent().and_then(|p| p.parent())
278                && let Some(parent_name) = parent_dir.file_name().and_then(|n| n.to_str())
279            {
280                session.parent_id = Some(parent_name.to_string());
281            }
282            // Read companion .meta.json for agent type, default to "subagent"
283            let meta_path = path.with_extension("meta.json");
284            session.subagent_type = Some(
285                std::fs::read_to_string(&meta_path)
286                    .ok()
287                    .and_then(|s| serde_json::from_str::<Value>(&s).ok())
288                    .and_then(|v| {
289                        v.get("agentType")
290                            .and_then(|t| t.as_str())
291                            .map(String::from)
292                    })
293                    .unwrap_or_else(|| "subagent".into()),
294            );
295        } else {
296            session.subagent_type = Some("interactive".into());
297        }
298
299        Ok(session)
300    }
301}
302
303/// Parse a JSONL entry into a Message.
304/// Sum token usage across all API calls in a turn.
305///
306/// A single user-prompt turn may involve multiple API calls (e.g. tool-call
307/// rounds before the final answer). Each call has its own `requestId` and its
308/// own `usage` entry. We sum them so `Turn::token_usage` reflects the full cost
309/// of the turn, not just the last API call.
310fn sum_turn_tokens(
311    ids: &[String],
312    request_tokens: &mut HashMap<String, TokenUsage>,
313) -> Option<TokenUsage> {
314    if ids.is_empty() {
315        return None;
316    }
317    let mut total = TokenUsage::default();
318    let mut any = false;
319    for id in ids {
320        if let Some(u) = request_tokens.remove(id) {
321            total.input += u.input;
322            total.output += u.output;
323            if let Some(cr) = u.cache_read {
324                *total.cache_read.get_or_insert(0) += cr;
325            }
326            if let Some(cc) = u.cache_create {
327                *total.cache_create.get_or_insert(0) += cc;
328            }
329            // Use the model from the last API call (most likely the final answer)
330            if u.model.is_some() {
331                total.model = u.model;
332            }
333            any = true;
334        }
335    }
336    any.then_some(total)
337}
338
339fn parse_message(entry: &Value, role: Role) -> Message {
340    let mut content_blocks = Vec::new();
341
342    // Content can be a bare string (human-typed prompts) or an array of content blocks
343    // (tool results, assistant text blocks, etc.)
344    let content_value = entry.get("message").and_then(|m| m.get("content"));
345
346    if let Some(text) = content_value.and_then(|c| c.as_str()) {
347        if !text.is_empty() {
348            content_blocks.push(ContentBlock::Text {
349                text: text.to_string(),
350            });
351        }
352    } else if let Some(content) = content_value.and_then(|c| c.as_array()) {
353        for block in content {
354            let block_type = block.get("type").and_then(|v| v.as_str()).unwrap_or("");
355
356            match block_type {
357                "text" => {
358                    if let Some(text) = block.get("text").and_then(|v| v.as_str()) {
359                        content_blocks.push(ContentBlock::Text {
360                            text: text.to_string(),
361                        });
362                    }
363                }
364                "tool_use" => {
365                    let id = block
366                        .get("id")
367                        .and_then(|v| v.as_str())
368                        .unwrap_or("")
369                        .to_string();
370                    let name = block
371                        .get("name")
372                        .and_then(|v| v.as_str())
373                        .unwrap_or("")
374                        .to_string();
375                    let input = block.get("input").cloned().unwrap_or(Value::Null);
376                    content_blocks.push(ContentBlock::ToolUse { id, name, input });
377                }
378                "tool_result" => {
379                    let tool_use_id = block
380                        .get("tool_use_id")
381                        .and_then(|v| v.as_str())
382                        .unwrap_or("")
383                        .to_string();
384                    let result_content = match block.get("content") {
385                        Some(v) if v.is_string() => v.as_str().unwrap_or("").to_string(),
386                        Some(v) => v
387                            .as_array()
388                            .map(|arr| {
389                                arr.iter()
390                                    .filter_map(|b| b.get("text").and_then(|t| t.as_str()))
391                                    .collect::<Vec<_>>()
392                                    .join("\n")
393                            })
394                            .unwrap_or_default(),
395                        _ => String::new(),
396                    };
397                    let is_error = block
398                        .get("is_error")
399                        .and_then(|v| v.as_bool())
400                        .unwrap_or(false);
401                    content_blocks.push(ContentBlock::ToolResult {
402                        tool_use_id,
403                        content: result_content,
404                        is_error,
405                    });
406                }
407                "thinking" => {
408                    if let Some(text) = block.get("thinking").and_then(|v| v.as_str()) {
409                        content_blocks.push(ContentBlock::Thinking {
410                            text: text.to_string(),
411                        });
412                    }
413                }
414                _ => {}
415            }
416        }
417    }
418
419    Message {
420        role,
421        content: content_blocks,
422        timestamp: entry
423            .get("timestamp")
424            .and_then(|v| v.as_str())
425            .map(String::from),
426    }
427}