Skip to main content

normalize_chat_sessions/formats/
claude_code.rs

1//! Claude Code JSONL format parser.
2
3use super::{LogFormat, SessionFile, list_jsonl_sessions, peek_lines};
4use crate::{ContentBlock, Message, Role, Session, TokenUsage, Turn};
5use serde_json::Value;
6use std::collections::HashMap;
7use std::fs::File;
8use std::io::{BufRead, BufReader};
9use std::path::{Path, PathBuf};
10
/// Claude Code session log format (JSONL).
///
/// Stateless marker type: all behaviour lives in its [`LogFormat`]
/// implementation, so the value itself carries no data and is free to copy.
#[derive(Debug, Clone, Copy, Default)]
pub struct ClaudeCodeFormat;
13
14impl LogFormat for ClaudeCodeFormat {
15    fn name(&self) -> &'static str {
16        "claude"
17    }
18
19    fn sessions_dir(&self, project: Option<&Path>) -> PathBuf {
20        let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".into());
21        let claude_dir = PathBuf::from(home).join(".claude/projects");
22
23        // Claude encodes project paths - check which encoding variant exists
24        let path_to_claude_dir = |path: &Path| -> PathBuf {
25            let path_str = path.to_string_lossy().replace('/', "-");
26            // Try with leading dash first (Claude's format)
27            let proj_dir = claude_dir.join(format!("-{}", path_str.trim_start_matches('-')));
28            if proj_dir.exists() {
29                return proj_dir;
30            }
31            // Try without leading dash
32            let proj_dir = claude_dir.join(&path_str);
33            if proj_dir.exists() {
34                return proj_dir;
35            }
36            // Return primary format even if it doesn't exist yet
37            claude_dir.join(format!("-{}", path_str.trim_start_matches('-')))
38        };
39
40        if let Some(proj) = project {
41            return path_to_claude_dir(proj);
42        }
43
44        if let Ok(output) = std::process::Command::new("git")
45            .args(["rev-parse", "--show-toplevel"])
46            .output()
47        {
48            if output.status.success() {
49                return path_to_claude_dir(Path::new(
50                    String::from_utf8_lossy(&output.stdout).trim(),
51                ));
52            }
53        }
54
55        if let Ok(cwd) = std::env::current_dir() {
56            return path_to_claude_dir(&cwd);
57        }
58
59        claude_dir
60    }
61
62    fn list_sessions(&self, project: Option<&Path>) -> Vec<SessionFile> {
63        list_jsonl_sessions(&self.sessions_dir(project))
64    }
65
66    fn detect(&self, path: &Path) -> f64 {
67        // Check extension
68        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
69        if ext != "jsonl" {
70            return 0.0;
71        }
72
73        // Peek at first few lines
74        for line in peek_lines(path, 5) {
75            if let Ok(entry) = serde_json::from_str::<Value>(&line) {
76                // Claude Code has type field with specific values
77                if let Some(t) = entry.get("type").and_then(|v| v.as_str()) {
78                    if matches!(
79                        t,
80                        "user" | "assistant" | "summary" | "file-history-snapshot"
81                    ) {
82                        return 1.0;
83                    }
84                }
85            }
86        }
87        0.0
88    }
89
90    fn parse(&self, path: &Path) -> Result<Session, String> {
91        let file = File::open(path).map_err(|e| e.to_string())?;
92        let reader = BufReader::new(file);
93
94        let mut session = Session::new(path.to_path_buf(), self.name());
95        let mut current_turn = Turn::default();
96        let mut request_tokens: HashMap<String, TokenUsage> = HashMap::new();
97        let mut last_request_id: Option<String> = None;
98
99        for line in reader.lines() {
100            let line = line.map_err(|e| e.to_string())?;
101            if line.trim().is_empty() {
102                continue;
103            }
104
105            let Ok(entry) = serde_json::from_str::<Value>(&line) else {
106                continue;
107            };
108
109            let Some(entry_type) = entry.get("type").and_then(|v| v.as_str()) else {
110                continue;
111            };
112
113            match entry_type {
114                "user" => {
115                    // Flush previous turn if we have messages
116                    if !current_turn.messages.is_empty() {
117                        // Attach token usage from the last request
118                        if let Some(req_id) = &last_request_id {
119                            if let Some(usage) = request_tokens.remove(req_id) {
120                                current_turn.token_usage = Some(usage);
121                            }
122                        }
123                        session.turns.push(std::mem::take(&mut current_turn));
124                    }
125
126                    let message = parse_message(&entry, Role::User);
127                    current_turn.messages.push(message);
128                }
129                "assistant" => {
130                    let request_id = entry
131                        .get("requestId")
132                        .and_then(|v| v.as_str())
133                        .map(String::from);
134
135                    // Extract token usage (take max per request due to streaming)
136                    if let Some(usage) = entry.get("message").and_then(|m| m.get("usage")) {
137                        let tokens = TokenUsage {
138                            input: usage
139                                .get("input_tokens")
140                                .and_then(|v| v.as_u64())
141                                .unwrap_or(0),
142                            output: usage
143                                .get("output_tokens")
144                                .and_then(|v| v.as_u64())
145                                .unwrap_or(0),
146                            cache_read: usage
147                                .get("cache_read_input_tokens")
148                                .and_then(|v| v.as_u64()),
149                            cache_create: usage
150                                .get("cache_creation_input_tokens")
151                                .and_then(|v| v.as_u64()),
152                        };
153                        if let Some(ref req_id) = request_id {
154                            let existing = request_tokens.entry(req_id.clone()).or_default();
155                            existing.input = existing.input.max(tokens.input);
156                            existing.output = existing.output.max(tokens.output);
157                            if let Some(cr) = tokens.cache_read {
158                                *existing.cache_read.get_or_insert(0) =
159                                    existing.cache_read.unwrap_or(0).max(cr);
160                            }
161                            if let Some(cc) = tokens.cache_create {
162                                *existing.cache_create.get_or_insert(0) =
163                                    existing.cache_create.unwrap_or(0).max(cc);
164                            }
165                        }
166                    }
167
168                    // Extract model from first assistant message
169                    if session.metadata.model.is_none() {
170                        session.metadata.model = entry
171                            .get("message")
172                            .and_then(|m| m.get("model"))
173                            .and_then(|v| v.as_str())
174                            .map(String::from);
175                    }
176
177                    let message = parse_message(&entry, Role::Assistant);
178                    current_turn.messages.push(message);
179                    last_request_id = request_id;
180                }
181                "summary" => {
182                    // Extract session metadata from summary
183                    if session.metadata.session_id.is_none() {
184                        session.metadata.session_id = entry
185                            .get("sessionId")
186                            .and_then(|v| v.as_str())
187                            .map(String::from);
188                    }
189                    // Extract timestamp
190                    if session.metadata.timestamp.is_none() {
191                        session.metadata.timestamp = entry
192                            .get("timestamp")
193                            .and_then(|v| v.as_str())
194                            .map(String::from);
195                    }
196                }
197                _ => {}
198            }
199        }
200
201        // Flush final turn
202        if !current_turn.messages.is_empty() {
203            if let Some(req_id) = &last_request_id {
204                if let Some(usage) = request_tokens.remove(req_id) {
205                    current_turn.token_usage = Some(usage);
206                }
207            }
208            session.turns.push(current_turn);
209        }
210
211        // Set provider
212        session.metadata.provider = Some("anthropic".to_string());
213
214        Ok(session)
215    }
216}
217
218/// Parse a JSONL entry into a Message.
219fn parse_message(entry: &Value, role: Role) -> Message {
220    let mut content_blocks = Vec::new();
221
222    // Content can be a bare string (human-typed prompts) or an array of content blocks
223    // (tool results, assistant text blocks, etc.)
224    let content_value = entry.get("message").and_then(|m| m.get("content"));
225
226    if let Some(text) = content_value.and_then(|c| c.as_str()) {
227        if !text.is_empty() {
228            content_blocks.push(ContentBlock::Text {
229                text: text.to_string(),
230            });
231        }
232    } else if let Some(content) = content_value.and_then(|c| c.as_array()) {
233        for block in content {
234            let block_type = block.get("type").and_then(|v| v.as_str()).unwrap_or("");
235
236            match block_type {
237                "text" => {
238                    if let Some(text) = block.get("text").and_then(|v| v.as_str()) {
239                        content_blocks.push(ContentBlock::Text {
240                            text: text.to_string(),
241                        });
242                    }
243                }
244                "tool_use" => {
245                    let id = block
246                        .get("id")
247                        .and_then(|v| v.as_str())
248                        .unwrap_or("")
249                        .to_string();
250                    let name = block
251                        .get("name")
252                        .and_then(|v| v.as_str())
253                        .unwrap_or("")
254                        .to_string();
255                    let input = block.get("input").cloned().unwrap_or(Value::Null);
256                    content_blocks.push(ContentBlock::ToolUse { id, name, input });
257                }
258                "tool_result" => {
259                    let tool_use_id = block
260                        .get("tool_use_id")
261                        .and_then(|v| v.as_str())
262                        .unwrap_or("")
263                        .to_string();
264                    let result_content = block
265                        .get("content")
266                        .and_then(|v| v.as_str())
267                        .unwrap_or("")
268                        .to_string();
269                    let is_error = block
270                        .get("is_error")
271                        .and_then(|v| v.as_bool())
272                        .unwrap_or(false);
273                    content_blocks.push(ContentBlock::ToolResult {
274                        tool_use_id,
275                        content: result_content,
276                        is_error,
277                    });
278                }
279                "thinking" => {
280                    if let Some(text) = block.get("thinking").and_then(|v| v.as_str()) {
281                        content_blocks.push(ContentBlock::Thinking {
282                            text: text.to_string(),
283                        });
284                    }
285                }
286                _ => {}
287            }
288        }
289    }
290
291    Message {
292        role,
293        content: content_blocks,
294        timestamp: entry
295            .get("timestamp")
296            .and_then(|v| v.as_str())
297            .map(String::from),
298    }
299}