Skip to main content

agent_teams/util/
session_discovery.rs

1//! Auto-discovery of Claude Code JSONL session files.
2//!
3//! Claude Code stores session logs at `~/.claude/projects/{encoded-path}/`
4//! where path encoding replaces `/` with `-`. For example:
5//! `/Users/alex/myproject` → `-Users-alex-myproject`
6//!
7//! This module auto-discovers matching session files, parses token usage,
8//! and aggregates costs — eliminating the need for `--session-jsonl PATH`.
9
10use std::path::{Path, PathBuf};
11
12use chrono::{DateTime, Utc};
13
14use crate::error::{Error, Result};
15use crate::models::token::{
16    AgentTokenUsage, CostSummary, TokenUsage, ToolCallRecord,
17    estimate_cost, truncate_string, MAX_TOOL_INPUT_SUMMARY_LEN,
18};
19
20/// A discovered JSONL session file.
21#[derive(Debug, Clone)]
22pub struct SessionFile {
23    /// Full path to the JSONL file.
24    pub path: PathBuf,
25    /// Session UUID (extracted from filename).
26    pub session_id: String,
27    /// File modification time.
28    pub modified: Option<DateTime<Utc>>,
29    /// File size in bytes.
30    pub size: u64,
31}
32
/// Turn a project path into its encoded directory name by swapping every `/`
/// for `-`.
///
/// The path is converted to text lossily first; every character other than
/// `/` is passed through untouched.
///
/// `/Users/alex/myproject` → `-Users-alex-myproject`
pub fn encode_project_path(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|ch| if ch == '/' { '-' } else { ch })
        .collect()
}
40
41/// Discover JSONL session files for a given repository path.
42///
43/// Looks in `~/.claude/projects/{encoded-path}/` for `*.jsonl` files.
44/// Returns an empty vec if the directory doesn't exist or can't be read.
45pub fn discover_sessions(repo_path: &Path) -> Vec<SessionFile> {
46    let Some(home) = dirs::home_dir() else {
47        return Vec::new();
48    };
49
50    // Canonicalize the repo path for consistent encoding
51    let canonical = repo_path.canonicalize().unwrap_or_else(|_| repo_path.to_path_buf());
52    let encoded = encode_project_path(&canonical);
53    let projects_dir = home.join(".claude").join("projects").join(&encoded);
54
55    discover_sessions_in(&projects_dir)
56}
57
58/// Discover JSONL session files in a specific directory.
59fn discover_sessions_in(dir: &Path) -> Vec<SessionFile> {
60    let entries = match std::fs::read_dir(dir) {
61        Ok(e) => e,
62        Err(_) => return Vec::new(),
63    };
64
65    let mut sessions = Vec::new();
66
67    for entry in entries.flatten() {
68        let path = entry.path();
69        let name = entry.file_name();
70        let name = name.to_string_lossy();
71
72        if let Some(stem) = name.strip_suffix(".jsonl") {
73            let metadata = entry.metadata().ok();
74            let modified = metadata.as_ref().and_then(|m| {
75                m.modified().ok().map(|t| {
76                    DateTime::<Utc>::from(t)
77                })
78            });
79            let size = metadata.map(|m| m.len()).unwrap_or(0);
80
81            sessions.push(SessionFile {
82                path,
83                session_id: stem.to_string(),
84                modified,
85                size,
86            });
87        }
88    }
89
90    // Sort by modification time (newest first)
91    sessions.sort_by(|a, b| b.modified.cmp(&a.modified));
92
93    sessions
94}
95
96/// Parse a Claude Code JSONL session file for token usage and tool calls.
97///
98/// This replicates the parsing logic from `CheckpointCollector::parse_jsonl_session`
99/// but works independently of the checkpoint feature.
100pub fn parse_session_file(
101    path: &Path,
102) -> Result<(Vec<ToolCallRecord>, Option<TokenUsage>)> {
103    let content = std::fs::read_to_string(path).map_err(|e| Error::Other(
104        format!("Failed to read JSONL session file {}: {e}", path.display()),
105    ))?;
106
107    let mut tool_calls = Vec::new();
108    let mut total_input_tokens: u64 = 0;
109    let mut total_output_tokens: u64 = 0;
110    let mut total_cache_read: u64 = 0;
111    let mut total_cache_write: u64 = 0;
112    let mut has_usage = false;
113
114    for line in content.lines() {
115        let line = line.trim();
116        if line.is_empty() {
117            continue;
118        }
119
120        let value: serde_json::Value = match serde_json::from_str(line) {
121            Ok(v) => v,
122            Err(_) => continue,
123        };
124
125        // Extract tool calls (tool_name format)
126        if let Some(tool_name) = value.get("tool_name").and_then(|v| v.as_str()) {
127            let input_summary = value
128                .get("tool_input")
129                .map(|v| truncate_string(&v.to_string(), MAX_TOOL_INPUT_SUMMARY_LEN));
130
131            let timestamp = value
132                .get("timestamp")
133                .and_then(|v| v.as_str())
134                .and_then(|s| chrono::DateTime::parse_from_rfc3339(s).ok())
135                .map(|dt| dt.with_timezone(&chrono::Utc));
136
137            tool_calls.push(ToolCallRecord {
138                tool_name: tool_name.to_string(),
139                input_summary,
140                timestamp,
141            });
142        }
143
144        // Also check for "type": "tool_use" format (Claude API format)
145        if value.get("type").and_then(|v| v.as_str()) == Some("tool_use") {
146            if let Some(name) = value.get("name").and_then(|v| v.as_str()) {
147                let input_summary = value
148                    .get("input")
149                    .map(|v| truncate_string(&v.to_string(), MAX_TOOL_INPUT_SUMMARY_LEN));
150
151                tool_calls.push(ToolCallRecord {
152                    tool_name: name.to_string(),
153                    input_summary,
154                    timestamp: None,
155                });
156            }
157        }
158
159        // Extract token usage
160        if let Some(usage) = value.get("usage") {
161            has_usage = true;
162            if let Some(n) = usage.get("input_tokens").and_then(|v| v.as_u64()) {
163                total_input_tokens += n;
164            }
165            if let Some(n) = usage.get("output_tokens").and_then(|v| v.as_u64()) {
166                total_output_tokens += n;
167            }
168            if let Some(n) = usage.get("cache_read_input_tokens").and_then(|v| v.as_u64()) {
169                total_cache_read += n;
170            }
171            if let Some(n) = usage.get("cache_creation_input_tokens").and_then(|v| v.as_u64()) {
172                total_cache_write += n;
173            }
174        }
175    }
176
177    let token_usage = if has_usage {
178        Some(TokenUsage {
179            input_tokens: total_input_tokens,
180            output_tokens: total_output_tokens,
181            cache_read_tokens: if total_cache_read > 0 {
182                Some(total_cache_read)
183            } else {
184                None
185            },
186            cache_write_tokens: if total_cache_write > 0 {
187                Some(total_cache_write)
188            } else {
189                None
190            },
191        })
192    } else {
193        None
194    };
195
196    Ok((tool_calls, token_usage))
197}
198
199/// Parse only the token usage from a JSONL session file (ignores tool calls).
200pub fn parse_token_usage(path: &Path) -> Result<Option<TokenUsage>> {
201    let (_, usage) = parse_session_file(path)?;
202    Ok(usage)
203}
204
205/// Aggregate costs across multiple session files.
206///
207/// If `agent_name` is provided, all usage is attributed to that agent.
208/// Otherwise, tries to extract agent names from session files.
209pub fn aggregate_cost(
210    sessions: &[SessionFile],
211    agent_name: Option<&str>,
212) -> Result<CostSummary> {
213    let mut total_usage = TokenUsage {
214        input_tokens: 0,
215        output_tokens: 0,
216        cache_read_tokens: None,
217        cache_write_tokens: None,
218    };
219    let mut per_agent: Vec<AgentTokenUsage> = Vec::new();
220    let mut session_count = 0;
221
222    for session in sessions {
223        match parse_token_usage(&session.path)? {
224            Some(usage) => {
225                total_usage.merge(&usage);
226                session_count += 1;
227
228                let name = agent_name
229                    .map(String::from)
230                    .unwrap_or_else(|| extract_agent_name(&session.path));
231
232                // Merge into per_agent
233                if let Some(existing) = per_agent.iter_mut().find(|a| a.agent_name == name) {
234                    existing.usage.merge(&usage);
235                } else {
236                    per_agent.push(AgentTokenUsage {
237                        agent_name: name,
238                        usage,
239                    });
240                }
241            }
242            None => continue,
243        }
244    }
245
246    let estimated_cost_usd = estimate_cost(&total_usage);
247
248    Ok(CostSummary {
249        total_usage,
250        session_count,
251        per_agent,
252        estimated_cost_usd,
253    })
254}
255
256/// Try to extract agent name from JSONL file by scanning for agent-related fields.
257fn extract_agent_name(path: &Path) -> String {
258    if let Ok(content) = std::fs::read_to_string(path) {
259        // Check first few lines for an agent name hint
260        for line in content.lines().take(20) {
261            if let Ok(value) = serde_json::from_str::<serde_json::Value>(line) {
262                // Claude Code session files sometimes have a "slug" or "agentId" field
263                if let Some(name) = value.get("agentId").and_then(|v| v.as_str()) {
264                    return name.to_string();
265                }
266                if let Some(slug) = value.get("slug").and_then(|v| v.as_str()) {
267                    return slug.to_string();
268                }
269            }
270        }
271    }
272    // Fall back to session ID from filename
273    path.file_stem()
274        .and_then(|s| s.to_str())
275        .unwrap_or("unknown")
276        .to_string()
277}
278
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Write `body` to a file called `name` inside `dir` and return its path.
    fn write_fixture(dir: &TempDir, name: &str, body: &str) -> PathBuf {
        let target = dir.path().join(name);
        std::fs::write(&target, body).unwrap();
        target
    }

    #[test]
    fn encode_project_path_basic() {
        let encoded = encode_project_path(Path::new("/Users/alex/myproject"));
        assert_eq!(encoded, "-Users-alex-myproject");
    }

    #[test]
    fn encode_project_path_root() {
        let encoded = encode_project_path(Path::new("/"));
        assert_eq!(encoded, "-");
    }

    #[test]
    fn discover_sessions_empty_dir() {
        let tmp = TempDir::new().unwrap();
        assert!(discover_sessions_in(tmp.path()).is_empty());
    }

    #[test]
    fn discover_sessions_with_jsonl_files() {
        let tmp = TempDir::new().unwrap();
        for name in ["abc-123.jsonl", "def-456.jsonl", "not-session.json"] {
            write_fixture(&tmp, name, "{}");
        }

        // Only the two `.jsonl` entries may be reported, in any order.
        let mut ids: Vec<String> = discover_sessions_in(tmp.path())
            .into_iter()
            .map(|found| found.session_id)
            .collect();
        ids.sort();
        assert_eq!(ids, ["abc-123", "def-456"]);
    }

    #[test]
    fn parse_session_file_basic() {
        let tmp = TempDir::new().unwrap();
        // One tool call, two usage records, and one malformed line that must
        // be skipped without failing the parse.
        let log = write_fixture(
            &tmp,
            "session.jsonl",
            r#"{"tool_name":"Read","tool_input":{"path":"src/main.rs"},"timestamp":"2025-01-01T00:00:00Z"}
{"usage":{"input_tokens":1000,"output_tokens":500,"cache_read_input_tokens":200}}
{"invalid json
{"usage":{"input_tokens":2000,"output_tokens":300}}
"#,
        );

        let (calls, usage) = parse_session_file(&log).unwrap();

        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].tool_name, "Read");

        let totals = usage.unwrap();
        assert_eq!(totals.input_tokens, 3000);
        assert_eq!(totals.output_tokens, 800);
        assert_eq!(totals.cache_read_tokens, Some(200));
    }

    #[test]
    fn aggregate_cost_multiple_sessions() {
        let tmp = TempDir::new().unwrap();

        let first = write_fixture(
            &tmp,
            "session1.jsonl",
            r#"{"usage":{"input_tokens":1000,"output_tokens":500}}
"#,
        );
        let second = write_fixture(
            &tmp,
            "session2.jsonl",
            r#"{"usage":{"input_tokens":2000,"output_tokens":300}}
"#,
        );

        let sessions: Vec<SessionFile> = [(first, "s1"), (second, "s2")]
            .into_iter()
            .map(|(path, id)| SessionFile {
                path,
                session_id: id.into(),
                modified: None,
                size: 0,
            })
            .collect();

        let summary = aggregate_cost(&sessions, Some("test-agent")).unwrap();
        assert_eq!(summary.session_count, 2);
        assert_eq!(summary.total_usage.input_tokens, 3000);
        assert_eq!(summary.total_usage.output_tokens, 800);
        assert_eq!(summary.per_agent.len(), 1);
        assert_eq!(summary.per_agent[0].agent_name, "test-agent");
        assert!(summary.estimated_cost_usd > 0.0);
    }

    #[test]
    fn aggregate_cost_empty_sessions() {
        let summary = aggregate_cost(&[], None).unwrap();
        assert_eq!(summary.session_count, 0);
        assert_eq!(summary.total_usage.input_tokens, 0);
        assert_eq!(summary.estimated_cost_usd, 0.0);
    }
}