Skip to main content

cortex_rs_stats/
log_scraper.rs

1use anyhow::Result;
2use chrono::{Duration, Utc};
3use serde_json::Value;
4use std::collections::HashMap;
5use std::path::PathBuf;
6
/// Token usage totals accumulated for a single model.
///
/// Each field is the running sum of the same-purpose key found in a log
/// entry's `usage` object. `Default` yields all-zero counts.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct TokenCounts {
    /// Sum of `usage.input_tokens`.
    pub input: u64,
    /// Sum of `usage.output_tokens`.
    pub output: u64,
    /// Sum of `usage.cache_creation_input_tokens`.
    pub cache_creation_input: u64,
    /// Sum of `usage.cache_read_input_tokens`.
    pub cache_read_input: u64,
}
14
15/// Scan ~/.claude/ JSONL session files for usage data within `days`.
16/// Returns map of model → TokenCounts (input/output/cache_creation/cache_read).
17pub fn scrape_claude_logs(days: u32) -> Result<HashMap<String, TokenCounts>> {
18    let claude_dir = claude_dir();
19    if !claude_dir.exists() {
20        return Ok(HashMap::new());
21    }
22
23    let cutoff = Utc::now() - Duration::days(days as i64);
24    let cutoff_ts = cutoff.timestamp();
25    let mut totals: HashMap<String, TokenCounts> = HashMap::new();
26
27    scan_dir(&claude_dir, cutoff_ts, &mut totals)?;
28
29    Ok(totals)
30}
31
32fn scan_dir(
33    dir: &std::path::Path,
34    cutoff_ts: i64,
35    totals: &mut HashMap<String, TokenCounts>,
36) -> Result<()> {
37    let entries = match std::fs::read_dir(dir) {
38        Ok(e) => e,
39        Err(_) => return Ok(()),
40    };
41
42    for entry in entries.flatten() {
43        let path = entry.path();
44        if path.is_dir() {
45            scan_dir(&path, cutoff_ts, totals)?;
46        } else if path.extension().map(|e| e == "jsonl").unwrap_or(false) {
47            // File-level short circuit: if the whole file hasn't been touched
48            // since the cutoff, no line in it can be in-window.
49            if let Ok(meta) = entry.metadata() {
50                if let Ok(modified) = meta.modified() {
51                    if let Ok(elapsed) = modified.duration_since(std::time::UNIX_EPOCH) {
52                        if (elapsed.as_secs() as i64) < cutoff_ts {
53                            continue;
54                        }
55                    }
56                }
57            }
58            parse_jsonl(&path, cutoff_ts, totals)?;
59        }
60    }
61    Ok(())
62}
63
64fn parse_jsonl(
65    path: &std::path::Path,
66    cutoff_ts: i64,
67    totals: &mut HashMap<String, TokenCounts>,
68) -> Result<()> {
69    let content = match std::fs::read_to_string(path) {
70        Ok(c) => c,
71        Err(_) => return Ok(()),
72    };
73
74    for line in content.lines() {
75        let line = line.trim();
76        if line.is_empty() {
77            continue;
78        }
79        let Ok(v): Result<Value, _> = serde_json::from_str(line) else {
80            continue;
81        };
82
83        // Real Claude logs use two timestamp formats — int milliseconds in
84        // history.jsonl, and ISO 8601 strings in session JSONLs. Parse both.
85        // If neither parses, skip the entry conservatively (don't count untimestamped rows).
86        let Some(ts) = parse_timestamp(v.get("timestamp")) else {
87            continue;
88        };
89        if ts < cutoff_ts {
90            continue;
91        }
92
93        extract_usage(&v, totals);
94    }
95
96    Ok(())
97}
98
99/// Accepts: int seconds, int milliseconds, ISO 8601 string. Returns unix seconds.
100fn parse_timestamp(v: Option<&Value>) -> Option<i64> {
101    let v = v?;
102    if let Some(i) = v.as_i64() {
103        // Disambiguate ms vs s by magnitude. Anything > 1e12 is ms (year 33658 in s).
104        return Some(if i > 1_000_000_000_000 { i / 1000 } else { i });
105    }
106    if let Some(s) = v.as_str() {
107        return chrono::DateTime::parse_from_rfc3339(s)
108            .ok()
109            .map(|dt| dt.timestamp());
110    }
111    None
112}
113
114fn extract_usage(v: &Value, totals: &mut HashMap<String, TokenCounts>) {
115    if let (Some(model), Some(usage)) = (
116        v.get("model").and_then(|m| m.as_str()),
117        v.get("usage"),
118    ) {
119        let input = usage.get("input_tokens").and_then(|t| t.as_u64()).unwrap_or(0);
120        let output = usage.get("output_tokens").and_then(|t| t.as_u64()).unwrap_or(0);
121        let cache_creation = usage
122            .get("cache_creation_input_tokens")
123            .and_then(|t| t.as_u64())
124            .unwrap_or(0);
125        let cache_read = usage
126            .get("cache_read_input_tokens")
127            .and_then(|t| t.as_u64())
128            .unwrap_or(0);
129
130        if input + output + cache_creation + cache_read > 0 {
131            let entry = totals.entry(model.to_string()).or_default();
132            entry.input += input;
133            entry.output += output;
134            entry.cache_creation_input += cache_creation;
135            entry.cache_read_input += cache_read;
136            return;
137        }
138    }
139
140    for key in &["message", "result", "response"] {
141        if let Some(nested) = v.get(key) {
142            extract_usage(nested, totals);
143        }
144    }
145}
146
/// Returns the directory that is scanned for Claude logs: `<home>/.claude`.
///
/// The home directory is read from `HOME`, falling back to `USERPROFILE`;
/// empty values are ignored, and non-UTF-8 values are used as-is. If neither
/// variable provides a value the result is the relative path `.claude`.
fn claude_dir() -> PathBuf {
    // `var_os` (rather than `var`) keeps a home path that is not valid
    // Unicode instead of discarding it.
    let home = ["HOME", "USERPROFILE"]
        .iter()
        .filter_map(|name| std::env::var_os(name))
        .find(|value| !value.is_empty())
        .unwrap_or_default();

    PathBuf::from(home).join(".claude")
}
151
/// Unit tests for timestamp parsing, usage extraction and cutoff filtering.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Top-level `model` + `usage` are counted; absent cache keys read as 0.
    #[test]
    fn parses_usage_from_jsonl_line() {
        let mut totals = HashMap::new();
        let line = r#"{"timestamp": 9999999999, "model": "claude-sonnet-4-6", "usage": {"input_tokens": 1000, "output_tokens": 500}}"#;
        let v: Value = serde_json::from_str(line).unwrap();
        extract_usage(&v, &mut totals);
        let counts = totals.get("claude-sonnet-4-6").copied().unwrap();
        assert_eq!(counts.input, 1000);
        assert_eq!(counts.output, 500);
        assert_eq!(counts.cache_creation_input, 0);
        assert_eq!(counts.cache_read_input, 0);
    }

    /// Cache creation / cache read counters are picked up alongside in/out.
    #[test]
    fn parses_cache_tokens() {
        let mut totals = HashMap::new();
        let line = r#"{"timestamp": 9999999999, "model": "claude-opus-4-7", "usage": {"input_tokens": 6, "output_tokens": 170, "cache_creation_input_tokens": 44102, "cache_read_input_tokens": 0}}"#;
        let v: Value = serde_json::from_str(line).unwrap();
        extract_usage(&v, &mut totals);
        let c = totals.get("claude-opus-4-7").copied().unwrap();
        assert_eq!(c.input, 6);
        assert_eq!(c.output, 170);
        assert_eq!(c.cache_creation_input, 44102);
        assert_eq!(c.cache_read_input, 0);
    }

    /// Usage wrapped in a `message` object is found by the nested lookup.
    #[test]
    fn extracts_usage_nested_under_message() {
        let mut totals = HashMap::new();
        let v = serde_json::json!({
            "timestamp": 9999999999_i64,
            "message": {
                "model": "claude-sonnet-4-6",
                "usage": {"input_tokens": 7, "output_tokens": 3}
            }
        });
        extract_usage(&v, &mut totals);
        let c = totals.get("claude-sonnet-4-6").copied().unwrap();
        assert_eq!(c.input, 7);
        assert_eq!(c.output, 3);
    }

    /// Entries older than the cutoff contribute nothing.
    #[test]
    fn skips_old_entries() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("session.jsonl");
        let mut f = std::fs::File::create(&path).unwrap();
        // timestamp 0 = very old
        writeln!(f, r#"{{"timestamp": 0, "model": "claude-opus-4-7", "usage": {{"input_tokens": 9999, "output_tokens": 9999}}}}"#).unwrap();

        let mut totals = HashMap::new();
        parse_jsonl(&path, chrono::Utc::now().timestamp() - 100, &mut totals).unwrap();
        assert!(totals.is_empty(), "old entry should be skipped");
    }

    /// String timestamps are converted to whole Unix seconds.
    #[test]
    fn parses_iso_timestamp() {
        let ts = parse_timestamp(Some(&serde_json::json!("2026-05-13T14:25:07.145Z")));
        assert!(ts.is_some());
        // 2026-05-13T14:25:07Z = 1778682307
        assert_eq!(ts.unwrap(), 1778682307);
    }

    /// Integers above the magnitude threshold are treated as milliseconds.
    #[test]
    fn parses_int_milliseconds() {
        let ts = parse_timestamp(Some(&serde_json::json!(1778386184520_i64)));
        assert_eq!(ts.unwrap(), 1778386184);
    }

    /// Integers below the magnitude threshold are taken as seconds verbatim.
    #[test]
    fn parses_int_seconds() {
        let ts = parse_timestamp(Some(&serde_json::json!(1778386184_i64)));
        assert_eq!(ts.unwrap(), 1778386184);
    }

    /// Rows without a timestamp are never counted, even with cutoff 0.
    #[test]
    fn untimestamped_entries_excluded() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("session.jsonl");
        let mut f = std::fs::File::create(&path).unwrap();
        // No timestamp field — must NOT be counted
        writeln!(f, r#"{{"model": "claude-opus-4-7", "usage": {{"input_tokens": 9999, "output_tokens": 9999}}}}"#).unwrap();

        let mut totals = HashMap::new();
        parse_jsonl(&path, 0, &mut totals).unwrap();
        assert!(totals.is_empty(), "untimestamped row leaked through cutoff filter");
    }

    /// A recent string-timestamped row inside the window is counted.
    #[test]
    fn iso_timestamp_in_window_is_counted() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("session.jsonl");
        let mut f = std::fs::File::create(&path).unwrap();
        let recent = chrono::Utc::now() - chrono::Duration::hours(1);
        let iso = recent.to_rfc3339();
        writeln!(
            f,
            r#"{{"timestamp": "{}", "model": "claude-sonnet-4-6", "usage": {{"input_tokens": 100, "output_tokens": 50}}}}"#,
            iso
        )
        .unwrap();

        let mut totals = HashMap::new();
        let cutoff = (chrono::Utc::now() - chrono::Duration::days(7)).timestamp();
        parse_jsonl(&path, cutoff, &mut totals).unwrap();
        let c = totals.get("claude-sonnet-4-6").copied().unwrap();
        assert_eq!(c.input, 100);
        assert_eq!(c.output, 50);
    }
}
248}