cortex_rs_stats/
log_scraper.rs1use anyhow::Result;
2use chrono::{Duration, Utc};
3use serde_json::Value;
4use std::collections::HashMap;
5use std::path::PathBuf;
6
/// Token usage totals accumulated for a single model.
///
/// Every counter starts at zero through the derived [`Default`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct TokenCounts {
    /// Sum of the `input_tokens` usage field.
    pub input: u64,
    /// Sum of the `output_tokens` usage field.
    pub output: u64,
    /// Sum of the `cache_creation_input_tokens` usage field.
    pub cache_creation_input: u64,
    /// Sum of the `cache_read_input_tokens` usage field.
    pub cache_read_input: u64,
}
14
15pub fn scrape_claude_logs(days: u32) -> Result<HashMap<String, TokenCounts>> {
18 let claude_dir = claude_dir();
19 if !claude_dir.exists() {
20 return Ok(HashMap::new());
21 }
22
23 let cutoff = Utc::now() - Duration::days(days as i64);
24 let cutoff_ts = cutoff.timestamp();
25 let mut totals: HashMap<String, TokenCounts> = HashMap::new();
26
27 scan_dir(&claude_dir, cutoff_ts, &mut totals)?;
28
29 Ok(totals)
30}
31
32fn scan_dir(
33 dir: &std::path::Path,
34 cutoff_ts: i64,
35 totals: &mut HashMap<String, TokenCounts>,
36) -> Result<()> {
37 let entries = match std::fs::read_dir(dir) {
38 Ok(e) => e,
39 Err(_) => return Ok(()),
40 };
41
42 for entry in entries.flatten() {
43 let path = entry.path();
44 if path.is_dir() {
45 scan_dir(&path, cutoff_ts, totals)?;
46 } else if path.extension().map(|e| e == "jsonl").unwrap_or(false) {
47 if let Ok(meta) = entry.metadata() {
50 if let Ok(modified) = meta.modified() {
51 if let Ok(elapsed) = modified.duration_since(std::time::UNIX_EPOCH) {
52 if (elapsed.as_secs() as i64) < cutoff_ts {
53 continue;
54 }
55 }
56 }
57 }
58 parse_jsonl(&path, cutoff_ts, totals)?;
59 }
60 }
61 Ok(())
62}
63
64fn parse_jsonl(
65 path: &std::path::Path,
66 cutoff_ts: i64,
67 totals: &mut HashMap<String, TokenCounts>,
68) -> Result<()> {
69 let content = match std::fs::read_to_string(path) {
70 Ok(c) => c,
71 Err(_) => return Ok(()),
72 };
73
74 for line in content.lines() {
75 let line = line.trim();
76 if line.is_empty() {
77 continue;
78 }
79 let Ok(v): Result<Value, _> = serde_json::from_str(line) else {
80 continue;
81 };
82
83 let Some(ts) = parse_timestamp(v.get("timestamp")) else {
87 continue;
88 };
89 if ts < cutoff_ts {
90 continue;
91 }
92
93 extract_usage(&v, totals);
94 }
95
96 Ok(())
97}
98
99fn parse_timestamp(v: Option<&Value>) -> Option<i64> {
101 let v = v?;
102 if let Some(i) = v.as_i64() {
103 return Some(if i > 1_000_000_000_000 { i / 1000 } else { i });
105 }
106 if let Some(s) = v.as_str() {
107 return chrono::DateTime::parse_from_rfc3339(s)
108 .ok()
109 .map(|dt| dt.timestamp());
110 }
111 None
112}
113
114fn extract_usage(v: &Value, totals: &mut HashMap<String, TokenCounts>) {
115 if let (Some(model), Some(usage)) = (
116 v.get("model").and_then(|m| m.as_str()),
117 v.get("usage"),
118 ) {
119 let input = usage.get("input_tokens").and_then(|t| t.as_u64()).unwrap_or(0);
120 let output = usage.get("output_tokens").and_then(|t| t.as_u64()).unwrap_or(0);
121 let cache_creation = usage
122 .get("cache_creation_input_tokens")
123 .and_then(|t| t.as_u64())
124 .unwrap_or(0);
125 let cache_read = usage
126 .get("cache_read_input_tokens")
127 .and_then(|t| t.as_u64())
128 .unwrap_or(0);
129
130 if input + output + cache_creation + cache_read > 0 {
131 let entry = totals.entry(model.to_string()).or_default();
132 entry.input += input;
133 entry.output += output;
134 entry.cache_creation_input += cache_creation;
135 entry.cache_read_input += cache_read;
136 return;
137 }
138 }
139
140 for key in &["message", "result", "response"] {
141 if let Some(nested) = v.get(key) {
142 extract_usage(nested, totals);
143 }
144 }
145}
146
/// Returns the `.claude` directory inside the user's home directory.
///
/// The home directory is read from `HOME`, falling back to `USERPROFILE` when
/// `HOME` is unset or empty. Values are read with `var_os`, so a home path
/// that is not valid Unicode is still honoured instead of being treated as
/// missing. When neither variable holds a non-empty value the result is the
/// relative path `.claude`.
fn claude_dir() -> PathBuf {
    let home = ["HOME", "USERPROFILE"]
        .iter()
        .filter_map(|name| std::env::var_os(*name))
        .find(|value| !value.is_empty())
        .unwrap_or_default();
    PathBuf::from(home).join(".claude")
}
151
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Writes `lines` to a fresh `session.jsonl` inside a temporary directory.
    ///
    /// The returned directory guard must be kept alive for as long as the
    /// path is used; dropping it removes the file.
    fn write_session(lines: &[&str]) -> (tempfile::TempDir, std::path::PathBuf) {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("session.jsonl");
        let mut f = std::fs::File::create(&path).unwrap();
        for line in lines {
            writeln!(f, "{}", line).unwrap();
        }
        (dir, path)
    }

    #[test]
    fn parses_usage_from_jsonl_line() {
        let mut totals = HashMap::new();
        let line = r#"{"timestamp": 9999999999, "model": "claude-sonnet-4-6", "usage": {"input_tokens": 1000, "output_tokens": 500}}"#;
        let v: Value = serde_json::from_str(line).unwrap();
        extract_usage(&v, &mut totals);
        let counts = totals.get("claude-sonnet-4-6").copied().unwrap();
        assert_eq!(counts.input, 1000);
        assert_eq!(counts.output, 500);
        assert_eq!(counts.cache_creation_input, 0);
        assert_eq!(counts.cache_read_input, 0);
    }

    #[test]
    fn parses_cache_tokens() {
        let mut totals = HashMap::new();
        let line = r#"{"timestamp": 9999999999, "model": "claude-opus-4-7", "usage": {"input_tokens": 6, "output_tokens": 170, "cache_creation_input_tokens": 44102, "cache_read_input_tokens": 0}}"#;
        let v: Value = serde_json::from_str(line).unwrap();
        extract_usage(&v, &mut totals);
        let c = totals.get("claude-opus-4-7").copied().unwrap();
        assert_eq!(c.input, 6);
        assert_eq!(c.output, 170);
        assert_eq!(c.cache_creation_input, 44102);
        // The fourth counter was previously left unchecked.
        assert_eq!(c.cache_read_input, 0);
    }

    #[test]
    fn extracts_usage_nested_under_message() {
        // Usage that sits inside a `message` member must be found by recursion.
        let mut totals = HashMap::new();
        let line = r#"{"timestamp": 9999999999, "message": {"model": "claude-sonnet-4-6", "usage": {"input_tokens": 3, "output_tokens": 4, "cache_read_input_tokens": 5}}}"#;
        let v: Value = serde_json::from_str(line).unwrap();
        extract_usage(&v, &mut totals);
        let c = totals.get("claude-sonnet-4-6").copied().unwrap();
        assert_eq!(c.input, 3);
        assert_eq!(c.output, 4);
        assert_eq!(c.cache_creation_input, 0);
        assert_eq!(c.cache_read_input, 5);
    }

    #[test]
    fn accumulates_across_entries() {
        // Two records for the same model add up in a single map entry.
        let mut totals = HashMap::new();
        let line = r#"{"model": "claude-opus-4-7", "usage": {"input_tokens": 10, "output_tokens": 1}}"#;
        let v: Value = serde_json::from_str(line).unwrap();
        extract_usage(&v, &mut totals);
        extract_usage(&v, &mut totals);
        assert_eq!(totals.len(), 1);
        let c = totals.get("claude-opus-4-7").copied().unwrap();
        assert_eq!(c.input, 20);
        assert_eq!(c.output, 2);
    }

    #[test]
    fn skips_old_entries() {
        let (_dir, path) = write_session(&[
            r#"{"timestamp": 0, "model": "claude-opus-4-7", "usage": {"input_tokens": 9999, "output_tokens": 9999}}"#,
        ]);

        let mut totals = HashMap::new();
        parse_jsonl(&path, chrono::Utc::now().timestamp() - 100, &mut totals).unwrap();
        assert!(totals.is_empty(), "old entry should be skipped");
    }

    #[test]
    fn parses_iso_timestamp() {
        let ts = parse_timestamp(Some(&serde_json::json!("2026-05-13T14:25:07.145Z")));
        assert_eq!(ts, Some(1778682307));
    }

    #[test]
    fn parses_int_milliseconds() {
        let ts = parse_timestamp(Some(&serde_json::json!(1778386184520_i64)));
        assert_eq!(ts.unwrap(), 1778386184);
    }

    #[test]
    fn parses_int_seconds() {
        let ts = parse_timestamp(Some(&serde_json::json!(1778386184_i64)));
        assert_eq!(ts.unwrap(), 1778386184);
    }

    #[test]
    fn untimestamped_entries_excluded() {
        let (_dir, path) = write_session(&[
            r#"{"model": "claude-opus-4-7", "usage": {"input_tokens": 9999, "output_tokens": 9999}}"#,
        ]);

        let mut totals = HashMap::new();
        parse_jsonl(&path, 0, &mut totals).unwrap();
        assert!(totals.is_empty(), "untimestamped row leaked through cutoff filter");
    }

    #[test]
    fn iso_timestamp_in_window_is_counted() {
        let recent = chrono::Utc::now() - chrono::Duration::hours(1);
        let line = format!(
            r#"{{"timestamp": "{}", "model": "claude-sonnet-4-6", "usage": {{"input_tokens": 100, "output_tokens": 50}}}}"#,
            recent.to_rfc3339()
        );
        let (_dir, path) = write_session(&[line.as_str()]);

        let mut totals = HashMap::new();
        let cutoff = (chrono::Utc::now() - chrono::Duration::days(7)).timestamp();
        parse_jsonl(&path, cutoff, &mut totals).unwrap();
        let c = totals.get("claude-sonnet-4-6").copied().unwrap();
        assert_eq!(c.input, 100);
        assert_eq!(c.output, 50);
    }
}