use anyhow::{Context, Result};
use serde_json::Value;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
use crate::ingest::UsageRow;
use crate::paths;
/// Returns the directory that holds Codex session rollout files.
///
/// When the `CODEX_HOME` environment variable is set, the result is
/// `$CODEX_HOME/sessions`; otherwise it is `<home>/.codex/sessions`, where
/// `<home>` comes from [`paths::home`].
///
/// The variable is read with [`std::env::var_os`] so that a value which is not
/// valid Unicode is still honoured instead of being silently ignored.
///
/// # Errors
///
/// Propagates the error from [`paths::home`] when `CODEX_HOME` is unset.
pub fn sessions_dir() -> Result<PathBuf> {
    if let Some(codex_home) = std::env::var_os("CODEX_HOME") {
        return Ok(PathBuf::from(codex_home).join("sessions"));
    }
    Ok(paths::home()?.join(".codex").join("sessions"))
}
/// Collects every rollout transcript found beneath [`sessions_dir`].
///
/// A path is included when it is a regular file whose name starts with
/// `rollout-` and whose extension is `jsonl`. Directory entries that fail to
/// be read during the walk are skipped. If the sessions directory does not
/// exist, an empty list is returned.
///
/// # Errors
///
/// Propagates the error from [`sessions_dir`].
pub fn enumerate_files() -> Result<Vec<PathBuf>> {
    let root = sessions_dir()?;
    let mut found = Vec::new();
    if !root.exists() {
        return Ok(found);
    }

    for entry in WalkDir::new(&root).into_iter().flatten() {
        if !entry.file_type().is_file() {
            continue;
        }

        let path = entry.path();
        let has_jsonl_ext = matches!(
            path.extension().and_then(|ext| ext.to_str()),
            Some("jsonl")
        );
        let has_rollout_prefix = path
            .file_name()
            .and_then(|name| name.to_str())
            .is_some_and(|name| name.starts_with("rollout-"));

        if has_jsonl_ext && has_rollout_prefix {
            found.push(path.to_path_buf());
        }
    }

    Ok(found)
}
pub fn parse_rollout(path: &Path, consume_to: Option<usize>) -> Result<Vec<UsageRow>> {
let bytes = std::fs::read(path).with_context(|| format!("reading {}", path.display()))?;
let limit = consume_to.unwrap_or(bytes.len()).min(bytes.len());
let safe = match bytes[..limit].iter().rposition(|b| *b == b'\n') {
Some(i) => &bytes[..=i],
None => &bytes[..0],
};
let mut session_id = String::new();
let mut cwd: Option<String> = None;
let mut current_model = "unknown".to_string();
let mut rows = Vec::new();
let mut event_idx: u64 = 0;
let transcript_path = path.to_string_lossy().to_string();
for line in safe.split(|b| *b == b'\n') {
if line.is_empty() {
continue;
}
let Ok(line_str) = std::str::from_utf8(line) else {
continue;
};
let v: Value = match serde_json::from_str(line_str) {
Ok(v) => v,
Err(_) => continue,
};
let typ = v.get("type").and_then(|x| x.as_str()).unwrap_or("");
let Some(payload) = v.get("payload") else {
continue;
};
match typ {
"session_meta" => {
if let Some(id) = payload.get("id").and_then(|x| x.as_str()) {
session_id = id.to_string();
}
if let Some(c) = payload.get("cwd").and_then(|x| x.as_str()) {
cwd = Some(c.to_string());
}
}
"turn_context" => {
if let Some(m) = payload.get("model").and_then(|x| x.as_str()) {
current_model = m.to_string();
}
if let Some(c) = payload.get("cwd").and_then(|x| x.as_str()) {
cwd = Some(c.to_string());
}
}
"event_msg" => {
if payload.get("type").and_then(|x| x.as_str()) != Some("token_count") {
continue;
}
let info = payload.get("info");
let info = match info {
Some(i) if !i.is_null() => i,
_ => continue,
};
let last = info.get("last_token_usage");
let last = match last {
Some(l) if !l.is_null() => l,
_ => continue,
};
let in_total = u(last, "input_tokens");
let cached = u(last, "cached_input_tokens");
let output = u(last, "output_tokens");
let reasoning = u(last, "reasoning_output_tokens");
if in_total == 0 && output == 0 {
continue;
}
let timestamp = v
.get("timestamp")
.and_then(|x| x.as_str())
.unwrap_or("")
.to_string();
if timestamp.is_empty() || session_id.is_empty() {
continue;
}
let project_path = cwd.clone().unwrap_or_default();
let message_id = format!("codex:{session_id}:{event_idx:08}");
event_idx += 1;
rows.push(UsageRow {
message_id,
source: "codex".into(),
uuid: session_id.clone(),
session_id: session_id.clone(),
project_path,
cwd: cwd.clone(),
transcript_path: transcript_path.clone(),
timestamp,
model: current_model.clone(),
is_sidechain: false,
input_tokens: in_total.saturating_sub(cached),
output_tokens: output,
cache_creation_5m: 0,
cache_creation_1h: 0,
cache_read_tokens: cached,
reasoning_tokens: reasoning,
});
}
_ => {}
}
}
Ok(rows)
}
fn u(v: &Value, k: &str) -> u64 {
v.get(k).and_then(serde_json::Value::as_u64).unwrap_or(0)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Writes `contents` to `file_name` in the system temp directory, runs
    /// [`parse_rollout`] on it, and deletes the file before returning the rows.
    ///
    /// Cleanup happens before the result is unwrapped, so a failing parse or a
    /// failing assertion in the caller does not leave the fixture behind.
    fn parse_fixture(
        file_name: &str,
        contents: &str,
        consume_to: Option<usize>,
    ) -> Vec<crate::ingest::UsageRow> {
        let path = std::env::temp_dir().join(file_name);
        std::fs::write(&path, contents).expect("writing fixture to the temp dir");
        let parsed = parse_rollout(&path, consume_to);
        let _ = std::fs::remove_file(&path);
        parsed.expect("parse_rollout should succeed on a readable fixture")
    }

    /// Cached input tokens are split out of the reported input total.
    #[test]
    fn parses_token_count_with_split_input() {
        let rows = parse_fixture(
            "tokr-codex-test.jsonl",
            r#"{"timestamp":"2026-04-16T23:47:22.210Z","type":"session_meta","payload":{"id":"019d-test","cwd":"/work/foo"}}
{"timestamp":"2026-04-16T23:47:22.212Z","type":"turn_context","payload":{"model":"gpt-5.4"}}
{"timestamp":"2026-04-16T23:47:32.847Z","type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"input_tokens":45097,"cached_input_tokens":5504,"output_tokens":582,"reasoning_output_tokens":305,"total_tokens":45679},"last_token_usage":{"input_tokens":45097,"cached_input_tokens":5504,"output_tokens":582,"reasoning_output_tokens":305,"total_tokens":45679}}}}
"#,
            None,
        );
        assert_eq!(rows.len(), 1);
        let r = &rows[0];
        assert_eq!(r.source, "codex");
        assert_eq!(r.model, "gpt-5.4");
        assert_eq!(r.session_id, "019d-test");
        assert_eq!(r.input_tokens, 45097 - 5504);
        assert_eq!(r.cache_read_tokens, 5504);
        assert_eq!(r.output_tokens, 582);
        assert_eq!(r.reasoning_tokens, 305);
        assert_eq!(r.cache_creation_5m, 0);
        assert_eq!(r.message_id, "codex:019d-test:00000000");
    }

    /// Message ids are derived from an ordinal, not from the timestamp.
    #[test]
    fn ordinals_are_unique_even_with_duplicate_timestamps() {
        let rows = parse_fixture(
            "tokr-codex-test-dup-ts.jsonl",
            r#"{"timestamp":"2026-04-16T23:47:22.210Z","type":"session_meta","payload":{"id":"sess-dup","cwd":"/w"}}
{"timestamp":"2026-04-16T23:47:22.212Z","type":"turn_context","payload":{"model":"gpt-5.4"}}
{"timestamp":"2026-04-16T23:47:32.847Z","type":"event_msg","payload":{"type":"token_count","info":{"last_token_usage":{"input_tokens":100,"cached_input_tokens":0,"output_tokens":10,"reasoning_output_tokens":0,"total_tokens":110}}}}
{"timestamp":"2026-04-16T23:47:32.847Z","type":"event_msg","payload":{"type":"token_count","info":{"last_token_usage":{"input_tokens":200,"cached_input_tokens":0,"output_tokens":20,"reasoning_output_tokens":0,"total_tokens":220}}}}
"#,
            None,
        );
        assert_eq!(rows.len(), 2);
        assert_ne!(rows[0].message_id, rows[1].message_id);
        assert_eq!(rows[0].message_id, "codex:sess-dup:00000000");
        assert_eq!(rows[1].message_id, "codex:sess-dup:00000001");
    }

    /// A `token_count` event whose `info` is null produces no row.
    #[test]
    fn skips_init_events_with_null_info() {
        let rows = parse_fixture(
            "tokr-codex-test-null.jsonl",
            r#"{"timestamp":"2026-04-16T23:47:22.210Z","type":"session_meta","payload":{"id":"x","cwd":"/"}}
{"timestamp":"2026-04-16T23:47:22.212Z","type":"turn_context","payload":{"model":"gpt-5"}}
{"timestamp":"2026-04-16T23:47:22.447Z","type":"event_msg","payload":{"type":"token_count","info":null}}
"#,
            None,
        );
        assert_eq!(rows.len(), 0);
    }

    /// A line that is not newline-terminated within the `consume_to` window is
    /// ignored, and an oversized `consume_to` is clamped to the file length.
    #[test]
    fn consume_to_ignores_trailing_partial_line() {
        let contents = r#"{"timestamp":"2026-04-16T23:47:22.210Z","type":"session_meta","payload":{"id":"sess-cut","cwd":"/w"}}
{"timestamp":"2026-04-16T23:47:22.212Z","type":"turn_context","payload":{"model":"gpt-5.4"}}
{"timestamp":"2026-04-16T23:47:32.847Z","type":"event_msg","payload":{"type":"token_count","info":{"last_token_usage":{"input_tokens":100,"cached_input_tokens":0,"output_tokens":10,"reasoning_output_tokens":0,"total_tokens":110}}}}
"#;
        let name = "tokr-codex-test-cut.jsonl";

        // Dropping the final newline leaves the token_count line incomplete.
        let truncated = parse_fixture(name, contents, Some(contents.len() - 1));
        assert_eq!(truncated.len(), 0);

        let exact = parse_fixture(name, contents, Some(contents.len()));
        assert_eq!(exact.len(), 1);

        let oversized = parse_fixture(name, contents, Some(usize::MAX));
        assert_eq!(oversized.len(), 1);
        assert_eq!(oversized[0].message_id, "codex:sess-cut:00000000");
    }
}