use crate::collect::model_from_json;
use crate::core::cost::estimate_tail_event_cost_usd_e6;
use crate::core::event::{Event, EventKind, EventSource, SessionRecord, SessionStatus};
use anyhow::{Context, Result};
use serde_json::Value;
use std::path::Path;
pub fn parse_cursor_line(
session_id: &str,
seq: u64,
base_ts: u64,
line: &str,
) -> Result<Option<Event>> {
let v: Value = serde_json::from_str(line.trim()).context("cursor transcript: invalid JSON")?;
let obj = match v.as_object() {
Some(o) => o,
None => return Ok(None),
};
let content = obj
.get("message")
.and_then(|m| m.get("content"))
.and_then(|c| c.as_array());
let content = match content {
Some(c) => c,
None => return Ok(None),
};
let ts_ms = line_ts_ms(obj).unwrap_or(base_ts + seq * 100);
let ts_exact = line_ts_ms(obj).is_some();
let (tokens_in, tokens_out, reasoning_tokens) = line_usage_tokens(obj);
let line_model = model_from_json::from_object(obj);
let cost_usd_e6 = estimate_tail_event_cost_usd_e6(
line_model.as_deref(),
tokens_in,
tokens_out,
reasoning_tokens,
);
for block in content {
let block_type = block.get("type").and_then(|t| t.as_str()).unwrap_or("");
match block_type {
"tool_use" => {
let tool_name = block
.get("name")
.and_then(|n| n.as_str())
.unwrap_or("")
.to_string();
if tool_name == "TodoWrite" {
return Ok(Some(todo_write_lifecycle(
session_id,
seq,
ts_ms,
ts_exact,
(tokens_in, tokens_out, reasoning_tokens, cost_usd_e6),
block,
)));
}
return Ok(Some(Event {
session_id: session_id.to_string(),
seq,
ts_ms,
ts_exact,
kind: EventKind::ToolCall,
source: EventSource::Tail,
tool: Some(tool_name),
tool_call_id: block
.get("id")
.and_then(|v| v.as_str())
.map(ToOwned::to_owned),
tokens_in,
tokens_out,
reasoning_tokens,
cost_usd_e6,
stop_reason: None,
latency_ms: None,
ttft_ms: None,
retry_count: None,
context_used_tokens: None,
context_max_tokens: None,
cache_creation_tokens: None,
cache_read_tokens: None,
system_prompt_tokens: None,
payload: block.clone(),
}));
}
"tool_result" => {
return Ok(Some(Event {
session_id: session_id.to_string(),
seq,
ts_ms,
ts_exact,
kind: EventKind::ToolResult,
source: EventSource::Tail,
tool: None,
tool_call_id: block
.get("tool_use_id")
.and_then(|v| v.as_str())
.map(ToOwned::to_owned),
tokens_in: None,
tokens_out: None,
reasoning_tokens: None,
cost_usd_e6: None,
stop_reason: None,
latency_ms: None,
ttft_ms: None,
retry_count: None,
context_used_tokens: None,
context_max_tokens: None,
cache_creation_tokens: None,
cache_read_tokens: None,
system_prompt_tokens: None,
payload: block.clone(),
}));
}
_ => {}
}
}
Ok(None)
}
fn todo_counts(input: &Value) -> (u32, u32, u32) {
let Some(arr) = input.get("todos").and_then(|t| t.as_array()) else {
return (0, 0, 0);
};
let mut comp = 0u32;
let mut canc = 0u32;
for t in arr {
match t.get("status").and_then(|s| s.as_str()).unwrap_or("") {
"completed" => comp += 1,
"cancelled" => canc += 1,
_ => {}
}
}
(arr.len() as u32, comp, canc)
}
/// Usage figures attached to one transcript line, in the order
/// `(tokens_in, tokens_out, reasoning_tokens, cost_usd_e6)`.
type CursorLineUsage = (Option<u32>, Option<u32>, Option<u32>, Option<i64>);
fn todo_write_lifecycle(
session_id: &str,
seq: u64,
ts_ms: u64,
ts_exact: bool,
usage: CursorLineUsage,
block: &Value,
) -> Event {
let (tokens_in, tokens_out, reasoning_tokens, cost_usd_e6) = usage;
let input = block.get("input").unwrap_or(block);
let (total, comp, canc) = todo_counts(input);
Event {
session_id: session_id.to_string(),
seq,
ts_ms,
ts_exact,
kind: EventKind::Lifecycle,
source: EventSource::Tail,
tool: Some("TodoWrite".into()),
tool_call_id: block
.get("id")
.and_then(|v| v.as_str())
.map(ToOwned::to_owned),
tokens_in,
tokens_out,
reasoning_tokens,
cost_usd_e6,
stop_reason: None,
latency_ms: None,
ttft_ms: None,
retry_count: None,
context_used_tokens: None,
context_max_tokens: None,
cache_creation_tokens: None,
cache_read_tokens: None,
system_prompt_tokens: None,
payload: serde_json::json!({
"type": "todo_write",
"todos_total": total,
"todos_completed": comp,
"todos_cancelled": canc,
}),
}
}
fn line_ts_ms(obj: &serde_json::Map<String, Value>) -> Option<u64> {
if let Some(t) = ["timestamp_ms", "ts_ms", "created_at_ms"]
.iter()
.find_map(|k| obj.get(*k).and_then(|v| v.as_u64()))
{
return Some(t);
}
if let Some(t) = obj.get("timestamp").and_then(|v| v.as_u64()) {
return Some(if t < 1_000_000_000_000 {
t.saturating_mul(1000)
} else {
t
});
}
None
}
fn line_usage_tokens(
obj: &serde_json::Map<String, Value>,
) -> (Option<u32>, Option<u32>, Option<u32>) {
let mut tokens_in = None;
let mut tokens_out = None;
let mut reasoning_tokens = None;
if let Some(t) = obj.get("tokens").and_then(|x| x.as_object()) {
tokens_in = t
.get("inputTokens")
.and_then(|v| v.as_u64())
.map(|v| v as u32);
tokens_out = t
.get("outputTokens")
.and_then(|v| v.as_u64())
.map(|v| v as u32);
reasoning_tokens = t
.get("reasoningTokens")
.and_then(|v| v.as_u64())
.map(|v| v as u32);
}
if let Some(u) = obj.get("usage").and_then(|x| x.as_object()) {
tokens_in = tokens_in.or_else(|| {
u.get("prompt_tokens")
.or_else(|| u.get("input_tokens"))
.and_then(|v| v.as_u64())
.map(|v| v as u32)
});
tokens_out = tokens_out.or_else(|| {
u.get("completion_tokens")
.or_else(|| u.get("output_tokens"))
.and_then(|v| v.as_u64())
.map(|v| v as u32)
});
reasoning_tokens = reasoning_tokens.or_else(|| {
u.get("reasoning_tokens")
.and_then(|v| v.as_u64())
.map(|v| v as u32)
});
}
(tokens_in, tokens_out, reasoning_tokens)
}
/// Returns the last-modified time of `path` as milliseconds since the Unix
/// epoch.
///
/// Best effort: yields `0` when the metadata or modification time cannot be
/// read, or when the modification time lies before the epoch.
fn file_mtime_ms(path: &Path) -> u64 {
    let modified = match path.metadata().and_then(|meta| meta.modified()) {
        Ok(time) => time,
        Err(_) => return 0,
    };
    match modified.duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis() as u64,
        Err(_) => 0,
    }
}
fn scan_jsonl_in_dir(dir: &Path, session_id: &str) -> Result<(Vec<Event>, Option<String>)> {
let base_ts = super::dir_mtime_ms(dir);
let mut entries: Vec<_> = std::fs::read_dir(dir)
.with_context(|| format!("read dir: {}", dir.display()))?
.filter_map(|e| e.ok())
.filter(|e| e.path().extension().map(|x| x == "jsonl").unwrap_or(false))
.collect();
entries.sort_by_key(|e| e.file_name());
let mut events = Vec::new();
let mut seq: u64 = 0;
let mut model: Option<String> = None;
for entry in entries {
let content = std::fs::read_to_string(entry.path())?;
for line in content.lines() {
if line.trim().is_empty() {
continue;
}
if let Some(m) = model_from_json::from_line(line) {
model = Some(m);
}
if let Some(ev) = parse_cursor_line(session_id, seq, base_ts, line)? {
events.push(ev);
seq += 1;
} else {
seq += 1;
}
}
}
Ok((events, model))
}
/// Parses a single `.jsonl` transcript file as one session.
///
/// Non-blank lines are numbered from zero and handed to `parse_cursor_line`
/// with the file's mtime as the base for synthesised timestamps. The returned
/// model is the last one reported by `model_from_json::from_line`.
///
/// # Errors
///
/// Fails if the file cannot be read or any non-blank line fails to parse.
fn scan_jsonl_file(path: &Path, session_id: &str) -> Result<(Vec<Event>, Option<String>)> {
    let base_ts = file_mtime_ms(path);
    let text =
        std::fs::read_to_string(path).with_context(|| format!("read file: {}", path.display()))?;

    let mut events: Vec<Event> = Vec::new();
    let mut model: Option<String> = None;
    let non_blank_lines = text.lines().filter(|raw| !raw.trim().is_empty());
    for (seq, raw) in (0u64..).zip(non_blank_lines) {
        // A later model mention replaces an earlier one.
        model = model_from_json::from_line(raw).or(model);
        events.extend(parse_cursor_line(session_id, seq, base_ts, raw)?);
    }
    Ok((events, model))
}
/// Returns the grandparent of `dir` as a (lossily converted) string, which
/// the scanners record as the session's workspace.
///
/// Yields an empty string when `dir` has fewer than two ancestors.
fn cursor_workspace_for_session_dir(dir: &Path) -> String {
    // `ancestors()` yields `dir`, its parent, its grandparent, ...
    match dir.ancestors().nth(2) {
        Some(grandparent) => grandparent.to_string_lossy().into_owned(),
        None => String::new(),
    }
}
/// Scans a Cursor session directory together with its subagent transcripts.
///
/// The first element of the result is the main session, built from the
/// `*.jsonl` files directly inside `dir`; its id is the directory name. Each
/// `*.jsonl` file inside `dir/subagents` (when that directory exists) adds one
/// further session whose id is the file stem, whose `parent_session_id` is the
/// main session id, and whose `trace_path` is the file itself. Subagent files
/// are visited in file-name order; files with an empty or non-UTF-8 stem are
/// skipped.
///
/// # Errors
///
/// Fails if a directory or transcript cannot be read, or if a transcript line
/// fails to parse.
pub fn scan_session_dir_all(dir: &Path) -> Result<Vec<(SessionRecord, Vec<Event>)>> {
    /// Assembles a finished `cursor` session record; every field not passed
    /// in is left unset.
    fn cursor_record(
        id: String,
        model: Option<String>,
        workspace: String,
        started_at_ms: u64,
        trace_path: String,
        parent_session_id: Option<String>,
    ) -> SessionRecord {
        SessionRecord {
            id,
            agent: "cursor".to_string(),
            model,
            workspace,
            started_at_ms,
            ended_at_ms: None,
            status: SessionStatus::Done,
            trace_path,
            start_commit: None,
            end_commit: None,
            branch: None,
            dirty_start: None,
            dirty_end: None,
            repo_binding_source: None,
            prompt_fingerprint: None,
            parent_session_id,
            agent_version: None,
            os: None,
            arch: None,
            repo_file_count: None,
            repo_total_loc: None,
        }
    }

    let main_id = match dir.file_name().and_then(|name| name.to_str()) {
        Some(name) => name.to_owned(),
        None => String::new(),
    };
    let workspace = cursor_workspace_for_session_dir(dir);

    let (main_events, main_model) = scan_jsonl_in_dir(dir, &main_id)?;
    let main_record = cursor_record(
        main_id.clone(),
        main_model,
        workspace.clone(),
        crate::collect::tail::dir_mtime_ms(dir),
        dir.to_string_lossy().into_owned(),
        None,
    );
    let mut sessions = vec![(main_record, main_events)];

    let subagents_dir = dir.join("subagents");
    if !subagents_dir.is_dir() {
        return Ok(sessions);
    }

    let mut transcripts: Vec<_> = std::fs::read_dir(&subagents_dir)
        .with_context(|| format!("read dir: {}", subagents_dir.display()))?
        .filter_map(|entry| entry.ok())
        .filter(|entry| {
            entry
                .path()
                .extension()
                .map(|ext| ext == "jsonl")
                .unwrap_or(false)
        })
        .collect();
    transcripts.sort_by_key(|entry| entry.file_name());

    for transcript in transcripts {
        let file = transcript.path();
        let sub_id = match file.file_stem().and_then(|stem| stem.to_str()) {
            Some(stem) if !stem.is_empty() => stem.to_owned(),
            _ => continue,
        };
        let (sub_events, sub_model) = scan_jsonl_file(&file, &sub_id)?;
        let sub_record = cursor_record(
            sub_id,
            sub_model,
            workspace.clone(),
            file_mtime_ms(&file),
            file.to_string_lossy().into_owned(),
            Some(main_id.clone()),
        );
        sessions.push((sub_record, sub_events));
    }
    Ok(sessions)
}
/// Scans the `*.jsonl` files directly inside a Cursor session directory and
/// returns the main session only (nothing under `subagents` is read).
///
/// The session id is the directory name, the workspace comes from
/// `cursor_workspace_for_session_dir`, and `started_at_ms` is the directory's
/// mtime. The record is always marked `SessionStatus::Done`.
///
/// # Errors
///
/// Fails if the directory or a transcript cannot be read, or if a transcript
/// line fails to parse.
pub fn scan_session_dir(dir: &Path) -> Result<(SessionRecord, Vec<Event>)> {
    let id = match dir.file_name().and_then(|name| name.to_str()) {
        Some(name) => name.to_owned(),
        None => String::new(),
    };
    let workspace = cursor_workspace_for_session_dir(dir);
    let (events, model) = scan_jsonl_in_dir(dir, &id)?;
    let started_at_ms = crate::collect::tail::dir_mtime_ms(dir);
    let trace_path = dir.to_string_lossy().into_owned();

    let record = SessionRecord {
        id,
        agent: String::from("cursor"),
        model,
        workspace,
        started_at_ms,
        ended_at_ms: None,
        status: SessionStatus::Done,
        trace_path,
        start_commit: None,
        end_commit: None,
        branch: None,
        dirty_start: None,
        dirty_end: None,
        repo_binding_source: None,
        prompt_fingerprint: None,
        parent_session_id: None,
        agent_version: None,
        os: None,
        arch: None,
        repo_file_count: None,
        repo_total_loc: None,
    };
    Ok((record, events))
}
// Unit tests for the Cursor transcript line parser and the directory scanners.
// The scanner tests read fixtures from `tests/fixtures/cursor` under
// `CARGO_MANIFEST_DIR`.
#[cfg(test)]
mod tests {
use super::*;
// Assistant line with a single `tool_use` block (tool `read_file`, id `toolu_01`).
const TOOL_USE_LINE: &str = r#"{"role":"assistant","message":{"content":[{"type":"tool_use","id":"toolu_01","name":"read_file","input":{"path":"src/main.rs"}}]}}"#;
// User line with a single `tool_result` block answering call `toolu_01`.
const TOOL_RESULT_LINE: &str = r#"{"role":"user","message":{"content":[{"type":"tool_result","tool_use_id":"toolu_01","content":[{"type":"text","text":"fn main() {}"}]}]}}"#;
// Assistant line that contains only a text block, i.e. no tool activity.
const TEXT_ONLY_LINE: &str =
r#"{"role":"assistant","message":{"content":[{"type":"text","text":"hello"}]}}"#;
// A `tool_use` block becomes a ToolCall event carrying tool name, call id and session id.
#[test]
fn parse_tool_use() {
let ev = parse_cursor_line("s1", 0, 0, TOOL_USE_LINE)
.unwrap()
.unwrap();
assert_eq!(ev.kind, EventKind::ToolCall);
assert_eq!(ev.tool.as_deref(), Some("read_file"));
assert_eq!(ev.tool_call_id.as_deref(), Some("toolu_01"));
assert_eq!(ev.session_id, "s1");
}
// A `tool_result` block becomes a ToolResult event; `tool_use_id` is used as the call id.
#[test]
fn parse_tool_result() {
let ev = parse_cursor_line("s1", 1, 0, TOOL_RESULT_LINE)
.unwrap()
.unwrap();
assert_eq!(ev.kind, EventKind::ToolResult);
assert_eq!(ev.seq, 1);
assert_eq!(ev.tool_call_id.as_deref(), Some("toolu_01"));
}
// Lines without any tool block parse successfully but yield no event.
#[test]
fn text_only_returns_none() {
let result = parse_cursor_line("s1", 2, 0, TEXT_ONLY_LINE).unwrap();
assert!(result.is_none());
}
// Without a timestamp on the line, `ts_ms` is synthesised as `base_ts + seq * 100`.
#[test]
fn ts_ms_synthesized() {
let ev = parse_cursor_line("s1", 3, 1000, TOOL_USE_LINE)
.unwrap()
.unwrap();
assert_eq!(ev.ts_ms, 1000 + 3 * 100);
}
// A fixture line with token usage yields token counts and a positive cost estimate.
#[test]
fn parse_tool_use_sets_cost_when_tokens_present() {
let p = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
.join("tests/fixtures/cursor/01_tool_with_tokens.jsonl");
let line = std::fs::read_to_string(p).unwrap();
let ev = parse_cursor_line("s1", 0, 0, line.trim()).unwrap().unwrap();
assert_eq!(ev.tool.as_deref(), Some("Read"));
assert!(
ev.cost_usd_e6.is_some_and(|c| c > 0),
"expected estimated cost_usd_e6"
);
assert_eq!(ev.tokens_in, Some(100));
assert_eq!(ev.tokens_out, Some(50));
}
// Scanning the fixture directory produces a finished `cursor` session with
// both event kinds, a detected model, and at least one costed event.
#[test]
fn scan_fixture_dir() {
let fixture_dir =
std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/cursor");
let (record, events) = scan_session_dir(&fixture_dir).unwrap();
assert_eq!(record.agent, "cursor");
assert_eq!(record.model.as_deref(), Some("cursor-model-with-usage"));
assert_eq!(record.status, SessionStatus::Done);
assert!(!events.is_empty(), "expected events from fixture files");
assert!(events.iter().any(|e| e.kind == EventKind::ToolCall));
assert!(events.iter().any(|e| e.kind == EventKind::ToolResult));
assert!(
events.iter().any(|e| e.cost_usd_e6.is_some_and(|c| c > 0)),
"expected at least one cost-bearing event from usage fixture"
);
}
// `scan_session_dir_all` also returns the subagent transcript as its own
// session, identified by file stem and traced to the `.jsonl` file.
#[test]
fn scan_session_dir_all_includes_subagents() {
let fixture_dir =
std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/cursor");
let sessions = scan_session_dir_all(&fixture_dir).unwrap();
assert!(
sessions.len() >= 2,
"expected main session + subagent fixture"
);
let sub_id = "a1b2c3d4-e5f6-7890-abcd-ef1234567890";
let sub = sessions
.iter()
.find(|(r, _)| r.id == sub_id)
.expect("subagent session");
assert_eq!(sub.0.agent, "cursor");
assert!(
sub.0.trace_path.ends_with(".jsonl"),
"subagent trace_path should be file path"
);
assert!(
sub.1.iter().any(|e| e.tool.as_deref() == Some("grep")),
"subagent tool call"
);
}
}