1use anyhow::{Context, Result};
4use chrono::{DateTime, NaiveDate, TimeZone, Utc};
5use serde_json::Value;
6use statsai_core::{
7 canonical_display, display_path, expand_home_path, hash_text, home_dir, path_hash,
8 project_bucket_key, semantic_event_id, summary_id, BillingPeriod, Confidence, EventSource,
9 IdentitySource, LatencySource, LocationOrigin, ModelInfo, ParseEvidence, PrivacyInfo,
10 PrivacyMode, ProjectInfo, RuntimeInfo, SessionInfo, SourceKind, SourceLocation,
11 SubscriptionStatus, SummaryMetadata, UsageCounts, UsageEvent, UsageSummary,
12 USAGE_EVENT_SCHEMA_VERSION, USAGE_SUMMARY_SCHEMA_VERSION,
13};
14use statsai_pricing::{estimate_cost, normalize_model_name};
15use std::collections::{HashMap, HashSet};
16use std::fs::File;
17use std::io::{BufRead, BufReader};
18use std::path::{Path, PathBuf};
19use std::time::UNIX_EPOCH;
20use walkdir::WalkDir;
21
/// Provider identifier returned by `ClaudeCodeAdapter::provider`.
pub const CLAUDE_CODE_PROVIDER: &str = "claude_code";
/// Provider identifier returned by `CodexAdapter::provider`.
pub const CODEX_PROVIDER: &str = "codex";
/// Event-key version tag; by name it pairs with session-scoped deduplication.
// NOTE(review): the consumer of this constant is outside this chunk — confirm.
const SESSION_SCOPED_EVENT_KEY_VERSION: &str = "semantic_usage_event.v1";
/// Event-key version tag; by name it pairs with path-independent deduplication.
// NOTE(review): the consumer of this constant is outside this chunk — confirm.
const PATH_INDEPENDENT_EVENT_KEY_VERSION: &str = "semantic_usage_event.v4";
/// Version prefix mixed into every scan-cache namespace hash.
const SCAN_CACHE_SIGNATURE_VERSION: &str = "scan-cache.v1";
/// Parser revision mixed into the scan-cache namespace of Codex sources.
const CODEX_SCAN_CACHE_PARSER_REVISION: &str = "turn-runtime-project-context.v8";
/// Parser revision mixed into the scan-cache namespace of Claude Code sources.
const CLAUDE_SCAN_CACHE_PARSER_REVISION: &str = "project-context.v2";
31
32pub use statsai_core::{VerifiedSourceState, VerifiedSubscriptionState};
33
/// Deduplication mode attached to each parsed event (see the
/// `deduplication` field filled in by the per-file parsers).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum EventDeduplication {
    /// Mode used by the Claude JSONL parser in this file.
    // NOTE(review): presumably keys events per session — confirm in `usage_event`.
    SessionScoped,
    /// Alternative mode.
    // NOTE(review): presumably keys events independently of the file path — confirm.
    PathIndependent,
}
39
/// Caller-supplied settings for one adapter scan.
#[derive(Clone, Debug)]
pub struct ScanOptions {
    /// Device identifier copied onto emitted records.
    pub device_id: String,
    /// Optional allow-list of candidate cache keys; `None` means "scan every file".
    pub selected_cache_keys: Option<HashSet<String>>,
}

impl ScanOptions {
    /// Returns `true` when the candidate identified by `cache_key` must be parsed.
    ///
    /// Without a selection everything is scanned; with one, only listed keys are.
    fn should_scan(&self, cache_key: &str) -> bool {
        match &self.selected_cache_keys {
            None => true,
            Some(selected) => selected.contains(cache_key),
        }
    }
}
53
/// A file that a scan may parse, together with its cache identity.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ScanCandidateFile {
    /// Location of the file on disk.
    pub path: PathBuf,
    /// Key identifying the file in the scan cache (derived from `path`).
    pub cache_key: String,
    /// Hash that changes when the file metadata, its dependency signature, or
    /// the cache namespace changes.
    pub cache_signature: String,
}
60
/// Counters describing what a scan saw and what it did with it.
#[derive(Clone, Debug, Default)]
pub struct ScanDiagnostics {
    /// Files that were opened and parsed.
    pub files_scanned: u64,
    /// Files skipped because the scan options did not select them.
    pub files_skipped_unchanged: u64,
    /// Non-empty lines read from JSONL files.
    pub raw_rows: u64,
    /// Rows (or summary entries) that carried a usage payload.
    pub candidate_usage_rows: u64,
    /// Number of events kept; set from the event list at the end of a scan.
    pub accepted_events: u64,
    /// Events rejected as duplicates.
    // NOTE(review): the increment is outside this chunk — confirm in `push_deduped`.
    pub duplicate_events: u64,
    /// Usage payloads dropped because their computed total was zero.
    pub skipped_zero_events: u64,
    /// Rows that failed to parse, or summary files lacking the expected shape.
    pub invalid_rows: u64,
    /// Rows whose timestamp fell back to the file modification time (or "now").
    pub timestamp_fallbacks: u64,
    /// Rows for which no explicit model could be determined.
    pub model_fallbacks: u64,
}
74
75#[derive(Debug, Clone, Default)]
76pub struct AdapterScan {
77 pub events: Vec<UsageEvent>,
78 pub summaries: Vec<UsageSummary>,
79 pub diagnostics: ScanDiagnostics,
80 pub verified_source_state: Option<VerifiedSourceState>,
81}
82
83pub trait ProviderAdapter {
84 fn id(&self) -> &'static str;
85 fn version(&self) -> &'static str;
86 fn provider(&self) -> &'static str;
87 fn discover(&self) -> Vec<SourceLocation>;
88 fn scan_candidates(&self, source: &SourceLocation) -> Result<Vec<ScanCandidateFile>>;
89 fn probe_verified_source_state(
90 &self,
91 _source: &SourceLocation,
92 ) -> Result<Option<VerifiedSourceState>> {
93 Ok(None)
94 }
95 fn scan(&self, source: &SourceLocation, options: &ScanOptions) -> Result<AdapterScan>;
96}
97
98#[derive(Debug, Default)]
99pub struct ClaudeCodeAdapter;
100
101impl ProviderAdapter for ClaudeCodeAdapter {
102 fn id(&self) -> &'static str {
103 "claude-code-local-jsonl"
104 }
105
106 fn version(&self) -> &'static str {
107 env!("CARGO_PKG_VERSION")
108 }
109
110 fn provider(&self) -> &'static str {
111 CLAUDE_CODE_PROVIDER
112 }
113
114 fn discover(&self) -> Vec<SourceLocation> {
115 let mut sources = Vec::new();
116 let mut seen = HashSet::new();
117 if let Ok(value) = std::env::var("CLAUDE_CONFIG_DIR") {
118 for root in split_paths(&value)
119 .into_iter()
120 .map(|path| normalize_claude_config_root(&path))
121 {
122 if root.join("projects").is_dir() && seen.insert(canonical_display(&root)) {
123 sources.push(claude_source_for_root(self, &root, LocationOrigin::Env));
124 }
125 }
126 return sources;
127 }
128
129 if let Some(home) = home_dir() {
130 let xdg = std::env::var_os("XDG_CONFIG_HOME")
131 .map(PathBuf::from)
132 .unwrap_or_else(|| home.join(".config"));
133 for root in [xdg.join("claude"), home.join(".claude")] {
134 if root.join("projects").is_dir() && seen.insert(canonical_display(&root)) {
135 sources.push(claude_source_for_root(self, &root, LocationOrigin::Default));
136 }
137 }
138 }
139
140 sources
141 }
142
143 fn scan_candidates(&self, source: &SourceLocation) -> Result<Vec<ScanCandidateFile>> {
144 claude_scan_candidates(source, self.version())
145 }
146
147 fn scan(&self, source: &SourceLocation, options: &ScanOptions) -> Result<AdapterScan> {
148 scan_claude_source(self, source, options)
149 }
150}
151
152#[derive(Debug, Default)]
153pub struct CodexAdapter;
154
155impl ProviderAdapter for CodexAdapter {
156 fn id(&self) -> &'static str {
157 "codex-local-jsonl"
158 }
159
160 fn version(&self) -> &'static str {
161 env!("CARGO_PKG_VERSION")
162 }
163
164 fn provider(&self) -> &'static str {
165 CODEX_PROVIDER
166 }
167
168 fn discover(&self) -> Vec<SourceLocation> {
169 let mut sources = Vec::new();
170 let mut seen = HashSet::new();
171 if let Ok(value) = std::env::var("CODEX_HOME") {
172 for root in split_paths(&value) {
173 if seen.insert(canonical_display(&root)) {
174 sources.push(codex_source_for_root(self, &root, LocationOrigin::Env));
175 }
176 }
177 return sources;
178 }
179
180 if let Some(home) = home_dir() {
181 let root = home.join(".codex");
182 if root.exists() {
183 sources.push(codex_source_for_root(self, &root, LocationOrigin::Default));
184 }
185 }
186
187 sources
188 }
189
190 fn scan_candidates(&self, source: &SourceLocation) -> Result<Vec<ScanCandidateFile>> {
191 codex_scan_candidates(source, self.version())
192 }
193
194 fn probe_verified_source_state(
195 &self,
196 source: &SourceLocation,
197 ) -> Result<Option<VerifiedSourceState>> {
198 let Some(root) = source_root_path(source) else {
199 return Ok(None);
200 };
201 let root = codex_source_root(&root);
202 Ok(codex_auth_snapshot(&root))
203 }
204
205 fn scan(&self, source: &SourceLocation, options: &ScanOptions) -> Result<AdapterScan> {
206 scan_codex_source(self, source, options)
207 }
208}
209
210pub fn adapter_for_provider(provider: &str) -> Option<Box<dyn ProviderAdapter>> {
211 match provider {
212 CLAUDE_CODE_PROVIDER | "claude" | "claude-code" => Some(Box::new(ClaudeCodeAdapter)),
213 CODEX_PROVIDER => Some(Box::new(CodexAdapter)),
214 _ => None,
215 }
216}
217
218pub fn default_adapters() -> Vec<Box<dyn ProviderAdapter>> {
219 vec![Box::new(ClaudeCodeAdapter), Box::new(CodexAdapter)]
220}
221
222fn codex_source_for_root(
223 adapter: &CodexAdapter,
224 root: &Path,
225 origin: LocationOrigin,
226) -> SourceLocation {
227 SourceLocation::local_adapter(
228 adapter.provider(),
229 adapter.id(),
230 adapter.version(),
231 root,
232 origin,
233 )
234}
235
236fn claude_source_for_root(
237 adapter: &ClaudeCodeAdapter,
238 root: &Path,
239 origin: LocationOrigin,
240) -> SourceLocation {
241 let root = normalize_claude_config_root(root);
242 SourceLocation::local_adapter(
243 adapter.provider(),
244 adapter.id(),
245 adapter.version(),
246 &root,
247 origin,
248 )
249}
250
251fn source_root_path(source: &SourceLocation) -> Option<PathBuf> {
252 source.path_label.as_deref().map(PathBuf::from)
253}
254
/// Maps a path that points at a `projects` directory to its parent.
///
/// Any other path — or a `projects` path without a parent — is returned as is.
fn normalize_claude_config_root(root: &Path) -> PathBuf {
    let points_at_projects = root.file_name().is_some_and(|name| name == "projects");
    match root.parent() {
        Some(parent) if points_at_projects => parent.to_path_buf(),
        _ => root.to_path_buf(),
    }
}
264
265fn split_paths(value: &str) -> Vec<PathBuf> {
266 value
267 .split(',')
268 .map(str::trim)
269 .filter(|part| !part.is_empty())
270 .map(expand_home_path)
271 .collect()
272}
273
274fn scan_claude_source(
275 adapter: &ClaudeCodeAdapter,
276 source: &SourceLocation,
277 options: &ScanOptions,
278) -> Result<AdapterScan> {
279 let mut scan = AdapterScan::default();
280 let Some(path_label) = source
281 .path_label
282 .as_deref()
283 .filter(|label| !label.is_empty())
284 else {
285 return Ok(scan);
286 };
287 let root = normalize_claude_config_root(Path::new(path_label));
288 if !root.exists() {
289 return Ok(scan);
290 }
291
292 let projects = root.join("projects");
293 let session_projects = load_claude_session_projects(&projects);
294 let cache_namespace = scan_cache_namespace(source, adapter.version());
295 let event_files = claude_jsonl_candidates(&projects, &cache_namespace)?;
296 let mut seen = HashSet::new();
297 {
298 let mut ctx = FileParseContext {
299 adapter,
300 source,
301 options,
302 scan: &mut scan,
303 seen: &mut seen,
304 };
305 for candidate in event_files {
306 if !options.should_scan(&candidate.cache_key) {
307 ctx.scan.diagnostics.files_skipped_unchanged += 1;
308 continue;
309 }
310 ctx.scan.diagnostics.files_scanned += 1;
311 parse_claude_file(&mut ctx, &projects, &session_projects, &candidate.path)?;
312 }
313 }
314
315 if let Some(candidate) = claude_stats_cache_candidate(&root, &cache_namespace) {
316 if options.should_scan(&candidate.cache_key) {
317 scan.diagnostics.files_scanned += 1;
318 parse_claude_stats_cache(adapter, source, options, &candidate.path, &mut scan)?;
319 } else {
320 scan.diagnostics.files_skipped_unchanged += 1;
321 }
322 }
323 scan.diagnostics.accepted_events = scan.events.len() as u64;
324 Ok(scan)
325}
326
327fn scan_codex_source(
328 adapter: &CodexAdapter,
329 source: &SourceLocation,
330 options: &ScanOptions,
331) -> Result<AdapterScan> {
332 let mut scan = AdapterScan::default();
333 let Some(path_label) = source
334 .path_label
335 .as_deref()
336 .filter(|label| !label.is_empty())
337 else {
338 return Ok(scan);
339 };
340 let source_path = PathBuf::from(path_label);
341 let root = codex_source_root(&source_path);
342 let cache_namespace = scan_cache_namespace(source, adapter.version());
343 let mut seen = HashSet::new();
344 {
345 let mut ctx = FileParseContext {
346 adapter,
347 source,
348 options,
349 scan: &mut scan,
350 seen: &mut seen,
351 };
352 for candidate in codex_jsonl_candidates(source, &source_path, &cache_namespace)? {
353 if !options.should_scan(&candidate.cache_key) {
354 ctx.scan.diagnostics.files_skipped_unchanged += 1;
355 continue;
356 }
357 let usage_root = codex_usage_root_for_file(&root, &candidate.path);
358 ctx.scan.diagnostics.files_scanned += 1;
359 parse_codex_file(&mut ctx, &root, &usage_root, &candidate.path)?;
360 }
361 }
362 scan.verified_source_state = codex_auth_snapshot(&root);
363 scan.diagnostics.accepted_events = scan.events.len() as u64;
364 Ok(scan)
365}
366
367fn collect_jsonl_files(root: &Path) -> Result<Vec<PathBuf>> {
368 if !root.exists() {
369 return Ok(Vec::new());
370 }
371
372 let mut files = Vec::new();
373 for entry in WalkDir::new(root).follow_links(false) {
374 let entry = entry?;
375 if entry.file_type().is_file()
376 && entry.path().extension().and_then(|ext| ext.to_str()) == Some("jsonl")
377 {
378 files.push(entry.path().to_path_buf());
379 }
380 }
381 files.sort_by_cached_key(|path| path.to_string_lossy().into_owned());
382 Ok(files)
383}
384
/// Returns the Codex root for `path`.
///
/// A path whose last component is `sessions` or `archived_sessions` maps to
/// its parent; every other path is already the root.
fn codex_source_root(path: &Path) -> PathBuf {
    let leaf = path.file_name().and_then(|name| name.to_str());
    if matches!(leaf, Some("sessions" | "archived_sessions")) {
        if let Some(parent) = path.parent() {
            return parent.to_path_buf();
        }
    }
    path.to_path_buf()
}
398
/// Lists the directories below which Codex session files are searched.
///
/// If `path` itself is named `sessions` or `archived_sessions`, it is the
/// only root (and only when it is a directory). Otherwise the existing
/// `sessions` and `archived_sessions` children of `path` are returned, in
/// that order.
fn codex_usage_roots(path: &Path) -> Vec<PathBuf> {
    let leaf = path.file_name().and_then(|name| name.to_str());
    if matches!(leaf, Some("sessions" | "archived_sessions")) {
        let mut only = Vec::new();
        if path.is_dir() {
            only.push(path.to_path_buf());
        }
        return only;
    }

    let mut roots = Vec::new();
    for child in ["sessions", "archived_sessions"].iter() {
        let candidate = path.join(child);
        if candidate.is_dir() {
            roots.push(candidate);
        }
    }
    roots
}
418
419fn claude_scan_candidates(
420 source: &SourceLocation,
421 adapter_version: &str,
422) -> Result<Vec<ScanCandidateFile>> {
423 let Some(path_label) = source
424 .path_label
425 .as_deref()
426 .filter(|label| !label.is_empty())
427 else {
428 return Ok(Vec::new());
429 };
430 let root = normalize_claude_config_root(Path::new(path_label));
431 if !root.exists() {
432 return Ok(Vec::new());
433 }
434 let cache_namespace = scan_cache_namespace(source, adapter_version);
435
436 let mut candidates = claude_jsonl_candidates(&root.join("projects"), &cache_namespace)?;
437 if let Some(candidate) = claude_stats_cache_candidate(&root, &cache_namespace) {
438 candidates.push(candidate);
439 }
440 Ok(candidates)
441}
442
443fn claude_jsonl_candidates(root: &Path, cache_namespace: &str) -> Result<Vec<ScanCandidateFile>> {
444 collect_jsonl_files(root)?
445 .into_iter()
446 .map(|path| {
447 let dependency = claude_session_index_dependency(root, &path);
448 Ok(scan_candidate(path, dependency.as_deref(), cache_namespace))
449 })
450 .collect()
451}
452
453fn claude_stats_cache_candidate(root: &Path, cache_namespace: &str) -> Option<ScanCandidateFile> {
454 let path = root.join("stats-cache.json");
455 path.is_file()
456 .then(|| scan_candidate(path, None, cache_namespace))
457}
458
459fn claude_session_index_dependency(root: &Path, path: &Path) -> Option<String> {
460 path.ancestors()
461 .take_while(|ancestor| ancestor.starts_with(root))
462 .skip(1)
463 .find_map(|ancestor| {
464 let session_index = ancestor.join("sessions-index.json");
465 session_index
466 .is_file()
467 .then(|| file_metadata_signature(&session_index))
468 })
469}
470
471fn codex_scan_candidates(
472 source: &SourceLocation,
473 adapter_version: &str,
474) -> Result<Vec<ScanCandidateFile>> {
475 let Some(path_label) = source
476 .path_label
477 .as_deref()
478 .filter(|label| !label.is_empty())
479 else {
480 return Ok(Vec::new());
481 };
482 let source_path = PathBuf::from(path_label);
483 let cache_namespace = scan_cache_namespace(source, adapter_version);
484 codex_jsonl_candidates(source, &source_path, &cache_namespace)
485}
486
487fn codex_jsonl_candidates(
488 _source: &SourceLocation,
489 path: &Path,
490 cache_namespace: &str,
491) -> Result<Vec<ScanCandidateFile>> {
492 let root = codex_source_root(path);
493 let roots = codex_usage_roots(path);
494 let auth_dependency = Some(file_metadata_signature(&root.join("auth.json")));
495 let dependency = auth_dependency.as_deref();
496 let mut candidates = Vec::new();
497 for usage_root in roots {
498 for candidate_path in collect_jsonl_files(&usage_root)? {
499 candidates.push(scan_candidate(candidate_path, dependency, cache_namespace));
500 }
501 }
502 Ok(candidates)
503}
504
/// Returns the usage root (`<root>/sessions` or `<root>/archived_sessions`)
/// that contains `path`, falling back to `root` when neither does.
fn codex_usage_root_for_file(root: &Path, path: &Path) -> PathBuf {
    ["sessions", "archived_sessions"]
        .iter()
        .map(|child| root.join(child))
        .find(|usage_root| path.starts_with(usage_root))
        .unwrap_or_else(|| root.to_path_buf())
}
514
515fn scan_candidate(
516 path: PathBuf,
517 dependency_signature: Option<&str>,
518 cache_namespace: &str,
519) -> ScanCandidateFile {
520 let cache_key = canonical_display(&path);
521 let file_signature = file_metadata_signature(&path);
522 let cache_signature = dependency_signature
523 .map(|dependency| hash_text(&format!("{cache_namespace}:{file_signature}:{dependency}")))
524 .unwrap_or_else(|| hash_text(&format!("{cache_namespace}:{file_signature}")));
525 ScanCandidateFile {
526 path,
527 cache_key,
528 cache_signature,
529 }
530}
531
532fn scan_cache_namespace(source: &SourceLocation, adapter_version: &str) -> String {
533 let adapter_id = source.adapter_id.as_deref().unwrap_or("");
534 let path_hash = source.path_hash.as_deref().unwrap_or("");
535 let parser_revision = scan_cache_parser_revision(source);
536 hash_text(&format!(
537 "{SCAN_CACHE_SIGNATURE_VERSION}:{}:{:?}:{adapter_id}:{adapter_version}:{path_hash}:{parser_revision}",
538 source.provider, source.source_kind
539 ))
540}
541
542fn scan_cache_parser_revision(source: &SourceLocation) -> &'static str {
543 match source.provider.as_str() {
544 CODEX_PROVIDER => CODEX_SCAN_CACHE_PARSER_REVISION,
545 CLAUDE_CODE_PROVIDER => CLAUDE_SCAN_CACHE_PARSER_REVISION,
546 _ => "default",
547 }
548}
549
550fn file_metadata_signature(path: &Path) -> String {
551 let Ok(metadata) = std::fs::metadata(path) else {
552 return "missing".to_string();
553 };
554 let modified = metadata
555 .modified()
556 .ok()
557 .and_then(|value| value.duration_since(UNIX_EPOCH).ok());
558 let (seconds, nanos) = modified
559 .map(|value| (value.as_secs(), value.subsec_nanos()))
560 .unwrap_or((0, 0));
561 let created = metadata
562 .created()
563 .ok()
564 .and_then(|value| value.duration_since(UNIX_EPOCH).ok());
565 let (created_seconds, created_nanos) = created
566 .map(|value| (value.as_secs(), value.subsec_nanos()))
567 .unwrap_or((0, 0));
568 hash_text(&format!(
569 "meta.v2:{}:{}:{}:{}:{}",
570 metadata.len(),
571 seconds,
572 nanos,
573 created_seconds,
574 created_nanos
575 ))
576}
577
578struct FileParseContext<'a, A: ProviderAdapter + ?Sized> {
579 adapter: &'a A,
580 source: &'a SourceLocation,
581 options: &'a ScanOptions,
582 scan: &'a mut AdapterScan,
583 seen: &'a mut HashSet<String>,
584}
585
/// Project details recorded for a Claude session path; both fields feed
/// `resolve_project_context` when a file's project is determined.
#[derive(Clone, Debug, Default)]
struct ClaudeSessionProjectMetadata {
    /// Project directory associated with the session, when known.
    project_path: Option<PathBuf>,
    /// Git branch associated with the session, when known.
    git_branch: Option<String>,
}
591
592#[derive(Debug, Clone, Default)]
593struct ProjectContext {
594 project_label: Option<String>,
595 repo_remote_hash: Option<String>,
596 repo_label: Option<String>,
597 branch_hash: Option<String>,
598 branch_label: Option<String>,
599 path_hash: Option<String>,
600 path_label: Option<String>,
601}
602
603impl ProjectContext {
604 fn into_project_info(self) -> Option<ProjectInfo> {
605 let identity_key = if let Some(path_hash) = self.path_hash.as_deref() {
606 format!(
607 "path:{path_hash}:repo:{}",
608 self.repo_remote_hash.as_deref().unwrap_or("none")
609 )
610 } else if let Some(repo_remote_hash) = self.repo_remote_hash.as_deref() {
611 format!("repo:{repo_remote_hash}")
612 } else {
613 return None;
614 };
615
616 Some(ProjectInfo {
617 project_id: format!("project_{}", &hash_text(&identity_key)[..24]),
618 project_label: self.project_label,
619 repo_remote_hash: self.repo_remote_hash,
620 repo_label: self.repo_label,
621 branch_hash: self.branch_hash,
622 branch_label: self.branch_label,
623 path_hash: self.path_hash,
624 path_label: self.path_label,
625 })
626 }
627}
628
629fn parse_claude_file(
630 ctx: &mut FileParseContext<'_, ClaudeCodeAdapter>,
631 projects: &Path,
632 session_projects: &HashMap<String, ClaudeSessionProjectMetadata>,
633 path: &Path,
634) -> Result<()> {
635 let file = File::open(path).with_context(|| format!("read {}", path.display()))?;
636 let reader = BufReader::new(file);
637 let fallback_timestamp = file_modified_timestamp(path).unwrap_or_else(Utc::now);
638 let project = claude_project_context_for_file(session_projects, projects, path);
639
640 for (index, line) in reader.lines().enumerate() {
641 let line = line?;
642 if line.trim().is_empty() {
643 continue;
644 }
645 ctx.scan.diagnostics.raw_rows += 1;
646 let Ok(value) = serde_json::from_str::<Value>(&line) else {
647 ctx.scan.diagnostics.invalid_rows += 1;
648 continue;
649 };
650 let Some(usage_value) = value
651 .pointer("/message/usage")
652 .or_else(|| value.get("usage"))
653 else {
654 continue;
655 };
656 ctx.scan.diagnostics.candidate_usage_rows += 1;
657 let usage = claude_usage_counts_from_value(usage_value);
658 if usage.computed_total() == 0 {
659 ctx.scan.diagnostics.skipped_zero_events += 1;
660 continue;
661 }
662 let (timestamp, timestamp_inferred) = timestamp_from_nested_value(&value)
663 .map(|timestamp| (timestamp, false))
664 .unwrap_or((fallback_timestamp, true));
665 if timestamp_inferred {
666 ctx.scan.diagnostics.timestamp_fallbacks += 1;
667 }
668 let model = model_from_nested_value(&value, None);
669 let model_inferred = model.is_none();
670 if model_inferred {
671 ctx.scan.diagnostics.model_fallbacks += 1;
672 }
673 let session_raw = value
674 .get("sessionId")
675 .or_else(|| value.get("session_id"))
676 .and_then(Value::as_str)
677 .map(ToOwned::to_owned)
678 .unwrap_or_else(|| fallback_session_id(path));
679 let event = usage_event(
680 ctx.adapter,
681 ctx.source,
682 ctx.options,
683 ProviderEventParts {
684 timestamp,
685 session_started_at: None,
686 session_ended_at: None,
687 duration_seconds: None,
688 model,
689 usage,
690 runtime: None,
691 session_raw,
692 project: project.clone(),
693 event_kind: "claude_message_usage",
694 source_file: path,
695 line_number: index + 1,
696 model_inferred,
697 timestamp_inferred,
698 deduplication: EventDeduplication::SessionScoped,
699 },
700 );
701 push_deduped(ctx.scan, ctx.seen, event);
702 }
703
704 Ok(())
705}
706
707fn claude_project_context_for_file(
708 session_projects: &HashMap<String, ClaudeSessionProjectMetadata>,
709 projects_root: &Path,
710 path: &Path,
711) -> Option<ProjectInfo> {
712 claude_session_metadata_for_file(session_projects, path)
713 .and_then(|metadata| {
714 resolve_project_context(
715 metadata.project_path.clone(),
716 None,
717 metadata.git_branch.clone(),
718 )
719 })
720 .or_else(|| project_context_from_path_fallback(projects_root, path))
721}
722
723fn claude_session_metadata_for_file<'a>(
724 session_projects: &'a HashMap<String, ClaudeSessionProjectMetadata>,
725 path: &Path,
726) -> Option<&'a ClaudeSessionProjectMetadata> {
727 let canonical_path = canonical_display(path);
728 if let Some(metadata) = session_projects.get(&canonical_path) {
729 return Some(metadata);
730 }
731
732 path.ancestors()
733 .skip(1)
734 .find_map(|ancestor| session_projects.get(&canonical_display(ancestor)))
735}
736
737fn parse_claude_stats_cache(
738 adapter: &ClaudeCodeAdapter,
739 source: &SourceLocation,
740 options: &ScanOptions,
741 path: &Path,
742 scan: &mut AdapterScan,
743) -> Result<()> {
744 if !path.is_file() {
745 return Ok(());
746 }
747 let text = std::fs::read_to_string(path).with_context(|| format!("read {}", path.display()))?;
748 let value: Value =
749 serde_json::from_str(&text).with_context(|| format!("parse {}", path.display()))?;
750 let Some(model_usage) = value.get("modelUsage").and_then(Value::as_object) else {
751 scan.diagnostics.invalid_rows += 1;
752 return Ok(());
753 };
754
755 let period_start = value
756 .get("firstSessionDate")
757 .and_then(timestamp_from_scalar);
758 let period_end = value.get("lastComputedDate").and_then(stats_cache_date_end);
759 let observed_at = period_end
760 .or_else(|| file_modified_timestamp(path))
761 .unwrap_or_else(Utc::now);
762 let metadata = SummaryMetadata {
763 summary_format: "claude_stats_cache".to_string(),
764 summary_version: value
765 .get("version")
766 .and_then(value_as_u64)
767 .map(|value| value.to_string()),
768 total_sessions: value.get("totalSessions").and_then(value_as_u64),
769 total_messages: value.get("totalMessages").and_then(value_as_u64),
770 last_computed_at: period_end,
771 };
772 let file_path_hash = hash_text(&canonical_display(path));
773
774 for (model_name, usage_value) in model_usage {
775 scan.diagnostics.candidate_usage_rows += 1;
776 let usage = claude_usage_counts_from_value(usage_value);
777 if usage.computed_total() == 0 {
778 scan.diagnostics.skipped_zero_events += 1;
779 continue;
780 }
781 let model = model_info(model_name);
782 let semantic_key = format!(
783 "claude_stats_cache.v1:{}:{}:{}:{}:{}:{}:{}:{}",
784 model_name,
785 period_start
786 .map(|date| date.to_rfc3339())
787 .unwrap_or_else(|| "unknown_start".to_string()),
788 period_end
789 .map(|date| date.to_rfc3339())
790 .unwrap_or_else(|| "unknown_end".to_string()),
791 usage.input_tokens.unwrap_or(0),
792 usage.cache_read_tokens.unwrap_or(0),
793 usage.cache_creation_tokens.unwrap_or(0),
794 usage.output_tokens.unwrap_or(0),
795 usage.computed_total(),
796 );
797 let mut cost = estimate_cost(adapter.provider(), Some(&model), &usage);
798 if let Some(provider_cost) = usage_value
799 .get("costUSD")
800 .and_then(Value::as_f64)
801 .filter(|cost| *cost > 0.0)
802 {
803 cost.provider_reported_usd = Some((provider_cost * 100.0).round() as i64);
804 cost.pricing_source = Some("claude_stats_cache:costUSD".to_string());
805 cost.confidence = Confidence::Medium;
806 }
807 scan.summaries.push(UsageSummary {
808 schema_version: USAGE_SUMMARY_SCHEMA_VERSION.to_string(),
809 summary_id: summary_id(adapter.provider(), &source.source_id, &semantic_key),
810 device_id: options.device_id.clone(),
811 provider: adapter.provider().to_string(),
812 source_id: source.source_id.clone(),
813 provider_account_id: None,
814 source: EventSource {
815 adapter_id: adapter.id().to_string(),
816 adapter_version: adapter.version().to_string(),
817 source_kind: SourceKind::LocalSummary,
818 location_origin: Some(source.location_origin.clone()),
819 source_type: "stats-cache.json".to_string(),
820 source_path_hash: Some(file_path_hash.clone()),
821 source_record_id: Some(format!("summary_key_{}", &hash_text(&semantic_key)[..32])),
822 parse_confidence: Confidence::Medium,
823 },
824 model: Some(model),
825 models: Vec::new(),
826 usage,
827 cost,
828 parse_evidence: Some(ParseEvidence {
829 event_key_version: "claude_stats_cache_summary.v1".to_string(),
830 source_file_path_hash: Some(file_path_hash.clone()),
831 source_line_number: None,
832 source_record_id: Some(semantic_key),
833 model_inferred: false,
834 timestamp_inferred: period_start.is_none() || period_end.is_none(),
835 account_identity_source: IdentitySource::Unresolved,
836 }),
837 project: None,
838 privacy: metadata_only_privacy(),
839 metrics: None,
840 period_start,
841 period_end,
842 observed_at,
843 metadata: metadata.clone(),
844 imported_at: Utc::now(),
845 });
846 }
847
848 Ok(())
849}
850
851fn parse_codex_file(
852 ctx: &mut FileParseContext<'_, CodexAdapter>,
853 root: &Path,
854 usage_root: &Path,
855 path: &Path,
856) -> Result<()> {
857 let file = File::open(path).with_context(|| format!("read {}", path.display()))?;
858 let reader = BufReader::new(file);
859 let fallback_timestamp = file_modified_timestamp(path).unwrap_or_else(Utc::now);
860 let mut previous_totals: Option<UsageCounts> = None;
861 let mut current_model: Option<String> = None;
862 let mut current_model_is_fallback = false;
863 let mut current_project: Option<ProjectInfo> = None;
864 let session_raw = codex_session_id(usage_root, path);
865 let mut records = Vec::new();
866
867 for (index, line) in reader.lines().enumerate() {
868 let line = line?;
869 if line.trim().is_empty() {
870 continue;
871 }
872 ctx.scan.diagnostics.raw_rows += 1;
873 if !codex_line_could_have_usage_or_context(&line) {
874 continue;
875 }
876 let Ok(value) = serde_json::from_str::<Value>(&line) else {
877 ctx.scan.diagnostics.invalid_rows += 1;
878 continue;
879 };
880
881 if is_codex_session_meta(&value) {
882 current_project = codex_project_context_from_value(&value);
883 continue;
884 }
885
886 if is_codex_turn_context(&value) {
887 if let Some(model_name) = codex_model_from_value(&value, current_model.as_deref())
888 .and_then(|model| model.normalized_name)
889 {
890 current_model = Some(model_name);
891 current_model_is_fallback = false;
892 }
893 if let Some(project) = codex_project_context_from_value(&value) {
894 current_project = Some(project);
895 }
896 continue;
897 }
898
899 let is_token_count_event = is_codex_token_count(&value);
900 let is_task_started = is_codex_task_started(&value);
901 let is_task_complete = is_codex_task_complete(&value);
902 let message_role = codex_visible_message_role(&value).map(ToOwned::to_owned);
903 let event_session_raw =
904 session_raw_from_value(&value).unwrap_or_else(|| session_raw.clone());
905 let usage = if is_token_count_event {
906 let info = value.pointer("/payload/info");
907 let total_usage = info
908 .and_then(|info| info.get("total_token_usage"))
909 .map(codex_usage_counts_from_value);
910 let usage = info
911 .and_then(|info| info.get("last_token_usage"))
912 .map(codex_usage_counts_from_value)
913 .or_else(|| {
914 total_usage
915 .as_ref()
916 .map(|total| subtract_usage_counts(total, previous_totals.as_ref()))
917 });
918 if let Some(total) = total_usage {
919 previous_totals = Some(total);
920 }
921 usage
922 } else {
923 codex_headless_usage_value(&value).map(codex_usage_counts_from_value)
924 };
925
926 let (timestamp, timestamp_inferred) = timestamp_from_nested_value(&value)
927 .map(|timestamp| (timestamp, false))
928 .unwrap_or((fallback_timestamp, true));
929 if timestamp_inferred {
930 ctx.scan.diagnostics.timestamp_fallbacks += 1;
931 }
932
933 let explicit_model = codex_model_from_value(&value, None);
934 if let Some(model_name) = explicit_model
935 .as_ref()
936 .and_then(|model| {
937 model
938 .provider_model_id
939 .as_ref()
940 .or(model.name.as_ref())
941 .or(model.normalized_name.as_ref())
942 })
943 .cloned()
944 {
945 current_model = Some(model_name);
946 current_model_is_fallback = false;
947 }
948 let model_explicit = explicit_model.is_some();
949 let mut model_inferred = false;
950 let model = explicit_model.or_else(|| {
951 current_model.as_deref().map(model_info).or_else(|| {
952 model_inferred = true;
953 current_model_is_fallback = true;
954 Some(model_info("gpt-5"))
955 })
956 });
957 if current_model_is_fallback && !model_inferred {
958 model_inferred = true;
959 }
960 if model_inferred {
961 ctx.scan.diagnostics.model_fallbacks += 1;
962 }
963
964 let usage = usage.and_then(|usage| {
965 ctx.scan.diagnostics.candidate_usage_rows += 1;
966 if usage.computed_total() == 0 {
967 ctx.scan.diagnostics.skipped_zero_events += 1;
968 None
969 } else {
970 Some(usage)
971 }
972 });
973
974 records.push(CodexLineRecord {
975 line_number: index + 1,
976 value,
977 timestamp,
978 timestamp_inferred,
979 session_raw: event_session_raw,
980 model,
981 model_inferred,
982 model_explicit,
983 usage,
984 is_token_count_event,
985 is_task_started,
986 is_task_complete,
987 message_role,
988 project: current_project
989 .clone()
990 .or_else(|| project_context_from_path_fallback(root, path)),
991 });
992 }
993
994 let mut active_turns: Vec<ActiveCodexTurn> = Vec::new();
995 let mut consumed_usage_lines = HashSet::new();
996
997 for record in &records {
998 if record.is_task_started {
999 let started_at = codex_task_timestamp(&record.value, &["/payload/started_at"])
1000 .unwrap_or(record.timestamp);
1001 active_turns.push(ActiveCodexTurn {
1002 started_at,
1003 session_raw: record.session_raw.clone(),
1004 model: record.model.clone(),
1005 model_inferred: record.model_inferred,
1006 timestamp_inferred: record.timestamp_inferred,
1007 message_counts: CodexMessageCounts::default(),
1008 last_usage: record.usage.clone(),
1009 accumulated_usage: record.usage.clone(),
1010 usage_lines: record
1011 .usage
1012 .as_ref()
1013 .map(|_| vec![record.line_number])
1014 .unwrap_or_default(),
1015 project: record.project.clone(),
1016 });
1017 if record.usage.is_some() {
1018 consumed_usage_lines.insert(record.line_number);
1019 }
1020 continue;
1021 }
1022
1023 if let Some(turn) = active_turns
1024 .iter_mut()
1025 .rfind(|turn| turn.session_raw == record.session_raw)
1026 {
1027 if record.model_explicit {
1028 turn.model = record.model.clone();
1029 turn.model_inferred = record.model_inferred;
1030 }
1031 turn.timestamp_inferred |= record.timestamp_inferred;
1032 if record.project.is_some() {
1033 turn.project = record.project.clone();
1034 }
1035 if let Some(role) = record.message_role.as_deref() {
1036 turn.message_counts.total = turn.message_counts.total.saturating_add(1);
1037 match role {
1038 "user" => turn.message_counts.user = turn.message_counts.user.saturating_add(1),
1039 "assistant" => {
1040 turn.message_counts.assistant =
1041 turn.message_counts.assistant.saturating_add(1)
1042 }
1043 "developer" => {
1044 turn.message_counts.developer =
1045 turn.message_counts.developer.saturating_add(1)
1046 }
1047 _ => {}
1048 }
1049 }
1050 if let Some(usage) = record.usage.clone() {
1051 if !record.is_task_complete {
1052 turn.accumulated_usage = Some(
1053 turn.accumulated_usage
1054 .as_ref()
1055 .map(|accumulated| sum_usage_counts(accumulated, &usage))
1056 .unwrap_or_else(|| usage.clone()),
1057 );
1058 turn.last_usage = Some(usage);
1059 turn.usage_lines.push(record.line_number);
1060 }
1061 }
1062 }
1063
1064 if record.is_task_complete {
1065 let Some(turn_index) = active_turns
1066 .iter()
1067 .rposition(|turn| turn.session_raw == record.session_raw)
1068 else {
1069 continue;
1070 };
1071 let turn = active_turns.remove(turn_index);
1072 let completed_at = codex_task_timestamp(&record.value, &["/payload/completed_at"])
1073 .unwrap_or(record.timestamp);
1074 let usage = record
1075 .usage
1076 .clone()
1077 .or(turn.accumulated_usage.clone())
1078 .or(turn.last_usage.clone());
1079 let Some(usage) = usage else {
1080 continue;
1081 };
1082 for line_number in turn.usage_lines {
1083 consumed_usage_lines.insert(line_number);
1084 }
1085 if record.usage.is_some() {
1086 consumed_usage_lines.insert(record.line_number);
1087 }
1088 let explicit_duration_ms = codex_task_u64(
1089 &record.value,
1090 &["/payload/duration_ms", "/payload/durationMs"],
1091 );
1092 let duration_ms = explicit_duration_ms
1093 .or_else(|| codex_duration_from_turn_timestamps(turn.started_at, completed_at));
1094 let latency_source = explicit_duration_ms
1095 .map(|_| LatencySource::Explicit)
1096 .or_else(|| duration_ms.map(|_| LatencySource::Inferred));
1097 let time_to_first_token_ms = codex_task_u64(
1098 &record.value,
1099 &[
1100 "/payload/time_to_first_token_ms",
1101 "/payload/timeToFirstTokenMs",
1102 ],
1103 );
1104 let event = usage_event(
1105 ctx.adapter,
1106 ctx.source,
1107 ctx.options,
1108 ProviderEventParts {
1109 timestamp: completed_at,
1110 session_started_at: Some(turn.started_at),
1111 session_ended_at: Some(completed_at),
1112 duration_seconds: duration_ms.map(|value| value / 1000),
1113 model: record.model.clone().or(turn.model.clone()),
1114 usage,
1115 runtime: Some(RuntimeInfo {
1116 runtime_name: None,
1117 host_id: None,
1118 latency_ms: duration_ms,
1119 latency_source,
1120 time_to_first_token_ms,
1121 prompt_eval_duration_ms: None,
1122 eval_duration_ms: None,
1123 total_messages: Some(turn.message_counts.total),
1124 user_messages: Some(turn.message_counts.user),
1125 assistant_messages: Some(turn.message_counts.assistant),
1126 developer_messages: Some(turn.message_counts.developer),
1127 }),
1128 session_raw: turn.session_raw,
1129 project: record
1130 .project
1131 .clone()
1132 .or(turn.project.clone())
1133 .or_else(|| project_context_from_path_fallback(root, path)),
1134 event_kind: "codex_turn_usage",
1135 source_file: path,
1136 line_number: record.line_number,
1137 model_inferred: record.model_inferred || turn.model_inferred,
1138 timestamp_inferred: record.timestamp_inferred || turn.timestamp_inferred,
1139 deduplication: EventDeduplication::PathIndependent,
1140 },
1141 );
1142 push_deduped(ctx.scan, ctx.seen, event);
1143 }
1144 }
1145
1146 for record in records {
1147 let Some(usage) = record.usage else {
1148 continue;
1149 };
1150 if consumed_usage_lines.contains(&record.line_number) {
1151 continue;
1152 }
1153 let event = usage_event(
1154 ctx.adapter,
1155 ctx.source,
1156 ctx.options,
1157 ProviderEventParts {
1158 timestamp: record.timestamp,
1159 session_started_at: None,
1160 session_ended_at: None,
1161 duration_seconds: None,
1162 model: record.model,
1163 usage,
1164 runtime: None,
1165 session_raw: record.session_raw,
1166 project: record
1167 .project
1168 .or_else(|| project_context_from_path_fallback(root, path)),
1169 event_kind: if record.is_token_count_event {
1170 "codex_token_count"
1171 } else {
1172 "codex_headless_usage"
1173 },
1174 source_file: path,
1175 line_number: record.line_number,
1176 model_inferred: record.model_inferred,
1177 timestamp_inferred: record.timestamp_inferred,
1178 deduplication: if record.is_token_count_event {
1179 EventDeduplication::PathIndependent
1180 } else {
1181 EventDeduplication::SessionScoped
1182 },
1183 },
1184 );
1185 push_deduped(ctx.scan, ctx.seen, event);
1186 }
1187
1188 Ok(())
1189}
1190
1191#[derive(Debug, Clone)]
1192struct CodexLineRecord {
1193 line_number: usize,
1194 value: Value,
1195 timestamp: DateTime<Utc>,
1196 timestamp_inferred: bool,
1197 session_raw: String,
1198 model: Option<ModelInfo>,
1199 model_inferred: bool,
1200 model_explicit: bool,
1201 usage: Option<UsageCounts>,
1202 is_token_count_event: bool,
1203 is_task_started: bool,
1204 is_task_complete: bool,
1205 message_role: Option<String>,
1206 project: Option<ProjectInfo>,
1207}
1208
/// Per-turn tally of visible Codex messages, broken down by role.
#[derive(Clone, Debug, Default)]
struct CodexMessageCounts {
    /// Every message that carried a role, including roles not tracked below.
    total: u64,
    /// Messages whose role is `"user"`.
    user: u64,
    /// Messages whose role is `"assistant"`.
    assistant: u64,
    /// Messages whose role is `"developer"`.
    developer: u64,
}
1216
1217#[derive(Debug, Clone)]
1218struct ActiveCodexTurn {
1219 started_at: DateTime<Utc>,
1220 session_raw: String,
1221 model: Option<ModelInfo>,
1222 model_inferred: bool,
1223 timestamp_inferred: bool,
1224 message_counts: CodexMessageCounts,
1225 last_usage: Option<UsageCounts>,
1226 accumulated_usage: Option<UsageCounts>,
1227 usage_lines: Vec<usize>,
1228 project: Option<ProjectInfo>,
1229}
1230
1231fn push_deduped(scan: &mut AdapterScan, seen: &mut HashSet<String>, event: UsageEvent) {
1232 let key = event
1233 .parse_evidence
1234 .as_ref()
1235 .and_then(|evidence| evidence.source_record_id.clone())
1236 .unwrap_or_else(|| event.event_id.0.clone());
1237 if seen.insert(key) {
1238 scan.events.push(event);
1239 } else {
1240 scan.diagnostics.duplicate_events += 1;
1241 }
1242}
1243
1244struct ProviderEventParts<'a> {
1245 timestamp: DateTime<Utc>,
1246 session_started_at: Option<DateTime<Utc>>,
1247 session_ended_at: Option<DateTime<Utc>>,
1248 duration_seconds: Option<u64>,
1249 model: Option<ModelInfo>,
1250 usage: UsageCounts,
1251 runtime: Option<RuntimeInfo>,
1252 session_raw: String,
1253 project: Option<ProjectInfo>,
1254 event_kind: &'static str,
1255 source_file: &'a Path,
1256 line_number: usize,
1257 model_inferred: bool,
1258 timestamp_inferred: bool,
1259 deduplication: EventDeduplication,
1260}
1261
1262fn usage_event<A: ProviderAdapter + ?Sized>(
1263 adapter: &A,
1264 source: &SourceLocation,
1265 options: &ScanOptions,
1266 parts: ProviderEventParts<'_>,
1267) -> UsageEvent {
1268 let session_hash = hash_text(&parts.session_raw);
1269 let session_started_at = parts.session_started_at.unwrap_or(parts.timestamp);
1270 let session_ended_at = parts.session_ended_at.unwrap_or(parts.timestamp);
1271 let project_key = project_bucket_key(parts.project.as_ref());
1272 let model_key = parts
1273 .model
1274 .as_ref()
1275 .and_then(|model| model.normalized_name.as_deref().or(model.name.as_deref()))
1276 .unwrap_or("unknown");
1277 let (event_key_version, semantic_key) = match parts.deduplication {
1278 EventDeduplication::SessionScoped => (
1279 SESSION_SCOPED_EVENT_KEY_VERSION,
1280 if parts.session_started_at.is_some() || parts.session_ended_at.is_some() {
1281 format!(
1282 "{SESSION_SCOPED_EVENT_KEY_VERSION}:{}:{}:{}:{}:{}:{}:{}:{}:{}:{}",
1283 parts.event_kind,
1284 session_hash,
1285 session_started_at.timestamp_millis(),
1286 session_ended_at.timestamp_millis(),
1287 model_key,
1288 parts.usage.input_tokens.unwrap_or(0),
1289 parts.usage.cache_read_tokens.unwrap_or(0),
1290 parts.usage.output_tokens.unwrap_or(0),
1291 parts.usage.reasoning_tokens.unwrap_or(0),
1292 parts.usage.computed_total()
1293 )
1294 } else {
1295 format!(
1296 "{SESSION_SCOPED_EVENT_KEY_VERSION}:{}:{}:{}:{}:{}:{}:{}:{}:{}",
1297 parts.event_kind,
1298 session_hash,
1299 parts.timestamp.timestamp_millis(),
1300 model_key,
1301 parts.usage.input_tokens.unwrap_or(0),
1302 parts.usage.cache_read_tokens.unwrap_or(0),
1303 parts.usage.output_tokens.unwrap_or(0),
1304 parts.usage.reasoning_tokens.unwrap_or(0),
1305 parts.usage.computed_total()
1306 )
1307 },
1308 ),
1309 EventDeduplication::PathIndependent => (
1310 PATH_INDEPENDENT_EVENT_KEY_VERSION,
1311 if parts.session_started_at.is_some() || parts.session_ended_at.is_some() {
1312 format!(
1313 "{PATH_INDEPENDENT_EVENT_KEY_VERSION}:{}:{}:{}:{}:{}:{}:{}:{}:{}:{}",
1314 parts.event_kind,
1315 &project_key,
1316 session_started_at.timestamp_millis(),
1317 session_ended_at.timestamp_millis(),
1318 model_key,
1319 parts.usage.input_tokens.unwrap_or(0),
1320 parts.usage.cache_read_tokens.unwrap_or(0),
1321 parts.usage.output_tokens.unwrap_or(0),
1322 parts.usage.reasoning_tokens.unwrap_or(0),
1323 parts.usage.computed_total()
1324 )
1325 } else {
1326 format!(
1327 "{PATH_INDEPENDENT_EVENT_KEY_VERSION}:{}:{}:{}:{}:{}:{}:{}:{}:{}",
1328 parts.event_kind,
1329 &project_key,
1330 parts.timestamp.timestamp_millis(),
1331 model_key,
1332 parts.usage.input_tokens.unwrap_or(0),
1333 parts.usage.cache_read_tokens.unwrap_or(0),
1334 parts.usage.output_tokens.unwrap_or(0),
1335 parts.usage.reasoning_tokens.unwrap_or(0),
1336 parts.usage.computed_total()
1337 )
1338 },
1339 ),
1340 };
1341 let event_id = semantic_event_id(adapter.provider(), &source.source_id, &semantic_key);
1342 let file_path_hash = hash_text(&canonical_display(parts.source_file));
1343 let source_record_id = format!("usage_key_{}", &hash_text(&semantic_key)[..32]);
1344 let cost = estimate_cost(adapter.provider(), parts.model.as_ref(), &parts.usage);
1345
1346 UsageEvent {
1347 schema_version: USAGE_EVENT_SCHEMA_VERSION.to_string(),
1348 event_id,
1349 device_id: options.device_id.clone(),
1350 provider: adapter.provider().to_string(),
1351 source_id: source.source_id.clone(),
1352 provider_account_id: None,
1353 subscription_id: None,
1354 source: EventSource {
1355 adapter_id: adapter.id().to_string(),
1356 adapter_version: adapter.version().to_string(),
1357 source_kind: source.source_kind.clone(),
1358 location_origin: Some(source.location_origin.clone()),
1359 source_type: "jsonl".to_string(),
1360 source_path_hash: source.path_hash.clone(),
1361 source_record_id: Some(source_record_id.clone()),
1362 parse_confidence: if parts.model_inferred || parts.timestamp_inferred {
1363 Confidence::Medium
1364 } else {
1365 Confidence::High
1366 },
1367 },
1368 session: SessionInfo {
1369 session_id: format!("session_{}", &session_hash[..24]),
1370 local_session_id_hash: Some(session_hash),
1371 title: None,
1372 started_at: session_started_at,
1373 ended_at: parts.session_ended_at,
1374 duration_seconds: parts.duration_seconds,
1375 },
1376 model: parts.model,
1377 runtime: parts.runtime,
1378 cost,
1379 parse_evidence: Some(ParseEvidence {
1380 event_key_version: event_key_version.to_string(),
1381 source_file_path_hash: Some(file_path_hash),
1382 source_line_number: Some(parts.line_number as u64),
1383 source_record_id: Some(semantic_key),
1384 model_inferred: parts.model_inferred,
1385 timestamp_inferred: parts.timestamp_inferred,
1386 account_identity_source: IdentitySource::Unresolved,
1387 }),
1388 usage: parts.usage,
1389 project: parts.project,
1390 git: None,
1391 privacy: metadata_only_privacy(),
1392 created_at: parts.timestamp,
1393 imported_at: Utc::now(),
1394 }
1395}
1396
1397fn claude_usage_counts_from_value(value: &Value) -> UsageCounts {
1398 let input = number_at_any(
1399 value,
1400 &[
1401 "input_tokens",
1402 "inputTokens",
1403 "prompt_tokens",
1404 "promptTokens",
1405 "input",
1406 ],
1407 );
1408 let output = number_at_any(
1409 value,
1410 &[
1411 "output_tokens",
1412 "outputTokens",
1413 "completion_tokens",
1414 "completionTokens",
1415 "output",
1416 ],
1417 );
1418 let cache_creation = number_at_any(
1419 value,
1420 &[
1421 "cache_creation_input_tokens",
1422 "cacheCreationInputTokens",
1423 "cacheCreationTokens",
1424 "cache_creation_tokens",
1425 ],
1426 );
1427 let cache_read = number_at_any(
1428 value,
1429 &[
1430 "cache_read_input_tokens",
1431 "cacheReadInputTokens",
1432 "cacheReadTokens",
1433 "cache_read_tokens",
1434 "cached_input_tokens",
1435 ],
1436 );
1437 let reasoning = number_at_any(
1438 value,
1439 &[
1440 "reasoning_tokens",
1441 "reasoningTokens",
1442 "reasoning_output_tokens",
1443 "reasoningOutputTokens",
1444 ],
1445 );
1446 let total = number_at_any(value, &["total_tokens", "totalTokens", "total"]);
1447 let output = output
1448 .or_else(|| infer_missing_output(total, input, cache_creation, cache_read, reasoning));
1449
1450 UsageCounts {
1451 input_tokens: input,
1452 output_tokens: output,
1453 cache_creation_tokens: cache_creation,
1454 cache_read_tokens: cache_read,
1455 reasoning_tokens: reasoning,
1456 total_tokens: total,
1457 requests: Some(1),
1458 local_prompt_eval_tokens: None,
1459 local_eval_tokens: None,
1460 }
1461}
1462
1463fn codex_usage_counts_from_value(value: &Value) -> UsageCounts {
1464 let raw_input = number_at_any(value, &["input_tokens", "prompt_tokens", "input"]);
1465 let raw_output = number_at_any(value, &["output_tokens", "completion_tokens", "output"]);
1466 let raw_cache_creation = number_at_any(
1467 value,
1468 &[
1469 "cache_creation_input_tokens",
1470 "cacheCreationInputTokens",
1471 "cache_creation_tokens",
1472 "cacheCreationTokens",
1473 ],
1474 );
1475 let raw_cache_read = number_at_any(
1476 value,
1477 &[
1478 "cached_input_tokens",
1479 "cache_read_input_tokens",
1480 "cached_tokens",
1481 ],
1482 );
1483 let raw_reasoning = number_at_any(value, &["reasoning_output_tokens", "reasoning_tokens"]);
1484 let total = number_at_any(value, &["total_tokens", "total"]);
1485
1486 normalize_codex_usage_counts(
1487 raw_input,
1488 raw_output,
1489 raw_cache_creation,
1490 raw_cache_read,
1491 raw_reasoning,
1492 total,
1493 )
1494}
1495
/// Infers an output-token count from a reported total.
///
/// Returns `total` minus the sum of the other known counters (missing
/// counters count as zero) when that difference is strictly positive, and
/// `None` when there is no total or nothing is left over.
///
/// The known counters are summed with saturating arithmetic so that
/// malformed, oversized values in a log cannot overflow (which would panic in
/// debug builds and wrap in release builds).
fn infer_missing_output(
    total: Option<u64>,
    input: Option<u64>,
    cache_creation: Option<u64>,
    cache_read: Option<u64>,
    reasoning: Option<u64>,
) -> Option<u64> {
    let total = total?;
    let known = [input, cache_creation, cache_read, reasoning]
        .iter()
        .flatten()
        .fold(0u64, |sum, count| sum.saturating_add(*count));
    // `checked_sub` is `None` when `known > total`; a zero remainder is not
    // reported as an inferred value either.
    total.checked_sub(known).filter(|remaining| *remaining > 0)
}
1511
1512fn sum_usage_counts(left: &UsageCounts, right: &UsageCounts) -> UsageCounts {
1513 fn sum_field(left: Option<u64>, right: Option<u64>) -> Option<u64> {
1514 if left.is_some() || right.is_some() {
1515 Some(left.unwrap_or(0).saturating_add(right.unwrap_or(0)))
1516 } else {
1517 None
1518 }
1519 }
1520
1521 UsageCounts {
1522 input_tokens: sum_field(left.input_tokens, right.input_tokens),
1523 output_tokens: sum_field(left.output_tokens, right.output_tokens),
1524 cache_creation_tokens: sum_field(left.cache_creation_tokens, right.cache_creation_tokens),
1525 cache_read_tokens: sum_field(left.cache_read_tokens, right.cache_read_tokens),
1526 reasoning_tokens: sum_field(left.reasoning_tokens, right.reasoning_tokens),
1527 total_tokens: sum_field(left.total_tokens, right.total_tokens),
1528 requests: sum_field(left.requests, right.requests),
1529 local_prompt_eval_tokens: sum_field(
1530 left.local_prompt_eval_tokens,
1531 right.local_prompt_eval_tokens,
1532 ),
1533 local_eval_tokens: sum_field(left.local_eval_tokens, right.local_eval_tokens),
1534 }
1535}
1536
1537fn normalize_codex_usage_counts(
1541 raw_input: Option<u64>,
1542 raw_output: Option<u64>,
1543 raw_cache_creation: Option<u64>,
1544 raw_cache_read: Option<u64>,
1545 raw_reasoning: Option<u64>,
1546 total: Option<u64>,
1547) -> UsageCounts {
1548 let cache_creation = match (raw_input, raw_cache_creation) {
1549 (Some(input), Some(cache_creation)) => Some(cache_creation.min(input)),
1550 _ => raw_cache_creation,
1551 };
1552 let cache_read = match (raw_input, raw_cache_read) {
1553 (Some(input), Some(cache_read)) => Some(cache_read.min(input)),
1554 _ => raw_cache_read,
1555 };
1556 let reasoning = match (raw_output, raw_reasoning) {
1557 (Some(output), Some(reasoning)) => Some(reasoning.min(output)),
1558 _ => raw_reasoning,
1559 };
1560 let input = raw_input.map(|input| {
1561 input
1562 .saturating_sub(cache_creation.unwrap_or(0))
1563 .saturating_sub(cache_read.unwrap_or(0))
1564 });
1565 let output = raw_output
1566 .map(|output| output.saturating_sub(reasoning.unwrap_or(0)))
1567 .or_else(|| infer_missing_output(total, input, cache_creation, cache_read, reasoning));
1568 let total = total.or_else(|| {
1569 (input.is_some()
1570 || output.is_some()
1571 || cache_creation.is_some()
1572 || cache_read.is_some()
1573 || reasoning.is_some())
1574 .then_some(
1575 input
1576 .unwrap_or(0)
1577 .saturating_add(output.unwrap_or(0))
1578 .saturating_add(cache_creation.unwrap_or(0))
1579 .saturating_add(cache_read.unwrap_or(0))
1580 .saturating_add(reasoning.unwrap_or(0)),
1581 )
1582 });
1583
1584 UsageCounts {
1585 input_tokens: input,
1586 output_tokens: output,
1587 cache_creation_tokens: cache_creation,
1588 cache_read_tokens: cache_read,
1589 reasoning_tokens: reasoning,
1590 total_tokens: total,
1591 requests: Some(1),
1592 local_prompt_eval_tokens: None,
1593 local_eval_tokens: None,
1594 }
1595}
1596
1597fn subtract_usage_counts(current: &UsageCounts, previous: Option<&UsageCounts>) -> UsageCounts {
1598 let subtract = |left: Option<u64>, right: Option<u64>| {
1599 let value = left.unwrap_or(0).saturating_sub(right.unwrap_or(0));
1600 (value > 0).then_some(value)
1601 };
1602 UsageCounts {
1603 input_tokens: subtract(
1604 current.input_tokens,
1605 previous.and_then(|usage| usage.input_tokens),
1606 ),
1607 output_tokens: subtract(
1608 current.output_tokens,
1609 previous.and_then(|usage| usage.output_tokens),
1610 ),
1611 cache_creation_tokens: subtract(
1612 current.cache_creation_tokens,
1613 previous.and_then(|usage| usage.cache_creation_tokens),
1614 ),
1615 cache_read_tokens: subtract(
1616 current.cache_read_tokens,
1617 previous.and_then(|usage| usage.cache_read_tokens),
1618 ),
1619 reasoning_tokens: subtract(
1620 current.reasoning_tokens,
1621 previous.and_then(|usage| usage.reasoning_tokens),
1622 ),
1623 total_tokens: subtract(
1624 current.total_tokens,
1625 previous.and_then(|usage| usage.total_tokens),
1626 ),
1627 requests: Some(1),
1628 local_prompt_eval_tokens: None,
1629 local_eval_tokens: None,
1630 }
1631}
1632
1633fn number_at_any(value: &Value, keys: &[&str]) -> Option<u64> {
1634 keys.iter()
1635 .find_map(|key| value.get(*key).and_then(value_as_u64))
1636}
1637
1638fn value_as_u64(value: &Value) -> Option<u64> {
1639 value
1640 .as_u64()
1641 .or_else(|| {
1642 value
1643 .as_i64()
1644 .and_then(|value| (value >= 0).then_some(value as u64))
1645 })
1646 .or_else(|| value.as_str().and_then(|text| text.parse::<u64>().ok()))
1647}
1648
1649fn timestamp_from_nested_value(value: &Value) -> Option<DateTime<Utc>> {
1650 for candidate in [
1651 value.get("timestamp"),
1652 value.get("created_at"),
1653 value.get("createdAt"),
1654 value.get("time"),
1655 value.pointer("/message/timestamp"),
1656 value.pointer("/data/timestamp"),
1657 value.pointer("/result/timestamp"),
1658 value.pointer("/response/timestamp"),
1659 ]
1660 .into_iter()
1661 .flatten()
1662 {
1663 if let Some(timestamp) = timestamp_from_scalar(candidate) {
1664 return Some(timestamp);
1665 }
1666 }
1667 None
1668}
1669
1670fn timestamp_from_scalar(value: &Value) -> Option<DateTime<Utc>> {
1671 if let Some(text) = value.as_str() {
1672 if let Ok(parsed) = DateTime::parse_from_rfc3339(text) {
1673 return Some(parsed.with_timezone(&Utc));
1674 }
1675 if let Ok(millis) = text.parse::<i64>() {
1676 return timestamp_from_number(millis);
1677 }
1678 }
1679 value.as_i64().and_then(timestamp_from_number)
1680}
1681
1682fn stats_cache_date_end(value: &Value) -> Option<DateTime<Utc>> {
1683 timestamp_from_scalar(value).or_else(|| {
1684 let text = value.as_str()?;
1685 let date = NaiveDate::parse_from_str(text, "%Y-%m-%d").ok()?;
1686 Some(date.and_hms_opt(23, 59, 59)?.and_utc())
1687 })
1688}
1689
1690fn timestamp_from_number(value: i64) -> Option<DateTime<Utc>> {
1691 if value > 10_000_000_000 {
1692 Utc.timestamp_millis_opt(value).single()
1693 } else {
1694 Utc.timestamp_opt(value, 0).single()
1695 }
1696}
1697
1698fn file_modified_timestamp(path: &Path) -> Option<DateTime<Utc>> {
1699 path.metadata()
1700 .ok()
1701 .and_then(|metadata| metadata.modified().ok())
1702 .map(DateTime::<Utc>::from)
1703}
1704
1705fn model_from_nested_value(value: &Value, fallback: Option<&str>) -> Option<ModelInfo> {
1706 let model = [
1707 value.get("model"),
1708 value.get("model_name"),
1709 value.pointer("/metadata/model"),
1710 value.pointer("/message/model"),
1711 value.pointer("/usage/model"),
1712 value.pointer("/request/model"),
1713 value.pointer("/data/model"),
1714 value.pointer("/data/model_name"),
1715 value.pointer("/data/metadata/model"),
1716 value.pointer("/result/model"),
1717 value.pointer("/result/model_name"),
1718 value.pointer("/result/metadata/model"),
1719 value.pointer("/response/model"),
1720 value.pointer("/response/model_name"),
1721 value.pointer("/response/metadata/model"),
1722 value.pointer("/payload/model"),
1723 value.pointer("/payload/model_name"),
1724 value.pointer("/payload/metadata/model"),
1725 value.pointer("/payload/info/model"),
1726 value.pointer("/payload/info/model_name"),
1727 value.pointer("/payload/info/metadata/model"),
1728 ]
1729 .into_iter()
1730 .flatten()
1731 .find_map(Value::as_str)
1732 .or(fallback)?;
1733 Some(model_info(model))
1734}
1735
1736fn model_info(model: &str) -> ModelInfo {
1737 let normalized = normalize_model_name(model);
1738 ModelInfo {
1739 name: Some(model.to_string()),
1740 normalized_name: Some(normalized),
1741 provider_model_id: Some(model.to_string()),
1742 }
1743}
1744
1745fn is_codex_session_meta(value: &Value) -> bool {
1746 value.get("type").and_then(Value::as_str) == Some("session_meta")
1747}
1748
1749fn codex_model_from_value(value: &Value, fallback: Option<&str>) -> Option<ModelInfo> {
1750 model_from_nested_value(value, fallback)
1751}
1752
1753fn is_codex_turn_context(value: &Value) -> bool {
1754 value.get("type").and_then(Value::as_str) == Some("turn_context")
1755}
1756
1757fn is_codex_token_count(value: &Value) -> bool {
1758 value.get("type").and_then(Value::as_str) == Some("event_msg")
1759 && value.pointer("/payload/type").and_then(Value::as_str) == Some("token_count")
1760}
1761
1762fn is_codex_task_started(value: &Value) -> bool {
1763 value.get("type").and_then(Value::as_str) == Some("event_msg")
1764 && value.pointer("/payload/type").and_then(Value::as_str) == Some("task_started")
1765}
1766
1767fn is_codex_task_complete(value: &Value) -> bool {
1768 value.get("type").and_then(Value::as_str) == Some("event_msg")
1769 && value.pointer("/payload/type").and_then(Value::as_str) == Some("task_complete")
1770}
1771
1772fn codex_visible_message_role(value: &Value) -> Option<&str> {
1773 (value.get("type").and_then(Value::as_str) == Some("response_item")
1774 && value.pointer("/payload/type").and_then(Value::as_str) == Some("message"))
1775 .then(|| value.pointer("/payload/role").and_then(Value::as_str))
1776 .flatten()
1777}
1778
/// Cheap textual pre-filter for raw Codex log lines.
///
/// Returns `true` when the line contains at least one of the quoted JSON
/// tokens that mark a record as potentially relevant (session/turn context,
/// token counts, task boundaries, response items, or usage fields).
fn codex_line_could_have_usage_or_context(line: &str) -> bool {
    const MARKERS: [&str; 9] = [
        "\"session_meta\"",
        "\"turn_context\"",
        "\"token_count\"",
        "\"task_started\"",
        "\"task_complete\"",
        "\"response_item\"",
        "\"usage\"",
        "\"input_tokens\"",
        "\"prompt_tokens\"",
    ];

    MARKERS.iter().any(|marker| line.contains(marker))
}
1790
1791fn codex_task_timestamp(value: &Value, pointers: &[&str]) -> Option<DateTime<Utc>> {
1792 pointers
1793 .iter()
1794 .filter_map(|pointer| value.pointer(pointer))
1795 .find_map(timestamp_from_scalar)
1796}
1797
1798fn codex_task_u64(value: &Value, pointers: &[&str]) -> Option<u64> {
1799 pointers
1800 .iter()
1801 .filter_map(|pointer| value.pointer(pointer))
1802 .find_map(value_as_u64)
1803}
1804
1805fn codex_duration_from_turn_timestamps(
1806 started_at: DateTime<Utc>,
1807 completed_at: DateTime<Utc>,
1808) -> Option<u64> {
1809 let millis = completed_at
1810 .signed_duration_since(started_at)
1811 .num_milliseconds();
1812 (millis >= 0).then_some(millis as u64)
1813}
1814
1815fn load_claude_session_projects(
1816 projects_root: &Path,
1817) -> HashMap<String, ClaudeSessionProjectMetadata> {
1818 let mut projects = HashMap::new();
1819 if !projects_root.exists() {
1820 return projects;
1821 }
1822
1823 for entry in WalkDir::new(projects_root).follow_links(false) {
1824 let Ok(entry) = entry else {
1825 continue;
1826 };
1827 if !entry.file_type().is_file() || entry.file_name() != "sessions-index.json" {
1828 continue;
1829 }
1830 let Ok(text) = std::fs::read_to_string(entry.path()) else {
1831 continue;
1832 };
1833 let Ok(value) = serde_json::from_str::<Value>(&text) else {
1834 continue;
1835 };
1836 if let Some(project_store_root) = entry.path().parent() {
1837 let original_path = value
1838 .get("originalPath")
1839 .and_then(Value::as_str)
1840 .map(expand_home_path)
1841 .or_else(|| {
1842 value
1843 .get("entries")
1844 .and_then(Value::as_array)
1845 .and_then(|entries| entries.first())
1846 .and_then(|item| item.get("projectPath"))
1847 .and_then(Value::as_str)
1848 .map(expand_home_path)
1849 });
1850 if let Some(project_path) = original_path {
1851 projects.insert(
1852 canonical_display(project_store_root),
1853 ClaudeSessionProjectMetadata {
1854 project_path: Some(project_path),
1855 git_branch: None,
1856 },
1857 );
1858 }
1859 }
1860 let Some(entries) = value.get("entries").and_then(Value::as_array) else {
1861 continue;
1862 };
1863 for item in entries {
1864 let Some(full_path) = item.get("fullPath").and_then(Value::as_str) else {
1865 continue;
1866 };
1867 let metadata = ClaudeSessionProjectMetadata {
1868 project_path: item
1869 .get("projectPath")
1870 .and_then(Value::as_str)
1871 .map(expand_home_path),
1872 git_branch: item
1873 .get("gitBranch")
1874 .and_then(Value::as_str)
1875 .map(ToOwned::to_owned),
1876 };
1877 let full_path = Path::new(full_path);
1878 projects.insert(canonical_display(full_path), metadata.clone());
1879 if full_path.extension().and_then(|ext| ext.to_str()) == Some("jsonl") {
1880 projects.insert(canonical_display(&full_path.with_extension("")), metadata);
1881 }
1882 }
1883 }
1884
1885 projects
1886}
1887
1888fn codex_project_context_from_value(value: &Value) -> Option<ProjectInfo> {
1889 let payload = value.get("payload");
1890 let project_path = payload
1891 .and_then(|payload| payload.get("cwd"))
1892 .and_then(Value::as_str)
1893 .map(expand_home_path);
1894 let repository_url = payload
1895 .and_then(|payload| payload.get("git"))
1896 .and_then(|git| git.get("repository_url"))
1897 .and_then(Value::as_str)
1898 .map(ToOwned::to_owned);
1899 let branch = payload
1900 .and_then(|payload| payload.get("git"))
1901 .and_then(|git| git.get("branch"))
1902 .and_then(Value::as_str)
1903 .map(ToOwned::to_owned);
1904 resolve_project_context(project_path, repository_url, branch)
1905}
1906
1907fn resolve_project_context(
1908 project_path: Option<PathBuf>,
1909 repository_url: Option<String>,
1910 branch: Option<String>,
1911) -> Option<ProjectInfo> {
1912 let git = project_path
1913 .as_deref()
1914 .and_then(read_git_repository_metadata);
1915 let normalized_remote = repository_url
1916 .as_deref()
1917 .and_then(normalize_git_remote)
1918 .or_else(|| {
1919 git.as_ref()
1920 .and_then(|metadata| metadata.normalized_remote.clone())
1921 });
1922 let repo_remote_hash = normalized_remote.as_ref().map(|remote| hash_text(remote));
1923 let repo_label = normalized_remote
1924 .as_deref()
1925 .map(repo_label_from_normalized_remote)
1926 .or_else(|| {
1927 git.as_ref()
1928 .and_then(|metadata| metadata.repo_label.clone())
1929 });
1930 let branch_label = branch.or_else(|| {
1931 git.as_ref()
1932 .and_then(|metadata| metadata.branch_label.clone())
1933 });
1934 let branch_hash = branch_label.as_ref().map(|branch| hash_text(branch));
1935 let project_label = project_path
1936 .as_deref()
1937 .and_then(project_label_from_path)
1938 .or_else(|| repo_label.clone());
1939 let path_hash_value = project_path.as_deref().map(path_hash);
1940 let path_label = project_path.as_deref().map(display_path);
1941
1942 ProjectContext {
1943 project_label,
1944 repo_remote_hash,
1945 repo_label,
1946 branch_hash,
1947 branch_label,
1948 path_hash: path_hash_value,
1949 path_label,
1950 }
1951 .into_project_info()
1952}
1953
1954fn project_context_from_path_fallback(root: &Path, path: &Path) -> Option<ProjectInfo> {
1955 let project_key = project_key_from_path(root, path)?;
1956 if matches!(project_key.as_str(), "sessions" | "archived_sessions") {
1957 return None;
1958 }
1959 let project_path = root.join(&project_key);
1960 ProjectContext {
1961 project_label: Some(project_key),
1962 path_hash: Some(path_hash(&project_path)),
1963 path_label: Some(display_path(&project_path)),
1964 ..ProjectContext::default()
1965 }
1966 .into_project_info()
1967}
1968
/// Details read from a git repository on disk.
#[derive(Clone, Debug, Default)]
struct GitRepositoryMetadata {
    /// Configured remote URL after normalisation, if one could be read.
    normalized_remote: Option<String>,
    /// Human-readable repository label.
    repo_label: Option<String>,
    /// Branch named by `HEAD`, when `HEAD` points at a branch.
    branch_label: Option<String>,
}
1975
1976fn read_git_repository_metadata(path: &Path) -> Option<GitRepositoryMetadata> {
1977 let repo_root = find_git_repo_root(path)?;
1978 let git_dir = git_dir_for_repo_root(&repo_root)?;
1979 let common_dir = git_common_dir(&git_dir).unwrap_or_else(|| git_dir.clone());
1980 let config_path = if git_dir.join("config").is_file() {
1981 git_dir.join("config")
1982 } else {
1983 common_dir.join("config")
1984 };
1985 let remote = read_git_remote_url(&config_path);
1986 let normalized_remote = remote.as_deref().and_then(normalize_git_remote);
1987 let repo_label = normalized_remote
1988 .as_deref()
1989 .map(repo_label_from_normalized_remote)
1990 .or_else(|| project_label_from_path(&repo_root));
1991
1992 Some(GitRepositoryMetadata {
1993 normalized_remote,
1994 repo_label,
1995 branch_label: read_git_head_branch(&git_dir),
1996 })
1997}
1998
/// Walks upwards from `path` and returns the nearest directory that contains
/// a `.git` entry (directory or file).
///
/// When `path` is not an existing directory the search starts at its parent.
fn find_git_repo_root(path: &Path) -> Option<PathBuf> {
    let start = if path.is_dir() { path } else { path.parent()? };
    start
        .ancestors()
        .find(|candidate| candidate.join(".git").exists())
        .map(Path::to_path_buf)
}
2014
/// Locates the git directory of a working tree rooted at `repo_root`.
///
/// A `.git` directory is returned directly. A `.git` file is expected to
/// contain `gitdir: <path>`; a relative `<path>` is resolved against
/// `repo_root`. Anything else yields `None`.
fn git_dir_for_repo_root(repo_root: &Path) -> Option<PathBuf> {
    let marker = repo_root.join(".git");
    if marker.is_dir() {
        return Some(marker);
    }

    let contents = std::fs::read_to_string(marker).ok()?;
    let target = Path::new(contents.trim().strip_prefix("gitdir:")?.trim());
    let resolved = if target.is_absolute() {
        target.to_path_buf()
    } else {
        repo_root.join(target)
    };
    Some(resolved)
}
2029
/// Reads the `commondir` file inside `git_dir` and returns the directory it
/// names; a relative value is resolved against `git_dir`.
///
/// Returns `None` when the file is missing, unreadable or blank.
fn git_common_dir(git_dir: &Path) -> Option<PathBuf> {
    let contents = std::fs::read_to_string(git_dir.join("commondir")).ok()?;
    let target = match contents.trim() {
        "" => return None,
        text => Path::new(text),
    };

    let resolved = if target.is_absolute() {
        target.to_path_buf()
    } else {
        git_dir.join(target)
    };
    Some(resolved)
}
2043
/// Reads a remote URL from a git `config` file.
///
/// Returns the `url` of the `origin` remote when one is configured, otherwise
/// the first `url` of any remote in file order; `None` when the file cannot
/// be read or defines no remote URL.
///
/// Parsing follows the git-config syntax rules that matter here: the section
/// keyword (`remote`) and the variable name (`url`) are matched
/// case-insensitively, the subsection name (`"origin"`) is case-sensitive,
/// and a value wrapped in double quotes is unquoted. Comments, escapes and
/// line continuations are not interpreted.
fn read_git_remote_url(config_path: &Path) -> Option<String> {
    const REMOTE_HEADER: &str = "[remote \"";

    let text = std::fs::read_to_string(config_path).ok()?;
    let mut current_remote: Option<&str> = None;
    let mut first_remote_url: Option<String> = None;
    let mut origin_remote_url: Option<String> = None;

    for line in text.lines() {
        let trimmed = line.trim();

        if trimmed.starts_with('[') {
            // Any section header ends the previous section; only
            // `[remote "<name>"]` headers open a remote section.
            current_remote = trimmed
                .get(..REMOTE_HEADER.len())
                .filter(|keyword| keyword.eq_ignore_ascii_case(REMOTE_HEADER))
                .and_then(|_| trimmed[REMOTE_HEADER.len()..].strip_suffix("\"]"))
                .and_then(|name| name.split('"').next());
            continue;
        }

        let Some(remote_name) = current_remote else {
            continue;
        };
        let Some((key, value)) = trimmed.split_once('=') else {
            continue;
        };
        if !key.trim().eq_ignore_ascii_case("url") {
            continue;
        }

        let value = value.trim();
        let url = value
            .strip_prefix('"')
            .and_then(|inner| inner.strip_suffix('"'))
            .unwrap_or(value)
            .to_string();

        if first_remote_url.is_none() {
            first_remote_url = Some(url.clone());
        }
        if remote_name == "origin" {
            origin_remote_url = Some(url);
        }
    }

    origin_remote_url.or(first_remote_url)
}
2085
/// Returns the branch name recorded in `<git_dir>/HEAD`.
///
/// Only symbolic refs of the form `ref: refs/heads/<branch>` yield a name;
/// any other content, or an unreadable file, gives `None`.
fn read_git_head_branch(git_dir: &Path) -> Option<String> {
    let head_file = git_dir.join("HEAD");
    let contents = std::fs::read_to_string(head_file).ok()?;
    let branch = contents.trim().strip_prefix("ref: refs/heads/")?;
    Some(branch.to_owned())
}
2091
/// Normalizes a git remote URL into a lowercase `host/path` identity.
///
/// Handled forms:
/// - `git@host:path` and, more generally, scp-like `user@host:path`,
/// - `scheme://[user[:password]@]host/path`,
/// - anything else is used verbatim as `host/path`.
///
/// The user part is dropped, trailing slashes and a trailing `.git` are
/// removed, empty path segments are discarded and every segment is
/// ASCII-lowercased, so SSH and HTTPS remotes of one repository produce the
/// same value.
///
/// Returns `None` for blank input, for a `git@` remote without `:`, for a
/// `scheme://` remote without a path, and when fewer than two segments remain.
fn normalize_git_remote(value: &str) -> Option<String> {
    let trimmed = value.trim().trim_end_matches('/');
    if trimmed.is_empty() {
        return None;
    }

    // scp-like `user@host:path` with any user name. The part before the colon
    // must contain `@` and no `/`, so local paths are never mistaken for a
    // login.
    let scp_like = trimmed
        .split_once(':')
        .filter(|(login, _)| !login.contains('/'))
        .and_then(|(login, path)| login.rsplit_once('@').map(|(_, host)| (host, path)))
        .filter(|(host, _)| !host.is_empty());

    let host_and_path = if let Some(rest) = trimmed.strip_prefix("git@") {
        let (host, path) = rest.split_once(':')?;
        format!("{host}/{path}")
    } else if let Some((_, rest)) = trimmed.split_once("://") {
        let rest = rest.trim_start_matches('/');
        let (authority, path) = rest.split_once('/')?;
        // Drop `user[:password]@` so credentials never end up in the identity.
        let host = authority.rsplit('@').next().unwrap_or(authority);
        format!("{host}/{path}")
    } else if let Some((host, path)) = scp_like {
        format!("{host}/{path}")
    } else {
        trimmed.to_string()
    };

    let mut parts: Vec<String> = host_and_path
        .split('/')
        .map(str::trim)
        .filter(|part| !part.is_empty())
        .map(|part| part.to_ascii_lowercase())
        .collect();
    if parts.len() < 2 {
        return None;
    }
    if let Some(last) = parts.last_mut() {
        if let Some(stripped) = last.strip_suffix(".git") {
            *last = stripped.to_string();
        }
    }
    Some(parts.join("/"))
}
2126
/// Derives a short repository label from a normalized remote.
///
/// With three or more non-empty `/`-separated segments the label is the last
/// two joined by `/`; otherwise the input is returned unchanged.
fn repo_label_from_normalized_remote(remote: &str) -> String {
    let segments: Vec<&str> = remote
        .split('/')
        .filter(|segment| !segment.is_empty())
        .collect();
    match segments.as_slice() {
        // At least one leading segment followed by the final two.
        [_, .., owner, repo] => format!("{owner}/{repo}"),
        _ => remote.to_string(),
    }
}
2135
2136fn project_label_from_path(path: &Path) -> Option<String> {
2137 path.file_name()
2138 .and_then(|name| name.to_str())
2139 .filter(|name| !name.is_empty())
2140 .map(ToOwned::to_owned)
2141 .or_else(|| {
2142 let display = display_path(path);
2143 (!display.is_empty()).then_some(display)
2144 })
2145}
2146
2147fn codex_headless_usage_value(value: &Value) -> Option<&Value> {
2148 [
2149 value.get("usage"),
2150 value.pointer("/data/usage"),
2151 value.pointer("/result/usage"),
2152 value.pointer("/response/usage"),
2153 value.get("token_count"),
2154 value.pointer("/event_msg/token_count"),
2155 ]
2156 .into_iter()
2157 .flatten()
2158 .next()
2159}
2160
2161fn codex_auth_snapshot(root: &Path) -> Option<VerifiedSourceState> {
2162 let auth_path = root.join("auth.json");
2163 let value = std::fs::read_to_string(&auth_path).ok()?;
2164 let value: Value = serde_json::from_str(&value).ok()?;
2165 let payload = string_at_any(
2166 &value,
2167 &["id_token", "idToken", "/tokens/id_token", "/tokens/idToken"],
2168 )
2169 .and_then(|token| jwt_payload_value(&token));
2170 let auth = payload
2171 .as_ref()
2172 .and_then(|payload| payload.pointer("/https:~1~1api.openai.com~1auth"))
2173 .or_else(|| value.pointer("/https:~1~1api.openai.com~1auth"));
2174
2175 let provider_user_id = auth
2176 .and_then(|auth| string_at_any(auth, &["chatgpt_account_id", "chatgpt_user_id", "user_id"]))
2177 .or_else(|| {
2178 string_at_any(
2179 &value,
2180 &[
2181 "account_id",
2182 "accountId",
2183 "chatgpt_account_id",
2184 "chatgpt_user_id",
2185 "/tokens/account_id",
2186 "/tokens/accountId",
2187 ],
2188 )
2189 });
2190 let email = payload
2191 .as_ref()
2192 .and_then(|payload| {
2193 string_at_any(
2194 payload,
2195 &["email", "/https:~1~1api.openai.com~1profile~1email"],
2196 )
2197 })
2198 .or_else(|| string_at_any(&value, &["email", "user_email"]))
2199 .map(|email| email.to_ascii_lowercase());
2200 if provider_user_id.is_none() && email.is_none() {
2201 return None;
2202 }
2203
2204 let plan_type = auth.and_then(|auth| string_at_any(auth, &["chatgpt_plan_type"]));
2205 let plan_name = plan_type.as_deref().map(display_codex_plan_name);
2206 let authenticated_at = payload
2207 .as_ref()
2208 .and_then(|payload| timestamp_at_any(payload, &["auth_time", "iat"]))
2209 .or_else(|| file_modified_at(&auth_path));
2210 let verified_at = auth
2211 .and_then(|auth| timestamp_at_any(auth, &["chatgpt_subscription_last_checked"]))
2212 .or(authenticated_at);
2213 let paid_at =
2214 auth.and_then(|auth| timestamp_at_any(auth, &["chatgpt_subscription_active_start"]));
2215 let current_period_ends_at =
2216 auth.and_then(|auth| timestamp_at_any(auth, &["chatgpt_subscription_active_until"]));
2217 let subscription = plan_type.as_deref().and_then(|plan_type| {
2218 codex_verified_subscription(plan_type, paid_at, current_period_ends_at, verified_at)
2219 });
2220
2221 Some(VerifiedSourceState {
2222 provider_user_id,
2223 email,
2224 account_label: None,
2225 plan_name,
2226 authenticated_at,
2227 verified_at,
2228 subscription,
2229 })
2230}
2231
2232fn file_modified_at(path: &Path) -> Option<DateTime<Utc>> {
2233 let modified = std::fs::metadata(path).ok()?.modified().ok()?;
2234 Some(DateTime::<Utc>::from(modified))
2235}
2236
2237fn string_at_any(value: &Value, keys: &[&str]) -> Option<String> {
2238 keys.iter()
2239 .filter_map(|key| {
2240 if key.starts_with('/') {
2241 value.pointer(key)
2242 } else {
2243 value.get(*key)
2244 }
2245 })
2246 .find_map(Value::as_str)
2247 .map(str::trim)
2248 .filter(|text| !text.is_empty())
2249 .map(ToOwned::to_owned)
2250}
2251
2252fn timestamp_at_any(value: &Value, keys: &[&str]) -> Option<DateTime<Utc>> {
2253 keys.iter()
2254 .filter_map(|key| {
2255 if key.starts_with('/') {
2256 value.pointer(key)
2257 } else {
2258 value.get(*key)
2259 }
2260 })
2261 .find_map(parse_timestamp_value)
2262}
2263
2264fn parse_timestamp_value(value: &Value) -> Option<DateTime<Utc>> {
2265 match value {
2266 Value::String(text) => DateTime::parse_from_rfc3339(text)
2267 .ok()
2268 .map(|parsed| parsed.with_timezone(&Utc)),
2269 Value::Number(number) => number
2270 .as_i64()
2271 .and_then(|seconds| Utc.timestamp_opt(seconds, 0).single()),
2272 _ => None,
2273 }
2274}
2275
/// Turns a raw plan identifier into a display name.
///
/// The input is trimmed and ASCII-lowercased. `plus`, `pro` and `free` map to
/// `Plus`, `Pro` and `Free`. Anything else is split on `_`, `-` and spaces,
/// each non-empty word gets an ASCII-uppercased first character, and the
/// words are joined with single spaces.
fn display_codex_plan_name(plan_type: &str) -> String {
    let normalized = plan_type.trim().to_ascii_lowercase();
    match normalized.as_str() {
        "plus" => return String::from("Plus"),
        "pro" => return String::from("Pro"),
        "free" => return String::from("Free"),
        _ => {}
    }

    let mut label = String::with_capacity(normalized.len());
    let words = normalized
        .split(['_', '-', ' '])
        .filter(|word| !word.is_empty());
    for (index, word) in words.enumerate() {
        if index > 0 {
            label.push(' ');
        }
        let mut letters = word.chars();
        if let Some(initial) = letters.next() {
            label.push(initial.to_ascii_uppercase());
            label.push_str(&letters.as_str().to_ascii_lowercase());
        }
    }
    label
}
2299
2300fn codex_verified_subscription(
2301 plan_type: &str,
2302 paid_at: Option<DateTime<Utc>>,
2303 current_period_ends_at: Option<DateTime<Utc>>,
2304 verified_at: Option<DateTime<Utc>>,
2305) -> Option<VerifiedSubscriptionState> {
2306 let started_at = paid_at?;
2307 let (plan_name, price) = match plan_type.trim().to_ascii_lowercase().as_str() {
2308 "plus" => ("Plus".to_string(), 2000),
2309 "pro" => ("Pro".to_string(), 20000),
2310 _ => return None,
2311 };
2312 Some(VerifiedSubscriptionState {
2313 plan_name,
2314 price,
2315 currency: "USD".to_string(),
2316 billing_period: BillingPeriod::Monthly,
2317 paid_at,
2318 started_at,
2319 ended_at: None,
2320 current_period_ends_at,
2321 status: SubscriptionStatus::Active,
2322 verified_at,
2323 })
2324}
2325
2326fn jwt_payload_value(token: &str) -> Option<Value> {
2327 let payload = token.split('.').nth(1)?;
2328 let bytes = decode_base64_url(payload)?;
2329 serde_json::from_slice(&bytes).ok()
2330}
2331
/// Leniently decodes base64 text, accepting both the standard (`+`, `/`) and
/// the URL-safe (`-`, `_`) alphabets.
///
/// Decoding stops at the first `=`; whatever follows is ignored. Leftover
/// bits that do not fill a whole byte are dropped. Any other character
/// outside the alphabets makes the whole decode return `None`.
fn decode_base64_url(value: &str) -> Option<Vec<u8>> {
    let mut decoded = Vec::new();
    let mut accumulator: u32 = 0;
    let mut pending_bits: u8 = 0;

    for symbol in value.bytes().take_while(|&symbol| symbol != b'=') {
        let sextet = match symbol {
            b'A'..=b'Z' => symbol - b'A',
            b'a'..=b'z' => symbol - b'a' + 26,
            b'0'..=b'9' => symbol - b'0' + 52,
            b'+' | b'-' => 62,
            b'/' | b'_' => 63,
            _ => return None,
        };
        accumulator = (accumulator << 6) | u32::from(sextet);
        pending_bits += 6;
        if pending_bits >= 8 {
            pending_bits -= 8;
            // The truncating cast keeps exactly the low eight bits.
            decoded.push((accumulator >> pending_bits) as u8);
        }
    }

    Some(decoded)
}
2357
/// Derives a session id from a file path: the file stem when it exists and is
/// valid UTF-8, otherwise the literal `unknown`.
fn fallback_session_id(path: &Path) -> String {
    match path.file_stem().and_then(|stem| stem.to_str()) {
        Some(stem) => stem.to_owned(),
        None => String::from("unknown"),
    }
}
2364
2365fn session_raw_from_value(value: &Value) -> Option<String> {
2366 [
2367 value.get("session_id"),
2368 value.get("sessionId"),
2369 value.pointer("/message/sessionId"),
2370 value.pointer("/message/session_id"),
2371 value.pointer("/data/session_id"),
2372 value.pointer("/result/session_id"),
2373 value.pointer("/response/session_id"),
2374 ]
2375 .into_iter()
2376 .flatten()
2377 .find_map(Value::as_str)
2378 .map(ToOwned::to_owned)
2379}
2380
/// Builds a session id from a session file's location.
///
/// The path is made relative to `usage_root` when it lies beneath it
/// (otherwise it is used as given), its extension is removed, and the UTF-8
/// components are joined with `/`.
fn codex_session_id(usage_root: &Path, path: &Path) -> String {
    let relative = match path.strip_prefix(usage_root) {
        Ok(inner) => inner,
        Err(_) => path,
    };
    let without_extension = relative.with_extension("");
    let segments: Vec<&str> = without_extension
        .components()
        .filter_map(|component| component.as_os_str().to_str())
        .collect();
    segments.join("/")
}
2390
/// Returns the first path component of `path` relative to `root`.
///
/// Yields `None` when `path` is not under `root`, when nothing follows the
/// root, or when the component is not valid UTF-8 or is empty.
fn project_key_from_path(root: &Path, path: &Path) -> Option<String> {
    let relative = path.strip_prefix(root).ok()?;
    let leading = relative.components().next()?;
    match leading.as_os_str().to_str() {
        Some(name) if !name.is_empty() => Some(name.to_owned()),
        _ => None,
    }
}
2400
2401fn metadata_only_privacy() -> PrivacyInfo {
2402 PrivacyInfo {
2403 mode: PrivacyMode::MetadataOnly,
2404 contains_prompt_text: false,
2405 contains_response_text: false,
2406 contains_file_paths: false,
2407 }
2408}
2409
2410#[cfg(test)]
2411mod tests {
2412 use super::*;
2413 use std::io::Write;
2414
2415 fn options() -> ScanOptions {
2416 ScanOptions {
2417 device_id: "device".to_string(),
2418 selected_cache_keys: None,
2419 }
2420 }
2421
/// Creates a minimal `.git` directory under `repo_root`: a `config` with a
/// single `origin` remote pointing at `remote`, and a `HEAD` that references
/// `branch`. Panics if any filesystem operation fails.
fn write_git_fixture(repo_root: &Path, remote: &str, branch: &str) {
    let git_dir = repo_root.join(".git");
    std::fs::create_dir_all(&git_dir).expect("git dir");

    let config_body = format!(
        "[core]\n\trepositoryformatversion = 0\n[remote \"origin\"]\n\turl = {remote}\n"
    );
    std::fs::write(git_dir.join("config"), config_body).expect("git config");

    let head_body = format!("ref: refs/heads/{branch}\n");
    std::fs::write(git_dir.join("HEAD"), head_body).expect("git head");
}
2435
/// `codex_source_for_root` on a configured home path yields a source whose
/// provider is `CODEX_PROVIDER` and whose path label is that same home path.
#[test]
fn codex_discovers_one_logical_source_per_home() {
    let adapter = CodexAdapter;
    let source = codex_source_for_root(
        &adapter,
        Path::new("/tmp/codex-home"),
        LocationOrigin::Configured,
    );

    assert_eq!(source.provider, CODEX_PROVIDER);
    assert_eq!(source.path_label.as_deref(), Some("/tmp/codex-home"));
}
2448
/// A configured path that ends in `projects` is reported with its parent
/// directory as the source's path label.
#[test]
fn claude_normalizes_projects_path_to_config_root() {
    let adapter = ClaudeCodeAdapter;
    let source = claude_source_for_root(
        &adapter,
        Path::new("/tmp/claude-home/projects"),
        LocationOrigin::Configured,
    );

    assert_eq!(source.provider, CLAUDE_CODE_PROVIDER);
    assert_eq!(source.path_label.as_deref(), Some("/tmp/claude-home"));
}
2461
/// SSH (`git@host:path`), HTTPS and `ssh://` spellings of the same repository
/// normalize to one lowercase `host/owner/repo` value without `.git`.
#[test]
fn git_remote_normalization_merges_ssh_and_https() {
    assert_eq!(
        normalize_git_remote("git@github.com:Owner/Repo.git"),
        normalize_git_remote("https://github.com/Owner/Repo.git")
    );
    assert_eq!(
        normalize_git_remote("ssh://git@github.com/Owner/Repo.git"),
        Some("github.com/owner/repo".to_string())
    );
}
2473
/// A `ProjectContext` that carries only a label (every other field left at
/// its default) converts to no project info at all.
#[test]
fn project_context_requires_path_or_repo_identity() {
    let project = ProjectContext {
        project_label: Some("scratch".to_string()),
        ..ProjectContext::default()
    }
    .into_project_info();

    assert_eq!(project, None);
}
2484
/// A session listed in `sessions-index.json` gets its project path from the
/// index entry, and its repo and branch labels match the git fixture written
/// into that workspace.
#[test]
fn claude_extracts_project_path_and_git_metadata_from_sessions_index() {
    let dir = tempfile::tempdir().expect("tempdir");
    let root = dir.path();
    let projects = root.join("projects");
    let project_store = projects.join("example-workspace");
    let workspace = root.join("workspace").join("ExampleWorkspace");
    std::fs::create_dir_all(&project_store).expect("project store");
    std::fs::create_dir_all(&workspace).expect("workspace");
    write_git_fixture(
        &workspace,
        "https://github.com/example-org/example-workspace.git",
        "main",
    );

    // One usage row in the session transcript.
    let session_path = project_store.join("session.jsonl");
    std::fs::write(
        &session_path,
        "{\"timestamp\":\"2026-05-01T00:00:00Z\",\"message\":{\"usage\":{\"input_tokens\":1,\"output_tokens\":2}}}\n",
    )
    .expect("session");
    // The index entry maps the transcript's full path to the workspace.
    std::fs::write(
        project_store.join("sessions-index.json"),
        format!(
            "{{\"version\":1,\"entries\":[{{\"sessionId\":\"abc\",\"fullPath\":\"{}\",\"gitBranch\":\"main\",\"projectPath\":\"{}\"}}]}}",
            session_path.display(),
            workspace.display()
        ),
    )
    .expect("session index");

    let source = SourceLocation::local_adapter(
        CLAUDE_CODE_PROVIDER,
        "test",
        "0",
        root,
        LocationOrigin::Configured,
    );
    let scan = scan_claude_source(&ClaudeCodeAdapter, &source, &options()).expect("scan");

    assert_eq!(scan.events.len(), 1);
    let project = scan.events[0].project.as_ref().expect("project");
    assert_eq!(
        project.path_label.as_deref(),
        Some(workspace.to_string_lossy().as_ref())
    );
    assert_eq!(project.project_label.as_deref(), Some("ExampleWorkspace"));
    assert_eq!(
        project.repo_label.as_deref(),
        Some("example-org/example-workspace")
    );
    assert_eq!(project.branch_label.as_deref(), Some("main"));
}
2538
/// A subagent transcript stored under `<session>/subagents/` resolves to the
/// project path and branch of the index entry for `<session>.jsonl`, even
/// though that session file is never written.
#[test]
fn claude_subagent_transcripts_inherit_project_path_from_sessions_index() {
    let dir = tempfile::tempdir().expect("tempdir");
    let root = dir.path();
    let projects = root.join("projects");
    let project_store = projects.join("example-workspace");
    let workspace = root.join("workspace").join("ExampleWorkspace");
    std::fs::create_dir_all(&project_store).expect("project store");
    std::fs::create_dir_all(&workspace).expect("workspace");
    write_git_fixture(
        &workspace,
        "https://github.com/example-org/example-workspace.git",
        "feature/example-subagent-fix",
    );

    // Only the subagent transcript exists on disk; `session_file` is used
    // solely as the `fullPath` of the index entry.
    let session_file = project_store.join("session-123.jsonl");
    let subagent_dir = project_store.join("session-123").join("subagents");
    std::fs::create_dir_all(&subagent_dir).expect("subagent dir");
    let subagent_file = subagent_dir.join("agent-a.jsonl");
    std::fs::write(
        &subagent_file,
        "{\"timestamp\":\"2026-05-01T00:00:00Z\",\"message\":{\"usage\":{\"input_tokens\":1,\"output_tokens\":2}}}\n",
    )
    .expect("subagent session");
    std::fs::write(
        project_store.join("sessions-index.json"),
        format!(
            "{{\"version\":1,\"entries\":[{{\"sessionId\":\"session-123\",\"fullPath\":\"{}\",\"gitBranch\":\"feature/example-subagent-fix\",\"projectPath\":\"{}\"}}]}}",
            session_file.display(),
            workspace.display()
        ),
    )
    .expect("session index");

    let source = SourceLocation::local_adapter(
        CLAUDE_CODE_PROVIDER,
        "test",
        "0",
        root,
        LocationOrigin::Configured,
    );
    let scan = scan_claude_source(&ClaudeCodeAdapter, &source, &options()).expect("scan");

    assert_eq!(scan.events.len(), 1);
    let project = scan.events[0].project.as_ref().expect("project");
    assert_eq!(
        project.path_label.as_deref(),
        Some(workspace.to_string_lossy().as_ref())
    );
    assert_eq!(project.project_label.as_deref(), Some("ExampleWorkspace"));
    assert_eq!(
        project.repo_label.as_deref(),
        Some("example-org/example-workspace")
    );
    assert_eq!(
        project.branch_label.as_deref(),
        Some("feature/example-subagent-fix")
    );
}
2598
/// A transcript whose session has no entry in the index still resolves to the
/// workspace named by the index's top-level `originalPath`.
#[test]
fn claude_project_store_root_falls_back_to_original_path_when_session_index_misses() {
    let dir = tempfile::tempdir().expect("tempdir");
    let root = dir.path();
    let projects = root.join("projects");
    let project_store = projects.join("-home-example-src-ExampleWorkspace");
    let workspace = root.join("workspace").join("ExampleWorkspace");
    std::fs::create_dir_all(&project_store).expect("project store");
    std::fs::create_dir_all(&workspace).expect("workspace");
    write_git_fixture(
        &workspace,
        "https://github.com/example-org/example-workspace.git",
        "main",
    );

    // The transcript belongs to `unindexed-session`, while the only index
    // entry describes `indexed-session`.
    let subagent_dir = project_store.join("unindexed-session").join("subagents");
    std::fs::create_dir_all(&subagent_dir).expect("subagent dir");
    let subagent_file = subagent_dir.join("agent-a.jsonl");
    std::fs::write(
        &subagent_file,
        "{\"timestamp\":\"2026-05-01T00:00:00Z\",\"message\":{\"usage\":{\"input_tokens\":1,\"output_tokens\":2}}}\n",
    )
    .expect("subagent session");
    std::fs::write(
        project_store.join("sessions-index.json"),
        format!(
            "{{\"version\":1,\"originalPath\":\"{}\",\"entries\":[{{\"sessionId\":\"indexed-session\",\"fullPath\":\"{}\",\"gitBranch\":\"main\",\"projectPath\":\"{}\"}}]}}",
            workspace.display(),
            project_store.join("indexed-session.jsonl").display(),
            workspace.display()
        ),
    )
    .expect("session index");

    let source = SourceLocation::local_adapter(
        CLAUDE_CODE_PROVIDER,
        "test",
        "0",
        root,
        LocationOrigin::Configured,
    );
    let scan = scan_claude_source(&ClaudeCodeAdapter, &source, &options()).expect("scan");

    assert_eq!(scan.events.len(), 1);
    let project = scan.events[0].project.as_ref().expect("project");
    assert_eq!(
        project.path_label.as_deref(),
        Some(workspace.to_string_lossy().as_ref())
    );
    assert_eq!(project.project_label.as_deref(), Some("ExampleWorkspace"));
    assert_eq!(
        project.repo_label.as_deref(),
        Some("example-org/example-workspace")
    );
}
2654
/// A Codex `session_meta` row carrying `cwd` and git details supplies the
/// project path, project label, repo label and branch of the usage event that
/// follows it in the same file.
#[test]
fn codex_extracts_cwd_and_git_metadata_from_session_meta() {
    let dir = tempfile::tempdir().expect("tempdir");
    let codex_root = dir.path().join("codex");
    let sessions = codex_root.join("sessions");
    let workspace = dir.path().join("workspace").join("ai-stats");
    std::fs::create_dir_all(&sessions).expect("sessions");
    std::fs::create_dir_all(&workspace).expect("workspace");
    write_git_fixture(&workspace, "git@github.com:StarkDmi/StatsAI.git", "main");

    let session_path = sessions.join("session.jsonl");
    let mut file = File::create(&session_path).expect("session file");
    // First row: session metadata with the working directory and git info.
    writeln!(
        file,
        r#"{{"timestamp":"2026-06-01T08:00:00Z","type":"session_meta","payload":{{"cwd":"{}","git":{{"repository_url":"git@github.com:StarkDmi/StatsAI.git","branch":"main"}}}}}}"#,
        workspace.display()
    )
    .expect("write session meta");
    // Second row: the only usage row.
    writeln!(
        file,
        r#"{{"timestamp":"2026-06-01T08:01:00Z","usage":{{"input_tokens":10,"output_tokens":5}},"model":"gpt-5"}}"#
    )
    .expect("write usage");

    let source = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        &codex_root,
        LocationOrigin::Configured,
    );
    let scan = scan_codex_source(&CodexAdapter, &source, &options()).expect("scan");

    assert_eq!(scan.events.len(), 1);
    let project = scan.events[0].project.as_ref().expect("project");
    assert_eq!(
        project.path_label.as_deref(),
        Some(workspace.to_string_lossy().as_ref())
    );
    assert_eq!(project.project_label.as_deref(), Some("ai-stats"));
    assert_eq!(project.repo_label.as_deref(), Some("starkdmi/statsai"));
    assert_eq!(project.branch_label.as_deref(), Some("main"));
}
2698
/// When the source points at the config root, only the usage row under
/// `projects/` becomes an event; the row written under `transcripts/` does
/// not show up in the results.
#[test]
fn claude_source_scans_projects_child_when_config_root_is_given() {
    let dir = tempfile::tempdir().expect("tempdir");
    let projects = dir.path().join("projects");
    let transcripts = dir.path().join("transcripts");
    std::fs::create_dir_all(&projects).expect("projects");
    std::fs::create_dir_all(&transcripts).expect("transcripts");

    let mut project_file = File::create(projects.join("session.jsonl")).expect("project file");
    writeln!(
        project_file,
        "{{\"timestamp\":\"2026-05-01T00:00:00Z\",\"message\":{{\"usage\":{{\"input_tokens\":1,\"output_tokens\":2}}}}}}"
    )
    .expect("write project");
    let mut transcript_file =
        File::create(transcripts.join("transcript.jsonl")).expect("transcript file");
    writeln!(
        transcript_file,
        "{{\"message\":{{\"usage\":{{\"input_tokens\":3,\"output_tokens\":4}}}}}}"
    )
    .expect("write transcript");

    let source = SourceLocation::local_adapter(
        CLAUDE_CODE_PROVIDER,
        "test",
        "0",
        dir.path(),
        LocationOrigin::Configured,
    );

    let scan = scan_claude_source(&ClaudeCodeAdapter, &source, &options()).expect("scan");
    assert_eq!(scan.events.len(), 1);
    assert_eq!(scan.diagnostics.raw_rows, 1);
    // 1 input + 2 output tokens: the totals of the `projects/` row only.
    assert_eq!(scan.events[0].usage.computed_total(), 3);
}
2734
/// `stats-cache.json` produces usage summaries rather than events: one
/// summary for the model with non-zero usage, plus one skipped zero event.
#[test]
fn claude_stats_cache_is_parsed_as_summary_not_events() {
    let dir = tempfile::tempdir().expect("tempdir");
    std::fs::create_dir_all(dir.path().join("projects")).expect("projects");
    let mut file = File::create(dir.path().join("stats-cache.json")).expect("stats cache");
    // Two models: one with real usage and a cost, one with all-zero counters.
    writeln!(
        file,
        r#"{{
            "version": 2,
            "lastComputedDate": "2026-05-13",
            "firstSessionDate": "2026-01-21T17:21:43.119Z",
            "totalSessions": 61,
            "totalMessages": 15679,
            "modelUsage": {{
                "claude-opus-4-5-thinking": {{
                    "inputTokens": 113622256,
                    "outputTokens": 387,
                    "cacheReadInputTokens": 282480618,
                    "cacheCreationInputTokens": 10,
                    "costUSD": 12.5
                }},
                "google/antigravity-empty": {{
                    "inputTokens": 0,
                    "outputTokens": 0,
                    "cacheReadInputTokens": 0,
                    "cacheCreationInputTokens": 0
                }}
            }}
        }}"#
    )
    .expect("write");
    let source = SourceLocation::local_adapter(
        CLAUDE_CODE_PROVIDER,
        "test",
        "0",
        dir.path(),
        LocationOrigin::Configured,
    );

    let scan = scan_claude_source(&ClaudeCodeAdapter, &source, &options()).expect("scan");

    assert!(scan.events.is_empty());
    assert_eq!(scan.summaries.len(), 1);
    // NOTE(review): presumably the all-zero model entry is the skipped one.
    assert_eq!(scan.diagnostics.skipped_zero_events, 1);
    assert_eq!(
        scan.summaries[0]
            .model
            .as_ref()
            .and_then(|model| model.name.as_deref()),
        Some("claude-opus-4-5-thinking")
    );
    assert_eq!(scan.summaries[0].usage.input_tokens, Some(113622256));
    assert_eq!(scan.summaries[0].usage.cache_read_tokens, Some(282480618));
    assert_eq!(scan.summaries[0].usage.cache_creation_tokens, Some(10));
    assert_eq!(scan.summaries[0].usage.output_tokens, Some(387));
    // `costUSD` 12.5 is reported as 1250.
    assert_eq!(scan.summaries[0].cost.provider_reported_usd, Some(1250));
    assert_eq!(scan.summaries[0].metadata.total_sessions, Some(61));
    assert_eq!(scan.summaries[0].metadata.total_messages, Some(15679));
}
2794
/// With `selected_cache_keys` limited to the first file, only that file is
/// scanned and the other is counted as skipped-unchanged.
#[test]
fn claude_scan_respects_selected_cache_keys() {
    let dir = tempfile::tempdir().expect("tempdir");
    let projects = dir.path().join("projects");
    std::fs::create_dir_all(&projects).expect("projects");

    let first = projects.join("a.jsonl");
    let second = projects.join("b.jsonl");
    std::fs::write(
        &first,
        "{\"timestamp\":\"2026-05-01T00:00:00Z\",\"message\":{\"usage\":{\"input_tokens\":1,\"output_tokens\":2}}}\n",
    )
    .expect("first");
    std::fs::write(
        &second,
        "{\"timestamp\":\"2026-05-01T00:01:00Z\",\"message\":{\"usage\":{\"input_tokens\":3,\"output_tokens\":4}}}\n",
    )
    .expect("second");

    let source = SourceLocation::local_adapter(
        CLAUDE_CODE_PROVIDER,
        "test",
        "0",
        dir.path(),
        LocationOrigin::Configured,
    );
    // Select by the canonical display path of the first file.
    let selected = [canonical_display(&first)].into_iter().collect();
    let scan = scan_claude_source(
        &ClaudeCodeAdapter,
        &source,
        &ScanOptions {
            device_id: "device".to_string(),
            selected_cache_keys: Some(selected),
        },
    )
    .expect("scan");

    assert_eq!(scan.events.len(), 1);
    assert_eq!(scan.diagnostics.files_scanned, 1);
    assert_eq!(scan.diagnostics.files_skipped_unchanged, 1);
    // 1 + 2 tokens: the totals of the first file.
    assert_eq!(scan.events[0].usage.computed_total(), 3);
}
2837
/// Usage rows under both `sessions/` and `archived_sessions/` of a Codex home
/// are picked up as events.
#[test]
fn codex_source_scans_sessions_and_archived_sessions() {
    let dir = tempfile::tempdir().expect("tempdir");
    let sessions = dir.path().join("sessions");
    let archived = dir.path().join("archived_sessions");
    std::fs::create_dir_all(&sessions).expect("sessions");
    std::fs::create_dir_all(&archived).expect("archived");

    let mut active_file = File::create(sessions.join("active.jsonl")).expect("active fixture");
    writeln!(
        active_file,
        "{{\"timestamp\":\"2026-05-01T00:00:00Z\",\"usage\":{{\"input_tokens\":1,\"output_tokens\":2}}}}"
    )
    .expect("write active");
    let mut archived_file = File::create(archived.join("old.jsonl")).expect("archived fixture");
    writeln!(
        archived_file,
        "{{\"timestamp\":\"2026-05-02T00:00:00Z\",\"usage\":{{\"input_tokens\":3,\"output_tokens\":4}}}}"
    )
    .expect("write archived");

    let source = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        dir.path(),
        LocationOrigin::Configured,
    );

    let scan = scan_codex_source(&CodexAdapter, &source, &options()).expect("scan");
    assert_eq!(scan.events.len(), 2);
    assert_eq!(scan.diagnostics.raw_rows, 2);
}
2871
/// With `selected_cache_keys` limited to the second file, only that file is
/// scanned and the other is counted as skipped-unchanged.
#[test]
fn codex_scan_respects_selected_cache_keys() {
    let dir = tempfile::tempdir().expect("tempdir");
    let sessions = dir.path().join("sessions");
    std::fs::create_dir_all(&sessions).expect("sessions");
    let first = sessions.join("a.jsonl");
    let second = sessions.join("b.jsonl");
    std::fs::write(
        &first,
        "{\"timestamp\":\"2026-05-01T00:00:00Z\",\"usage\":{\"input_tokens\":1,\"output_tokens\":2}}\n",
    )
    .expect("first");
    std::fs::write(
        &second,
        "{\"timestamp\":\"2026-05-01T00:01:00Z\",\"usage\":{\"input_tokens\":3,\"output_tokens\":4}}\n",
    )
    .expect("second");
    let source = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        dir.path(),
        LocationOrigin::Configured,
    );

    // Select by the canonical display path of the second file.
    let selected = [canonical_display(&second)].into_iter().collect();
    let scan = scan_codex_source(
        &CodexAdapter,
        &source,
        &ScanOptions {
            device_id: "device".to_string(),
            selected_cache_keys: Some(selected),
        },
    )
    .expect("scan");

    assert_eq!(scan.events.len(), 1);
    assert_eq!(scan.diagnostics.files_scanned, 1);
    assert_eq!(scan.diagnostics.files_skipped_unchanged, 1);
    // 3 + 4 tokens: the totals of the second file.
    assert_eq!(scan.events[0].usage.computed_total(), 7);
}
2913
/// Rewriting `auth.json` changes the cache signature of an otherwise
/// untouched session file, while its cache key stays the same.
#[test]
fn codex_scan_candidates_change_when_auth_json_changes() {
    let dir = tempfile::tempdir().expect("tempdir");
    let sessions = dir.path().join("sessions");
    std::fs::create_dir_all(&sessions).expect("sessions");
    let session_path = sessions.join("session.jsonl");
    std::fs::write(
        &session_path,
        "{\"timestamp\":\"2026-05-01T00:00:00Z\",\"usage\":{\"input_tokens\":1,\"output_tokens\":2}}\n",
    )
    .expect("session");
    std::fs::write(
        dir.path().join("auth.json"),
        "{\"chatgpt_account_id\":\"acct-one\"}\n",
    )
    .expect("auth one");

    let source = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        dir.path(),
        LocationOrigin::Configured,
    );

    let first = codex_scan_candidates(&source, "test-adapter").expect("first candidates");
    // NOTE(review): the pause presumably lets the file's modification time
    // differ between the two writes (both payloads have the same length);
    // confirm what the signature hashes, since 5 ms may be too short on
    // filesystems with coarse timestamps.
    std::thread::sleep(std::time::Duration::from_millis(5));
    std::fs::write(
        dir.path().join("auth.json"),
        "{\"chatgpt_account_id\":\"acct-two\"}\n",
    )
    .expect("auth two");
    let second = codex_scan_candidates(&source, "test-adapter").expect("second candidates");

    assert_eq!(first.len(), 1);
    assert_eq!(second.len(), 1);
    assert_eq!(first[0].cache_key, canonical_display(&session_path));
    assert_ne!(first[0].cache_signature, second[0].cache_signature);
}
2953
/// Two source locations built from identical arguments yield the same cache
/// signature for the same session file.
#[test]
fn codex_scan_candidates_are_stable_for_same_source() {
    let dir = tempfile::tempdir().expect("tempdir");
    let sessions = dir.path().join("sessions");
    std::fs::create_dir_all(&sessions).expect("sessions");
    let session_path = sessions.join("session.jsonl");
    std::fs::write(
        &session_path,
        "{\"timestamp\":\"2026-05-01T00:00:00Z\",\"usage\":{\"input_tokens\":1,\"output_tokens\":2}}\n",
    )
    .expect("session");

    // `hinted` and `remapped` are constructed with exactly the same arguments.
    let hinted = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        dir.path(),
        LocationOrigin::Configured,
    );
    let remapped = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        dir.path(),
        LocationOrigin::Configured,
    );

    let first = codex_scan_candidates(&hinted, "test-adapter").expect("first candidates");
    let second = codex_scan_candidates(&remapped, "test-adapter").expect("second candidates");

    assert_eq!(first.len(), 1);
    assert_eq!(second.len(), 1);
    assert_eq!(first[0].cache_key, canonical_display(&session_path));
    assert_eq!(first[0].cache_signature, second[0].cache_signature);
}
2989
/// A candidate signed with the hand-built legacy namespace below does not
/// share a cache signature with what `codex_scan_candidates` produces now.
#[test]
fn codex_scan_candidates_invalidate_legacy_cache_namespace() {
    let dir = tempfile::tempdir().expect("tempdir");
    let sessions = dir.path().join("sessions");
    std::fs::create_dir_all(&sessions).expect("sessions");
    let session_path = sessions.join("session.jsonl");
    std::fs::write(
        &session_path,
        "{\"timestamp\":\"2026-05-01T00:00:00Z\",\"usage\":{\"input_tokens\":1,\"output_tokens\":2}}\n",
    )
    .expect("session");

    let source = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        dir.path(),
        LocationOrigin::Configured,
    );

    // Legacy namespace: a hash over the signature version, provider, source
    // kind, adapter id, adapter name and path hash.
    let legacy_namespace = {
        let adapter_id = source.adapter_id.as_deref().unwrap_or("");
        let path_hash = source.path_hash.as_deref().unwrap_or("");
        hash_text(&format!(
            "{SCAN_CACHE_SIGNATURE_VERSION}:{}:{:?}:{adapter_id}:{}:{path_hash}",
            source.provider, source.source_kind, "test-adapter"
        ))
    };
    let legacy_candidate = scan_candidate(session_path.clone(), None, &legacy_namespace);
    let current = codex_scan_candidates(&source, "test-adapter").expect("current candidates");

    assert_eq!(current.len(), 1);
    assert_eq!(current[0].cache_key, canonical_display(&session_path));
    assert_ne!(legacy_candidate.cache_signature, current[0].cache_signature);
}
3025
/// When the source path is the `sessions` directory itself, the session file
/// is still a candidate and the account id comes from `auth.json` in the
/// parent directory.
#[test]
fn codex_source_path_pointing_at_sessions_uses_parent_auth_root() {
    let dir = tempfile::tempdir().expect("tempdir");
    let root = dir.path().join(".codex");
    let sessions = root.join("sessions");
    std::fs::create_dir_all(&sessions).expect("sessions");
    std::fs::write(
        root.join("auth.json"),
        "{\"chatgpt_account_id\":\"acct-real\"}\n",
    )
    .expect("auth");
    std::fs::write(
        sessions.join("session.jsonl"),
        "{\"timestamp\":\"2026-05-01T00:01:00Z\",\"usage\":{\"input_tokens\":3,\"output_tokens\":4}}\n",
    )
    .expect("session");

    // The source points at `sessions`, not at the Codex home.
    let source = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        &sessions,
        LocationOrigin::Configured,
    );

    let candidates = codex_scan_candidates(&source, "test-adapter").expect("candidates");
    let scan = scan_codex_source(&CodexAdapter, &source, &options()).expect("scan");

    assert_eq!(candidates.len(), 1);
    assert_eq!(
        candidates[0].cache_key,
        canonical_display(&sessions.join("session.jsonl"))
    );
    assert_eq!(scan.events.len(), 1);
    assert_eq!(
        scan.verified_source_state
            .as_ref()
            .and_then(|state| state.provider_user_id.as_deref()),
        Some("acct-real")
    );
}
3067
/// A configured root that only holds a stray `history.jsonl` (no usage sub-directories) must
/// produce neither scan candidates nor events.
#[test]
fn codex_root_without_usage_directories_has_no_candidates() {
    let tmp = tempfile::tempdir().expect("tempdir");
    let bare_root = tmp.path().join("not-a-codex-home");
    std::fs::create_dir_all(&bare_root).expect("root");

    // The record is usage-shaped, but it lives directly under the root.
    let stray_file = bare_root.join("history.jsonl");
    std::fs::write(
        stray_file,
        "{\"timestamp\":\"2026-05-01T00:00:00Z\",\"usage\":{\"input_tokens\":1,\"output_tokens\":2}}\n",
    )
    .expect("history");

    let location = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        &bare_root,
        LocationOrigin::Configured,
    );

    let listed = codex_scan_candidates(&location, "test-adapter").expect("candidates");
    let outcome = scan_codex_source(&CodexAdapter, &location, &options()).expect("scan");

    assert!(listed.is_empty());
    assert!(outcome.events.is_empty());
}
3093
/// Rewriting `sessions-index.json` next to a Claude session file must change that session's
/// cache signature while leaving its cache key untouched.
#[test]
fn claude_scan_candidates_change_when_sessions_index_changes() {
    let dir = tempfile::tempdir().expect("tempdir");
    let projects = dir.path().join("projects");
    let project_store = projects.join("example-workspace");
    std::fs::create_dir_all(&project_store).expect("project store");
    let session_path = project_store.join("session.jsonl");
    std::fs::write(
        &session_path,
        "{\"timestamp\":\"2026-05-01T00:00:00Z\",\"usage\":{\"input_tokens\":1,\"output_tokens\":2}}\n",
    )
    .expect("session");

    // JSON-encode the path instead of splicing it between quotes: a temp path containing
    // backslashes or quotes (e.g. on Windows) would otherwise make the index invalid JSON.
    // For ordinary Unix paths the encoded form is exactly `"<path>"`.
    let full_path_json = serde_json::to_string(&session_path.display().to_string())
        .expect("encode session path");
    let index_body = |project_path: &str| {
        format!(
            "{{\"version\":1,\"entries\":[{{\"sessionId\":\"session-1\",\"fullPath\":{full_path_json},\"projectPath\":\"{project_path}\"}}]}}"
        )
    };

    let sessions_index = project_store.join("sessions-index.json");
    std::fs::write(&sessions_index, index_body("/tmp/workspace-a")).expect("session index");

    let source = SourceLocation::local_adapter(
        CLAUDE_CODE_PROVIDER,
        "test",
        "0",
        dir.path(),
        LocationOrigin::Configured,
    );

    let first = claude_scan_candidates(&source, "test-adapter").expect("first candidates");
    // NOTE(review): both index bodies have the same length, so this pause presumably exists to
    // give the rewritten file a later modification time — confirm against the signature code.
    std::thread::sleep(std::time::Duration::from_millis(5));
    std::fs::write(&sessions_index, index_body("/tmp/workspace-b"))
        .expect("updated session index");

    let second = claude_scan_candidates(&source, "test-adapter").expect("second candidates");

    assert_eq!(first.len(), 1);
    assert_eq!(second.len(), 1);
    assert_eq!(first[0].cache_key, canonical_display(&session_path));
    assert_eq!(second[0].cache_key, canonical_display(&session_path));
    assert_ne!(first[0].cache_signature, second[0].cache_signature);
}
3143
/// A candidate built from the namespace string that ends in `project-context.v1` must not
/// share a cache signature with what `claude_scan_candidates` produces now.
#[test]
fn claude_scan_candidates_invalidate_legacy_cache_namespace() {
    let tmp = tempfile::tempdir().expect("tempdir");
    let project_store = tmp.path().join("projects").join("example-workspace");
    std::fs::create_dir_all(&project_store).expect("project store");
    let session_path = project_store.join("session.jsonl");
    std::fs::write(
        &session_path,
        "{\"timestamp\":\"2026-05-01T00:00:00Z\",\"usage\":{\"input_tokens\":1,\"output_tokens\":2}}\n",
    )
    .expect("session");

    let location = SourceLocation::local_adapter(
        CLAUDE_CODE_PROVIDER,
        "test",
        "0",
        tmp.path(),
        LocationOrigin::Configured,
    );

    // Rebuild the namespace by hand with the old parser revision suffix.
    let adapter_id = location.adapter_id.as_deref().unwrap_or("");
    let path_hash = location.path_hash.as_deref().unwrap_or("");
    let legacy_namespace_input = format!(
        "{SCAN_CACHE_SIGNATURE_VERSION}:{}:{:?}:{adapter_id}:{}:{path_hash}:{}",
        location.provider, location.source_kind, "test-adapter", "project-context.v1"
    );
    let legacy_namespace = hash_text(&legacy_namespace_input);

    let legacy_candidate = scan_candidate(session_path.clone(), None, &legacy_namespace);
    let listed = claude_scan_candidates(&location, "test-adapter").expect("current candidates");

    assert_eq!(listed.len(), 1);
    let current = &listed[0];
    assert_eq!(current.cache_key, canonical_display(&session_path));
    assert_ne!(legacy_candidate.cache_signature, current.cache_signature);
}
3180
/// A branch session file that starts with a copy of its parent's records must count the
/// copied record once (as a duplicate) and still emit the usage added after the copy.
#[test]
fn codex_dedupes_copied_branch_history_and_keeps_branch_delta() {
    let tmp = tempfile::tempdir().expect("tempdir");
    let sessions_dir = tmp.path().join("sessions");
    std::fs::create_dir_all(&sessions_dir).expect("sessions");

    // Parent session: model context plus one cumulative token_count snapshot.
    let mut parent_log =
        File::create(sessions_dir.join("2026-05-12T08-00-00-parent.jsonl")).expect("parent");
    writeln!(
        parent_log,
        r#"{{"timestamp":"2026-05-12T08:00:00.000Z","type":"turn_context","payload":{{"model":"gpt-5.2"}}}}"#
    )
    .expect("write parent context");
    writeln!(
        parent_log,
        r#"{{"timestamp":"2026-05-12T08:01:00.000Z","type":"event_msg","payload":{{"type":"token_count","info":{{"total_token_usage":{{"input_tokens":1000,"cached_input_tokens":100,"output_tokens":200,"reasoning_output_tokens":20,"total_tokens":1200}}}}}}}}"#
    )
    .expect("write parent tokens");

    // Branch session: the same two records, followed by one further snapshot.
    let mut branch_log =
        File::create(sessions_dir.join("2026-05-12T08-02-00-branch.jsonl")).expect("branch");
    writeln!(
        branch_log,
        r#"{{"timestamp":"2026-05-12T08:00:00.000Z","type":"turn_context","payload":{{"model":"gpt-5.2"}}}}"#
    )
    .expect("write branch context");
    writeln!(
        branch_log,
        r#"{{"timestamp":"2026-05-12T08:01:00.000Z","type":"event_msg","payload":{{"type":"token_count","info":{{"total_token_usage":{{"input_tokens":1000,"cached_input_tokens":100,"output_tokens":200,"reasoning_output_tokens":20,"total_tokens":1200}}}}}}}}"#
    )
    .expect("write branch copied parent tokens");
    writeln!(
        branch_log,
        r#"{{"timestamp":"2026-05-12T08:02:00.000Z","type":"event_msg","payload":{{"type":"token_count","info":{{"total_token_usage":{{"input_tokens":1600,"cached_input_tokens":300,"output_tokens":450,"reasoning_output_tokens":40,"total_tokens":2050}}}}}}}}"#
    )
    .expect("write branch delta tokens");

    let location = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        tmp.path(),
        LocationOrigin::Configured,
    );

    let outcome = scan_codex_source(&CodexAdapter, &location, &options()).expect("scan");

    assert_eq!(outcome.events.len(), 2);
    assert_eq!(outcome.diagnostics.duplicate_events, 1);

    // First event: the shared parent snapshot.
    let shared = &outcome.events[0].usage;
    assert_eq!(shared.input_tokens, Some(900));
    assert_eq!(shared.cache_read_tokens, Some(100));
    assert_eq!(shared.output_tokens, Some(180));
    assert_eq!(shared.reasoning_tokens, Some(20));
    assert_eq!(shared.total_tokens, Some(1200));

    // Second event: only what the branch added on top of the copied history.
    let delta = &outcome.events[1].usage;
    assert_eq!(delta.input_tokens, Some(400));
    assert_eq!(delta.cache_read_tokens, Some(200));
    assert_eq!(delta.output_tokens, Some(230));
    assert_eq!(delta.reasoning_tokens, Some(20));
    assert_eq!(delta.total_tokens, Some(850));
}
3243
/// When the same record exists under both `sessions/` and `archived_sessions/`, the surviving
/// event must carry the path hash of the `sessions/` copy.
#[test]
fn codex_prefers_active_session_copy_over_archived_duplicate() {
    let tmp = tempfile::tempdir().expect("tempdir");
    let active_dir = tmp.path().join("sessions");
    let archive_dir = tmp.path().join("archived_sessions");
    std::fs::create_dir_all(&active_dir).expect("sessions");
    std::fs::create_dir_all(&archive_dir).expect("archived");

    // Identical content in both locations.
    let record =
        "{\"timestamp\":\"2026-05-01T00:00:00Z\",\"usage\":{\"input_tokens\":1,\"output_tokens\":2}}\n";
    let active_path = active_dir.join("dup.jsonl");
    let archived_path = archive_dir.join("dup.jsonl");
    std::fs::write(&active_path, record).expect("active write");
    std::fs::write(&archived_path, record).expect("archived write");

    let location = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        tmp.path(),
        LocationOrigin::Configured,
    );

    let outcome = scan_codex_source(&CodexAdapter, &location, &options()).expect("scan");
    let active_hash = hash_text(&canonical_display(&active_path));

    assert_eq!(outcome.events.len(), 1);
    assert_eq!(outcome.diagnostics.duplicate_events, 1);

    let kept_file_hash = outcome.events[0]
        .parse_evidence
        .as_ref()
        .and_then(|evidence| evidence.source_file_path_hash.as_deref());
    assert_eq!(kept_file_hash, Some(active_hash.as_str()));
}
3286
/// When a token_count record carries both `total_token_usage` and `last_token_usage`, the
/// emitted event must reflect the `last_token_usage` figures.
#[test]
fn codex_uses_last_token_usage_not_cumulative_total() {
    let tmp = tempfile::tempdir().expect("tempdir");
    let sessions_dir = tmp.path().join("sessions");
    std::fs::create_dir_all(&sessions_dir).expect("sessions");

    let mut fixture = File::create(sessions_dir.join("session.jsonl")).expect("fixture");
    writeln!(
        fixture,
        r#"{{"timestamp":"2026-05-01T00:00:00Z","type":"event_msg","payload":{{"type":"token_count","info":{{"model":"gpt-5-codex","total_token_usage":{{"input_tokens":900,"cached_input_tokens":300,"output_tokens":100,"reasoning_output_tokens":50,"total_tokens":1000}},"last_token_usage":{{"input_tokens":90,"cached_input_tokens":30,"output_tokens":10,"reasoning_output_tokens":5,"total_tokens":100}}}}}}}}"#
    )
    .expect("write");

    let location = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        tmp.path(),
        LocationOrigin::Configured,
    );

    let outcome = scan_codex_source(&CodexAdapter, &location, &options()).expect("scan");

    assert_eq!(outcome.events.len(), 1);
    let event = &outcome.events[0];
    let usage = &event.usage;
    assert_eq!(usage.input_tokens, Some(60));
    assert_eq!(usage.output_tokens, Some(5));
    assert_eq!(usage.computed_total(), 100);
    assert_eq!(usage.cache_read_tokens, Some(30));
    assert_eq!(usage.reasoning_tokens, Some(5));
    assert!(event.cost.estimated_api_equivalent_usd.is_some());
}
3316
/// With only cumulative `total_token_usage` snapshots available, the second event must hold
/// the difference between the two snapshots.
#[test]
fn codex_subtracts_cumulative_total_usage_when_last_usage_is_missing() {
    let tmp = tempfile::tempdir().expect("tempdir");
    let sessions_dir = tmp.path().join("sessions");
    std::fs::create_dir_all(&sessions_dir).expect("sessions");

    let mut fixture = File::create(sessions_dir.join("session.jsonl")).expect("fixture");
    // Snapshot one.
    writeln!(
        fixture,
        r#"{{"timestamp":"2026-05-01T00:00:00Z","type":"event_msg","payload":{{"type":"token_count","info":{{"model":"gpt-5","total_token_usage":{{"input_tokens":100,"cached_input_tokens":10,"output_tokens":50,"total_tokens":150}}}}}}}}"#
    )
    .expect("write first");
    // Snapshot two, a minute later, with larger running totals.
    writeln!(
        fixture,
        r#"{{"timestamp":"2026-05-01T00:01:00Z","type":"event_msg","payload":{{"type":"token_count","info":{{"model":"gpt-5","total_token_usage":{{"input_tokens":250,"cached_input_tokens":30,"output_tokens":75,"total_tokens":325}}}}}}}}"#
    )
    .expect("write second");

    let location = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        tmp.path(),
        LocationOrigin::Configured,
    );

    let outcome = scan_codex_source(&CodexAdapter, &location, &options()).expect("scan");

    assert_eq!(outcome.events.len(), 2);
    assert_eq!(outcome.events[0].usage.input_tokens, Some(90));

    let increment = &outcome.events[1].usage;
    assert_eq!(increment.input_tokens, Some(130));
    assert_eq!(increment.cache_read_tokens, Some(20));
    assert_eq!(increment.output_tokens, Some(25));
    assert_eq!(increment.total_tokens, Some(175));
}
3350
/// A full rollout turn (context, start, user message, tokens, assistant message, completion
/// with explicit timings) must yield one event carrying session bounds, explicit latency and
/// per-role message counts.
#[test]
fn codex_rollout_turns_include_runtime_and_message_metrics() {
    let tmp = tempfile::tempdir().expect("tempdir");
    let sessions_dir = tmp.path().join("sessions");
    std::fs::create_dir_all(&sessions_dir).expect("sessions");

    let mut rollout = File::create(sessions_dir.join("rollout.jsonl")).expect("fixture");
    writeln!(
        rollout,
        r#"{{"timestamp":"2026-05-01T00:00:00Z","type":"turn_context","payload":{{"model":"gpt-5"}}}}"#
    )
    .expect("write context");
    writeln!(
        rollout,
        r#"{{"timestamp":"2026-05-01T00:00:01Z","type":"event_msg","payload":{{"type":"task_started","started_at":"2026-05-01T00:00:01Z"}}}}"#
    )
    .expect("write start");
    writeln!(
        rollout,
        r#"{{"timestamp":"2026-05-01T00:00:02Z","type":"response_item","payload":{{"type":"message","role":"user","content":[{{"type":"input_text","text":"hello"}}]}}}}"#
    )
    .expect("write user");
    writeln!(
        rollout,
        r#"{{"timestamp":"2026-05-01T00:00:05Z","type":"event_msg","payload":{{"type":"token_count","info":{{"last_token_usage":{{"input_tokens":80,"cached_input_tokens":20,"output_tokens":40,"reasoning_output_tokens":10,"total_tokens":120}},"total_token_usage":{{"input_tokens":80,"cached_input_tokens":20,"output_tokens":40,"reasoning_output_tokens":10,"total_tokens":120}}}}}}}}"#
    )
    .expect("write tokens");
    writeln!(
        rollout,
        r#"{{"timestamp":"2026-05-01T00:00:06Z","type":"response_item","payload":{{"type":"message","role":"assistant","content":[{{"type":"output_text","text":"hi"}}]}}}}"#
    )
    .expect("write assistant");
    // The completion record supplies duration and time-to-first-token explicitly.
    writeln!(
        rollout,
        r#"{{"timestamp":"2026-05-01T00:00:06Z","type":"event_msg","payload":{{"type":"task_complete","completed_at":"2026-05-01T00:00:06Z","duration_ms":5000,"time_to_first_token_ms":1200}}}}"#
    )
    .expect("write complete");

    let location = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        tmp.path(),
        LocationOrigin::Configured,
    );

    let outcome = scan_codex_source(&CodexAdapter, &location, &options()).expect("scan");

    assert_eq!(outcome.events.len(), 1);
    let event = &outcome.events[0];

    // Token accounting.
    let usage = &event.usage;
    assert_eq!(usage.input_tokens, Some(60));
    assert_eq!(usage.cache_read_tokens, Some(20));
    assert_eq!(usage.output_tokens, Some(30));
    assert_eq!(usage.reasoning_tokens, Some(10));
    assert_eq!(usage.total_tokens, Some(120));

    // Session bounds come from the task_started / task_complete records.
    let session = &event.session;
    assert_eq!(
        session.started_at.to_rfc3339(),
        "2026-05-01T00:00:01+00:00"
    );
    let ended_at = session.ended_at.expect("ended_at");
    assert_eq!(ended_at.to_rfc3339(), "2026-05-01T00:00:06+00:00");
    assert_eq!(session.duration_seconds, Some(5));

    // Runtime metrics.
    let runtime = event.runtime.as_ref().expect("runtime");
    assert_eq!(runtime.latency_ms, Some(5000));
    assert_eq!(runtime.latency_source, Some(LatencySource::Explicit));
    assert_eq!(runtime.time_to_first_token_ms, Some(1200));
    assert_eq!(runtime.total_messages, Some(2));
    assert_eq!(runtime.user_messages, Some(1));
    assert_eq!(runtime.assistant_messages, Some(1));
    assert_eq!(runtime.developer_messages, Some(0));
}
3426
/// A turn with a token_count record followed by a task_complete record that also carries a
/// top-level `usage` object must still produce exactly one event.
#[test]
fn codex_task_complete_usage_is_not_emitted_twice() {
    let tmp = tempfile::tempdir().expect("tempdir");
    let sessions_dir = tmp.path().join("sessions");
    std::fs::create_dir_all(&sessions_dir).expect("sessions");

    let mut fixture = File::create(sessions_dir.join("completion-usage.jsonl")).expect("fixture");
    writeln!(
        fixture,
        r#"{{"timestamp":"2026-05-01T00:00:00Z","type":"event_msg","payload":{{"type":"task_started","started_at":"2026-05-01T00:00:00Z"}}}}"#
    )
    .expect("write start");
    writeln!(
        fixture,
        r#"{{"timestamp":"2026-05-01T00:00:02Z","type":"event_msg","payload":{{"type":"token_count","info":{{"last_token_usage":{{"input_tokens":80,"cached_input_tokens":20,"output_tokens":40,"reasoning_output_tokens":10,"total_tokens":120}},"total_token_usage":{{"input_tokens":80,"cached_input_tokens":20,"output_tokens":40,"reasoning_output_tokens":10,"total_tokens":120}}}}}}}}"#
    )
    .expect("write token count");
    // The completion record repeats usage at the top level with different numbers.
    writeln!(
        fixture,
        r#"{{"timestamp":"2026-05-01T00:00:03Z","type":"event_msg","payload":{{"type":"task_complete","completed_at":"2026-05-01T00:00:03Z","duration_ms":3000}},"usage":{{"input_tokens":90,"cached_input_tokens":30,"output_tokens":45,"reasoning_output_tokens":15,"total_tokens":150}}}}"#
    )
    .expect("write completion");

    let location = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        tmp.path(),
        LocationOrigin::Configured,
    );

    let outcome = scan_codex_source(&CodexAdapter, &location, &options()).expect("scan");

    assert_eq!(outcome.events.len(), 1);
    let usage = &outcome.events[0].usage;
    assert_eq!(usage.input_tokens, Some(60));
    assert_eq!(usage.cache_read_tokens, Some(30));
    assert_eq!(usage.output_tokens, Some(30));
    assert_eq!(usage.reasoning_tokens, Some(15));
    assert_eq!(usage.total_tokens, Some(150));
}
3466
/// Two sessions whose records are interleaved in one file must each get their own event,
/// with start/end times and usage taken from the records bearing that session's id.
#[test]
fn codex_rollout_turns_match_interleaved_records_by_session_id() {
    let tmp = tempfile::tempdir().expect("tempdir");
    let sessions_dir = tmp.path().join("sessions");
    std::fs::create_dir_all(&sessions_dir).expect("sessions");

    let mut fixture = File::create(sessions_dir.join("interleaved.jsonl")).expect("fixture");
    writeln!(
        fixture,
        r#"{{"timestamp":"2026-05-01T00:00:00Z","session_id":"session-a","type":"event_msg","payload":{{"type":"task_started","started_at":"2026-05-01T00:00:00Z"}}}}"#
    )
    .expect("write session a start");
    // session-b starts before session-a has reported anything else.
    writeln!(
        fixture,
        r#"{{"timestamp":"2026-05-01T00:00:01Z","session_id":"session-b","type":"event_msg","payload":{{"type":"task_started","started_at":"2026-05-01T00:00:01Z"}}}}"#
    )
    .expect("write session b start");
    writeln!(
        fixture,
        r#"{{"timestamp":"2026-05-01T00:00:02Z","session_id":"session-a","type":"event_msg","payload":{{"type":"token_count","info":{{"last_token_usage":{{"input_tokens":80,"cached_input_tokens":20,"output_tokens":30,"reasoning_output_tokens":10,"total_tokens":140}},"total_token_usage":{{"input_tokens":80,"cached_input_tokens":20,"output_tokens":30,"reasoning_output_tokens":10,"total_tokens":140}}}}}}}}"#
    )
    .expect("write session a tokens");
    writeln!(
        fixture,
        r#"{{"timestamp":"2026-05-01T00:00:03Z","session_id":"session-a","type":"event_msg","payload":{{"type":"task_complete","completed_at":"2026-05-01T00:00:03Z"}}}}"#
    )
    .expect("write session a complete");
    writeln!(
        fixture,
        r#"{{"timestamp":"2026-05-01T00:00:04Z","session_id":"session-b","type":"event_msg","payload":{{"type":"token_count","info":{{"last_token_usage":{{"input_tokens":160,"cached_input_tokens":40,"output_tokens":60,"reasoning_output_tokens":20,"total_tokens":280}},"total_token_usage":{{"input_tokens":160,"cached_input_tokens":40,"output_tokens":60,"reasoning_output_tokens":20,"total_tokens":280}}}}}}}}"#
    )
    .expect("write session b tokens");
    writeln!(
        fixture,
        r#"{{"timestamp":"2026-05-01T00:00:05Z","session_id":"session-b","type":"event_msg","payload":{{"type":"task_complete","completed_at":"2026-05-01T00:00:05Z"}}}}"#
    )
    .expect("write session b complete");

    let location = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        tmp.path(),
        LocationOrigin::Configured,
    );

    let outcome = scan_codex_source(&CodexAdapter, &location, &options()).expect("scan");

    assert_eq!(outcome.events.len(), 2);

    // Order by total tokens so session-a (140) precedes session-b (280) regardless of the
    // order the scanner emitted them in.
    let mut by_total = outcome.events.iter().collect::<Vec<_>>();
    by_total.sort_by_key(|event| event.usage.total_tokens);
    let (session_a, session_b) = (by_total[0], by_total[1]);

    assert_eq!(session_a.usage.total_tokens, Some(140));
    let a_hash = session_a
        .session
        .local_session_id_hash
        .as_deref()
        .expect("session a hash");
    assert_eq!(a_hash, hash_text("session-a"));
    assert_eq!(
        session_a.session.started_at.to_rfc3339(),
        "2026-05-01T00:00:00+00:00"
    );
    let a_ended = session_a.session.ended_at.expect("session a ended");
    assert_eq!(a_ended.to_rfc3339(), "2026-05-01T00:00:03+00:00");
    assert_eq!(session_a.session.duration_seconds, Some(3));

    assert_eq!(session_b.usage.total_tokens, Some(280));
    let b_hash = session_b
        .session
        .local_session_id_hash
        .as_deref()
        .expect("session b hash");
    assert_eq!(b_hash, hash_text("session-b"));
    assert_eq!(
        session_b.session.started_at.to_rfc3339(),
        "2026-05-01T00:00:01+00:00"
    );
    let b_ended = session_b.session.ended_at.expect("session b ended");
    assert_eq!(b_ended.to_rfc3339(), "2026-05-01T00:00:05+00:00");
    assert_eq!(session_b.session.duration_seconds, Some(4));
}
3564
/// A single turn containing two token_count records must produce one event that accounts
/// for both records and reports two requests.
#[test]
fn codex_turn_usage_consumes_all_token_count_lines() {
    let tmp = tempfile::tempdir().expect("tempdir");
    let sessions_dir = tmp.path().join("sessions");
    std::fs::create_dir_all(&sessions_dir).expect("sessions");

    let mut fixture = File::create(sessions_dir.join("multi-token-count.jsonl")).expect("fixture");
    writeln!(
        fixture,
        r#"{{"timestamp":"2026-05-01T00:00:00Z","type":"event_msg","payload":{{"type":"task_started","started_at":"2026-05-01T00:00:00Z"}}}}"#
    )
    .expect("write start");
    writeln!(
        fixture,
        r#"{{"timestamp":"2026-05-01T00:00:01Z","type":"event_msg","payload":{{"type":"token_count","info":{{"last_token_usage":{{"input_tokens":40,"cached_input_tokens":10,"output_tokens":20,"reasoning_output_tokens":5,"total_tokens":60}},"total_token_usage":{{"input_tokens":40,"cached_input_tokens":10,"output_tokens":20,"reasoning_output_tokens":5,"total_tokens":60}}}}}}}}"#
    )
    .expect("write first token count");
    writeln!(
        fixture,
        r#"{{"timestamp":"2026-05-01T00:00:02Z","type":"event_msg","payload":{{"type":"token_count","info":{{"last_token_usage":{{"input_tokens":80,"cached_input_tokens":20,"output_tokens":40,"reasoning_output_tokens":10,"total_tokens":120}},"total_token_usage":{{"input_tokens":120,"cached_input_tokens":30,"output_tokens":60,"reasoning_output_tokens":15,"total_tokens":180}}}}}}}}"#
    )
    .expect("write second token count");
    writeln!(
        fixture,
        r#"{{"timestamp":"2026-05-01T00:00:03Z","type":"event_msg","payload":{{"type":"task_complete","completed_at":"2026-05-01T00:00:03Z","duration_ms":3000}}}}"#
    )
    .expect("write completion");

    let location = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        tmp.path(),
        LocationOrigin::Configured,
    );

    let outcome = scan_codex_source(&CodexAdapter, &location, &options()).expect("scan");

    assert_eq!(outcome.events.len(), 1);
    let usage = &outcome.events[0].usage;
    assert_eq!(usage.input_tokens, Some(90));
    assert_eq!(usage.cache_read_tokens, Some(30));
    assert_eq!(usage.output_tokens, Some(45));
    assert_eq!(usage.reasoning_tokens, Some(15));
    assert_eq!(usage.total_tokens, Some(180));
    assert_eq!(usage.requests, Some(2));
}
3610
/// When task_started / task_complete carry no explicit timing fields, session bounds and
/// latency must be taken from the record timestamps and marked as inferred.
#[test]
fn codex_rollout_derives_runtime_from_turn_timestamps_when_duration_is_missing() {
    let tmp = tempfile::tempdir().expect("tempdir");
    let sessions_dir = tmp.path().join("sessions");
    std::fs::create_dir_all(&sessions_dir).expect("sessions");

    let mut rollout = File::create(sessions_dir.join("legacy-rollout.jsonl")).expect("fixture");
    writeln!(
        rollout,
        r#"{{"timestamp":"2026-04-11T00:00:00Z","type":"turn_context","payload":{{"model":"gpt-5"}}}}"#
    )
    .expect("write context");
    // No `started_at` in the payload: only the record timestamp is available.
    writeln!(
        rollout,
        r#"{{"timestamp":"2026-04-11T00:00:01Z","type":"event_msg","payload":{{"type":"task_started"}}}}"#
    )
    .expect("write start");
    writeln!(
        rollout,
        r#"{{"timestamp":"2026-04-11T00:00:02Z","type":"response_item","payload":{{"type":"message","role":"user","content":[{{"type":"input_text","text":"hello"}}]}}}}"#
    )
    .expect("write user");
    writeln!(
        rollout,
        r#"{{"timestamp":"2026-04-11T00:00:05Z","type":"event_msg","payload":{{"type":"token_count","info":{{"last_token_usage":{{"input_tokens":80,"cached_input_tokens":20,"output_tokens":40,"reasoning_output_tokens":10,"total_tokens":120}},"total_token_usage":{{"input_tokens":80,"cached_input_tokens":20,"output_tokens":40,"reasoning_output_tokens":10,"total_tokens":120}}}}}}}}"#
    )
    .expect("write tokens");
    writeln!(
        rollout,
        r#"{{"timestamp":"2026-04-11T00:00:06Z","type":"response_item","payload":{{"type":"message","role":"assistant","content":[{{"type":"output_text","text":"hi"}}]}}}}"#
    )
    .expect("write assistant");
    // No `completed_at` / `duration_ms` either.
    writeln!(
        rollout,
        r#"{{"timestamp":"2026-04-11T00:00:06Z","type":"event_msg","payload":{{"type":"task_complete"}}}}"#
    )
    .expect("write complete");

    let location = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        tmp.path(),
        LocationOrigin::Configured,
    );

    let outcome = scan_codex_source(&CodexAdapter, &location, &options()).expect("scan");

    assert_eq!(outcome.events.len(), 1);
    let event = &outcome.events[0];

    let session = &event.session;
    assert_eq!(
        session.started_at.to_rfc3339(),
        "2026-04-11T00:00:01+00:00"
    );
    let ended_at = session.ended_at.expect("ended_at");
    assert_eq!(ended_at.to_rfc3339(), "2026-04-11T00:00:06+00:00");
    assert_eq!(session.duration_seconds, Some(5));

    let runtime = event.runtime.as_ref().expect("runtime");
    assert_eq!(runtime.latency_ms, Some(5000));
    assert_eq!(runtime.latency_source, Some(LatencySource::Inferred));
    assert_eq!(runtime.time_to_first_token_ms, None);
    assert_eq!(runtime.total_messages, Some(2));
    assert_eq!(runtime.user_messages, Some(1));
    assert_eq!(runtime.assistant_messages, Some(1));
    assert_eq!(runtime.developer_messages, Some(0));
}
3681
/// Two session files with identical turn content but different `cwd` values must both
/// survive deduplication, each keeping its own project path label.
#[test]
fn codex_path_independent_turn_dedupe_keeps_distinct_projects() {
    let dir = tempfile::tempdir().expect("tempdir");
    let codex_root = dir.path().join("codex");
    let sessions = codex_root.join("sessions");
    let workspace_a = dir.path().join("workspace-a").join("ai-stats");
    let workspace_b = dir.path().join("workspace-b").join("ai-stats");
    std::fs::create_dir_all(&sessions).expect("sessions");
    std::fs::create_dir_all(&workspace_a).expect("workspace a");
    std::fs::create_dir_all(&workspace_b).expect("workspace b");
    write_git_fixture(&workspace_a, "git@github.com:StarkDmi/StatsAI.git", "main");
    write_git_fixture(&workspace_b, "git@github.com:StarkDmi/StatsAI.git", "main");

    for (name, workspace) in [("a.jsonl", &workspace_a), ("b.jsonl", &workspace_b)] {
        // JSON-encode the cwd instead of splicing it between quotes: a temp path containing
        // backslashes or quotes (e.g. on Windows) would otherwise corrupt the record. For
        // ordinary Unix paths the encoded form is exactly `"<path>"`.
        let cwd_json =
            serde_json::to_string(&workspace.display().to_string()).expect("encode cwd");

        let mut file = File::create(sessions.join(name)).expect("fixture");
        writeln!(
            file,
            r#"{{"timestamp":"2026-06-01T08:00:00Z","type":"session_meta","payload":{{"cwd":{cwd_json},"git":{{"repository_url":"git@github.com:StarkDmi/StatsAI.git","branch":"main"}}}}}}"#
        )
        .expect("write session meta");
        writeln!(
            file,
            r#"{{"timestamp":"2026-06-01T08:00:00Z","type":"turn_context","payload":{{"model":"gpt-5"}}}}"#
        )
        .expect("write context");
        writeln!(
            file,
            r#"{{"timestamp":"2026-06-01T08:00:01Z","type":"event_msg","payload":{{"type":"task_started","started_at":"2026-06-01T08:00:01Z"}}}}"#
        )
        .expect("write start");
        writeln!(
            file,
            r#"{{"timestamp":"2026-06-01T08:00:03Z","type":"event_msg","payload":{{"type":"token_count","info":{{"last_token_usage":{{"input_tokens":60,"cached_input_tokens":20,"output_tokens":30,"reasoning_output_tokens":10,"total_tokens":120}},"total_token_usage":{{"input_tokens":60,"cached_input_tokens":20,"output_tokens":30,"reasoning_output_tokens":10,"total_tokens":120}}}}}}}}"#
        )
        .expect("write tokens");
        writeln!(
            file,
            r#"{{"timestamp":"2026-06-01T08:00:04Z","type":"event_msg","payload":{{"type":"task_complete","completed_at":"2026-06-01T08:00:04Z","duration_ms":3000}}}}"#
        )
        .expect("write complete");
    }

    let source = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        &codex_root,
        LocationOrigin::Configured,
    );
    let scan = scan_codex_source(&CodexAdapter, &source, &options()).expect("scan");

    assert_eq!(scan.events.len(), 2);
    assert_eq!(scan.diagnostics.duplicate_events, 0);

    // Sort the labels so the assertion does not depend on event order.
    let mut project_paths = scan
        .events
        .iter()
        .map(|event| {
            event
                .project
                .as_ref()
                .and_then(|project| project.path_label.clone())
                .expect("project path")
        })
        .collect::<Vec<_>>();
    project_paths.sort();

    assert_eq!(
        project_paths,
        vec![
            workspace_a.to_string_lossy().to_string(),
            workspace_b.to_string_lossy().to_string(),
        ]
    );
}
3758
/// Two session files with identical usage records in the same workspace but different git
/// branches must both survive deduplication, each keeping its own branch label.
#[test]
fn codex_path_independent_usage_dedupe_keeps_distinct_branches() {
    let dir = tempfile::tempdir().expect("tempdir");
    let codex_root = dir.path().join("codex");
    let sessions = codex_root.join("sessions");
    let workspace = dir.path().join("workspace").join("ai-stats");
    std::fs::create_dir_all(&sessions).expect("sessions");
    std::fs::create_dir_all(&workspace).expect("workspace");
    write_git_fixture(&workspace, "git@github.com:StarkDmi/StatsAI.git", "main");

    // JSON-encode the cwd instead of splicing it between quotes: a temp path containing
    // backslashes or quotes (e.g. on Windows) would otherwise corrupt the record. For
    // ordinary Unix paths the encoded form is exactly `"<path>"`.
    let cwd_json = serde_json::to_string(&workspace.display().to_string()).expect("encode cwd");

    for (name, branch_name) in [("main.jsonl", "main"), ("feature.jsonl", "feature-x")] {
        let mut file = File::create(sessions.join(name)).expect("fixture");
        writeln!(
            file,
            r#"{{"timestamp":"2026-06-03T08:00:00Z","type":"session_meta","payload":{{"cwd":{cwd_json},"git":{{"repository_url":"git@github.com:StarkDmi/StatsAI.git","branch":"{branch_name}"}}}}}}"#
        )
        .expect("write session meta");
        writeln!(
            file,
            r#"{{"timestamp":"2026-06-03T08:00:01Z","type":"event_msg","payload":{{"type":"token_count","info":{{"last_token_usage":{{"input_tokens":60,"cached_input_tokens":20,"output_tokens":30,"reasoning_output_tokens":10,"total_tokens":120}},"total_token_usage":{{"input_tokens":60,"cached_input_tokens":20,"output_tokens":30,"reasoning_output_tokens":10,"total_tokens":120}}}}}}}}"#
        )
        .expect("write usage");
    }

    let source = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        &codex_root,
        LocationOrigin::Configured,
    );

    let scan = scan_codex_source(&CodexAdapter, &source, &options()).expect("scan");

    assert_eq!(scan.events.len(), 2);
    assert_eq!(scan.diagnostics.duplicate_events, 0);

    // Sort the labels so the assertion does not depend on event order.
    let mut branches = scan
        .events
        .iter()
        .map(|event| {
            event
                .project
                .as_ref()
                .and_then(|project| project.branch_label.clone())
                .expect("branch")
        })
        .collect::<Vec<_>>();
    branches.sort();

    assert_eq!(branches, vec!["feature-x".to_string(), "main".to_string()]);
}
3813
/// A record that nests timestamp, model and `prompt_tokens` / `completion_tokens` /
/// `cached_tokens` under a top-level `data` object must still be parsed into one event.
#[test]
fn codex_headless_usage_shapes_are_parsed() {
    let tmp = tempfile::tempdir().expect("tempdir");
    let sessions_dir = tmp.path().join("sessions");
    std::fs::create_dir_all(&sessions_dir).expect("sessions");

    let mut fixture = File::create(sessions_dir.join("exec.jsonl")).expect("fixture");
    writeln!(
        fixture,
        r#"{{"data":{{"timestamp":"2026-05-01T00:00:00Z","model":"gpt-5","usage":{{"prompt_tokens":10,"completion_tokens":5,"cached_tokens":3}}}}}}"#
    )
    .expect("write");

    let location = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        tmp.path(),
        LocationOrigin::Configured,
    );

    let outcome = scan_codex_source(&CodexAdapter, &location, &options()).expect("scan");

    assert_eq!(outcome.events.len(), 1);
    let usage = &outcome.events[0].usage;
    assert_eq!(usage.input_tokens, Some(7));
    assert_eq!(usage.output_tokens, Some(5));
    assert_eq!(usage.cache_read_tokens, Some(3));
}
3840
/// Writes the same usage record (same timestamp, session id and counts) into
/// two session files and checks that the scan keeps one event and reports the
/// other as a duplicate.
#[test]
fn duplicated_semantic_events_are_deduped_within_source() {
    let temp_root = tempfile::tempdir().expect("tempdir");
    let sessions_dir = temp_root.path().join("sessions");
    std::fs::create_dir_all(&sessions_dir).expect("sessions");

    // Both files receive a byte-identical line.
    let fixture_names = ["a.jsonl", "b.jsonl"];
    for file_name in fixture_names {
        let mut fixture = File::create(sessions_dir.join(file_name)).expect("fixture");
        writeln!(
            fixture,
            "{{\"timestamp\":\"2026-05-01T00:00:00Z\",\"session_id\":\"same\",\"usage\":{{\"input_tokens\":1,\"output_tokens\":2}}}}"
        )
        .expect("write");
    }

    let source = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        temp_root.path(),
        LocationOrigin::Configured,
    );
    let scan = scan_codex_source(&CodexAdapter, &source, &options()).expect("scan");

    let kept_events = scan.events.len();
    let dropped_duplicates = scan.diagnostics.duplicate_events;
    assert_eq!(kept_events, 1);
    assert_eq!(dropped_duplicates, 1);
}
3867
/// Places an `auth.json` with flat claims (top-level `email` plus the
/// `https://api.openai.com/auth` object) next to the `sessions` directory and
/// checks two things: the scan exposes a verified source state built from those
/// claims, and the scanned event itself carries no provider account id and is
/// not marked with `IdentitySource::LocalAuth`.
#[test]
fn codex_auth_json_exposes_verified_source_state_without_stamping_events() {
    let temp_root = tempfile::tempdir().expect("tempdir");
    let sessions_dir = temp_root.path().join("sessions");
    std::fs::create_dir_all(&sessions_dir).expect("sessions");

    let auth_payload = serde_json::json!({
        "email": "existing@example.com",
        "https://api.openai.com/auth": {
            "chatgpt_account_id": "acct-real",
            "chatgpt_plan_type": "plus",
            "chatgpt_subscription_active_start": "2026-05-29T10:12:43+00:00",
            "chatgpt_subscription_active_until": "2026-06-29T10:12:43+00:00",
            "chatgpt_subscription_last_checked": "2026-05-29T10:14:56.058278+00:00"
        }
    });
    std::fs::write(temp_root.path().join("auth.json"), auth_payload.to_string()).expect("auth");

    let mut fixture = File::create(sessions_dir.join("session.jsonl")).expect("fixture");
    writeln!(
        fixture,
        "{{\"timestamp\":\"2026-05-01T00:00:00Z\",\"usage\":{{\"input_tokens\":1,\"output_tokens\":2}}}}"
    )
    .expect("write");

    let source = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        temp_root.path(),
        LocationOrigin::Configured,
    );
    let scan = scan_codex_source(&CodexAdapter, &source, &options()).expect("scan");

    // Identity and plan fields on the verified source state.
    let verified = scan
        .verified_source_state
        .as_ref()
        .expect("verified source state");
    assert_eq!(verified.provider_user_id.as_deref(), Some("acct-real"));
    assert_eq!(verified.email.as_deref(), Some("existing@example.com"));
    assert_eq!(verified.plan_name.as_deref(), Some("Plus"));
    assert!(verified.authenticated_at.is_some());
    let verified_at = verified.verified_at.map(|stamp| stamp.to_rfc3339());
    assert_eq!(
        verified_at,
        Some("2026-05-29T10:14:56.058278+00:00".to_string())
    );

    // Subscription details attached to the verified source state.
    let subscription = verified.subscription.as_ref().expect("subscription");
    assert_eq!(subscription.plan_name, "Plus");
    assert_eq!(subscription.price, 2000);
    assert_eq!(
        subscription.started_at.to_rfc3339(),
        "2026-05-29T10:12:43+00:00"
    );
    let period_end = subscription
        .current_period_ends_at
        .map(|stamp| stamp.to_rfc3339());
    assert_eq!(period_end, Some("2026-06-29T10:12:43+00:00".to_string()));
    assert_eq!(subscription.ended_at, None);

    // The event must not pick up the account identity from auth.json.
    let first_event = &scan.events[0];
    assert_eq!(first_event.provider_account_id, None);
    let identity_source = first_event
        .parse_evidence
        .as_ref()
        .map(|parsed| parsed.account_identity_source.clone());
    assert_ne!(identity_source, Some(IdentitySource::LocalAuth));
}
3939
/// Places an `auth.json` that keeps its identity data inside
/// `tokens.id_token` and checks that the scan exposes the expected verified
/// source state (account id, email, plan, subscription dates) while the
/// scanned event carries no provider account id.
#[test]
fn codex_auth_json_reads_nested_tokens_id_token_shape() {
    let temp_root = tempfile::tempdir().expect("tempdir");
    let sessions_dir = temp_root.path().join("sessions");
    std::fs::create_dir_all(&sessions_dir).expect("sessions");

    let auth_payload = serde_json::json!({
        "auth_mode": "chatgpt",
        "OPENAI_API_KEY": null,
        "tokens": {
            "id_token": "eyJhbGciOiJub25lIn0.eyJlbWFpbCI6ImV4aXN0aW5nQGV4YW1wbGUuY29tIiwiaWF0IjoxNzQ4NTEzNTYzLCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYWNjdC1yZWFsIiwiY2hhdGdwdF9wbGFuX3R5cGUiOiJwbHVzIiwiY2hhdGdwdF9zdWJzY3JpcHRpb25fYWN0aXZlX3N0YXJ0IjoiMjAyNi0wNS0yOVQxMDoxMjo0MyswMDowMCIsImNoYXRncHRfc3Vic2NyaXB0aW9uX2FjdGl2ZV91bnRpbCI6IjIwMjYtMDYtMjlUMTA6MTI6NDMrMDA6MDAiLCJjaGF0Z3B0X3N1YnNjcmlwdGlvbl9sYXN0X2NoZWNrZWQiOiIyMDI2LTA1LTI5VDEwOjE0OjU2LjA1ODI3OCswMDowMCJ9fQ.",
            "access_token": "unused",
            "refresh_token": "unused",
            "account_id": "41412a8c-6e19-4d33-9b67-6fb4b4dc0734"
        },
        "last_refresh": "2026-05-19T19:56:03.481816Z"
    });
    std::fs::write(temp_root.path().join("auth.json"), auth_payload.to_string()).expect("auth");

    let mut fixture = File::create(sessions_dir.join("session.jsonl")).expect("fixture");
    writeln!(
        fixture,
        "{{\"timestamp\":\"2026-05-01T00:00:00Z\",\"usage\":{{\"input_tokens\":1,\"output_tokens\":2}}}}"
    )
    .expect("write");

    let source = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        temp_root.path(),
        LocationOrigin::Configured,
    );
    let scan = scan_codex_source(&CodexAdapter, &source, &options()).expect("scan");

    // Identity and plan fields on the verified source state.
    let verified = scan
        .verified_source_state
        .as_ref()
        .expect("verified source state");
    assert_eq!(verified.provider_user_id.as_deref(), Some("acct-real"));
    assert_eq!(verified.email.as_deref(), Some("existing@example.com"));
    assert_eq!(verified.plan_name.as_deref(), Some("Plus"));
    assert!(verified.authenticated_at.is_some());
    let verified_at = verified.verified_at.map(|stamp| stamp.to_rfc3339());
    assert_eq!(
        verified_at,
        Some("2026-05-29T10:14:56.058278+00:00".to_string())
    );

    // Subscription details attached to the verified source state.
    let subscription = verified.subscription.as_ref().expect("subscription");
    assert_eq!(subscription.plan_name, "Plus");
    assert_eq!(subscription.price, 2000);
    assert_eq!(
        subscription.started_at.to_rfc3339(),
        "2026-05-29T10:12:43+00:00"
    );
    let period_end = subscription
        .current_period_ends_at
        .map(|stamp| stamp.to_rfc3339());
    assert_eq!(period_end, Some("2026-06-29T10:12:43+00:00".to_string()));
    assert_eq!(subscription.ended_at, None);

    // The event must not be stamped with an account id.
    let first_event = &scan.events[0];
    assert_eq!(first_event.provider_account_id, None);
}
4005
/// Configures the source at the `sessions` directory itself while `auth.json`
/// lives one level up, then checks that `probe_verified_source_state` still
/// returns the account id, email and plan from that parent-level file.
#[test]
fn codex_probe_verified_source_state_uses_parent_auth_for_sessions_path() {
    let temp_root = tempfile::tempdir().expect("tempdir");
    let sessions_dir = temp_root.path().join("sessions");
    std::fs::create_dir_all(&sessions_dir).expect("sessions");

    // auth.json is written beside `sessions`, not inside it.
    let auth_payload = serde_json::json!({
        "email": "existing@example.com",
        "https://api.openai.com/auth": {
            "chatgpt_account_id": "acct-real",
            "chatgpt_plan_type": "plus",
            "chatgpt_subscription_active_start": "2026-05-29T10:12:43+00:00",
            "chatgpt_subscription_active_until": "2026-06-29T10:12:43+00:00",
            "chatgpt_subscription_last_checked": "2026-05-29T10:14:56.058278+00:00"
        }
    });
    std::fs::write(temp_root.path().join("auth.json"), auth_payload.to_string()).expect("auth");

    let source = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        &sessions_dir,
        LocationOrigin::Configured,
    );

    let probe_result = CodexAdapter.probe_verified_source_state(&source);
    let verified = probe_result
        .expect("probe")
        .expect("verified source state");

    assert_eq!(verified.provider_user_id.as_deref(), Some("acct-real"));
    assert_eq!(verified.email.as_deref(), Some("existing@example.com"));
    assert_eq!(verified.plan_name.as_deref(), Some("Plus"));
}
4044
/// Feeds a camelCase usage object to `claude_usage_counts_from_value` and
/// checks each extracted counter as well as the computed total.
#[test]
fn usage_counts_support_common_shapes() {
    let raw_usage: Value = serde_json::json!({
        "inputTokens": 10,
        "outputTokens": 20,
        "cacheCreationInputTokens": 2,
        "cacheReadInputTokens": 3
    });

    let counts = claude_usage_counts_from_value(&raw_usage);

    assert_eq!(counts.input_tokens, Some(10));
    assert_eq!(counts.output_tokens, Some(20));
    assert_eq!(counts.cache_creation_tokens, Some(2));
    assert_eq!(counts.cache_read_tokens, Some(3));
    // 10 + 20 + 2 + 3
    assert_eq!(counts.computed_total(), 35);
}
4060
/// Feeds `codex_usage_counts_from_value` a usage object whose input count
/// includes the cached tokens and whose output count includes the reasoning
/// tokens, and checks that the returned counters are split so they add up to
/// the reported total of 110.
#[test]
fn codex_usage_counts_normalize_inclusive_subtotals() {
    let raw_usage: Value = serde_json::json!({
        "input_tokens": 100,
        "cached_input_tokens": 30,
        "output_tokens": 10,
        "reasoning_output_tokens": 5,
        "total_tokens": 110
    });

    let counts = codex_usage_counts_from_value(&raw_usage);

    // Input side: 100 reported, 30 of them cached.
    assert_eq!(counts.input_tokens, Some(70));
    assert_eq!(counts.cache_read_tokens, Some(30));
    // Output side: 10 reported, 5 of them reasoning.
    assert_eq!(counts.output_tokens, Some(5));
    assert_eq!(counts.reasoning_tokens, Some(5));
    assert_eq!(counts.computed_total(), 110);
}
4079
/// Scans a record whose `cached_input_tokens` (30) exceeds its `input_tokens`
/// (10) and checks that the event reports 0 input tokens and 10 cache-read
/// tokens.
#[test]
fn codex_caps_cached_input_to_input() {
    let temp_root = tempfile::tempdir().expect("tempdir");
    let sessions_dir = temp_root.path().join("sessions");
    std::fs::create_dir_all(&sessions_dir).expect("sessions");

    let fixture_path = sessions_dir.join("session.jsonl");
    let mut fixture = File::create(fixture_path).expect("fixture");
    writeln!(
        fixture,
        r#"{{"timestamp":"2026-05-01T00:00:00Z","usage":{{"input_tokens":10,"cached_input_tokens":30,"output_tokens":5}}}}"#
    )
    .expect("write");

    let source = SourceLocation::local_adapter(
        CODEX_PROVIDER,
        "test",
        "0",
        temp_root.path(),
        LocationOrigin::Configured,
    );
    let scan = scan_codex_source(&CodexAdapter, &source, &options()).expect("scan");

    let usage = &scan.events[0].usage;
    assert_eq!(usage.input_tokens, Some(0));
    assert_eq!(usage.cache_read_tokens, Some(10));
}
4104}