Skip to main content

codex_ops/stats/
reports.rs

1use super::StatSort;
2use crate::pricing::TokenUsage as PricingTokenUsage;
3use crate::time::{local_to_utc, StatGroupBy};
4use chrono::{DateTime, Datelike, Local, SecondsFormat, Timelike, Utc};
5use serde::Serialize;
6
/// Warning text attached to reports that were produced without a full scan.
///
/// Returned by `usage_warnings` whenever `should_suggest_full_scan` is true.
/// The message is assembled from three sentences; the joined text is a single
/// line with one space between sentences.
const FULL_SCAN_ACCURACY_NOTE: &str = concat!(
    "Note: This report used balanced scanning, not a full scan. ",
    "It reads in-range files and checks a bounded lookback by last token_count timestamp. ",
    "Use -F, --full-scan to check all pre-range rollout files for exact local token_count results.",
);
9
10#[derive(Debug, Clone)]
11pub struct UsageRecordsReadOptions {
12    pub start: DateTime<Utc>,
13    pub end: DateTime<Utc>,
14    pub sessions_dir: std::path::PathBuf,
15    pub scan_all_files: bool,
16    pub account_history_file: Option<std::path::PathBuf>,
17    pub account_id: Option<String>,
18}
19
20#[derive(Clone, Debug)]
21pub struct UsageRecordsReport {
22    pub start: DateTime<Utc>,
23    pub end: DateTime<Utc>,
24    pub sessions_dir: String,
25    pub records: Vec<UsageRecord>,
26    pub diagnostics: UsageDiagnostics,
27}
28
29#[derive(Clone, Debug, Default, Serialize, PartialEq)]
30#[serde(rename_all = "camelCase")]
31pub struct TokenUsage {
32    pub input_tokens: i64,
33    pub cached_input_tokens: i64,
34    pub output_tokens: i64,
35    pub reasoning_output_tokens: i64,
36    pub total_tokens: i64,
37}
38
39impl TokenUsage {
40    pub(super) fn add(&mut self, other: &TokenUsage) {
41        self.input_tokens += other.input_tokens;
42        self.cached_input_tokens += other.cached_input_tokens;
43        self.output_tokens += other.output_tokens;
44        self.reasoning_output_tokens += other.reasoning_output_tokens;
45        self.total_tokens += other.total_tokens;
46    }
47
48    pub(super) fn is_empty(&self) -> bool {
49        self.input_tokens == 0
50            && self.cached_input_tokens == 0
51            && self.output_tokens == 0
52            && self.reasoning_output_tokens == 0
53            && self.total_tokens == 0
54    }
55
56    pub(super) fn pricing_usage(&self) -> PricingTokenUsage {
57        PricingTokenUsage {
58            input_tokens: self.input_tokens.max(0) as u64,
59            cached_input_tokens: self.cached_input_tokens.max(0) as u64,
60            output_tokens: self.output_tokens.max(0) as u64,
61        }
62    }
63}
64
65#[derive(Clone, Debug)]
66pub struct UsageRecord {
67    pub timestamp: DateTime<Utc>,
68    pub session_id: String,
69    pub model: String,
70    pub reasoning_effort: Option<String>,
71    pub cwd: String,
72    pub account_id: Option<String>,
73    pub file_path: String,
74    pub usage: TokenUsage,
75}
76
77#[derive(Clone, Copy)]
78pub(super) struct UsageRecordView<'a> {
79    pub(super) timestamp: DateTime<Utc>,
80    pub(super) session_id: &'a str,
81    pub(super) model: &'a str,
82    pub(super) reasoning_effort: Option<&'a str>,
83    pub(super) cwd: &'a str,
84    pub(super) account_id: Option<&'a str>,
85    pub(super) file_path: &'a str,
86    pub(super) usage: &'a TokenUsage,
87}
88
89impl UsageRecordView<'_> {
90    pub(super) fn to_owned_record(self) -> UsageRecord {
91        UsageRecord {
92            timestamp: self.timestamp,
93            session_id: self.session_id.to_string(),
94            model: self.model.to_string(),
95            reasoning_effort: self.reasoning_effort.map(str::to_string),
96            cwd: self.cwd.to_string(),
97            account_id: self.account_id.map(str::to_string),
98            file_path: self.file_path.to_string(),
99            usage: self.usage.clone(),
100        }
101    }
102}
103
104#[derive(Clone, Debug, Serialize, PartialEq)]
105#[serde(rename_all = "camelCase")]
106pub struct UsageStatRow {
107    pub(super) key: String,
108    pub(super) sessions: usize,
109    pub(super) calls: i64,
110    pub(super) usage: TokenUsage,
111    pub(super) credits: f64,
112    pub(super) usd: f64,
113    pub(super) priced_calls: i64,
114    pub(super) unpriced_calls: i64,
115}
116
117#[derive(Clone, Debug, Serialize, PartialEq)]
118#[serde(rename_all = "camelCase")]
119pub struct UsageUnpricedModelRow {
120    pub(super) model: String,
121    pub(super) pricing_key: String,
122    pub(super) calls: i64,
123    pub(super) total_tokens: i64,
124    #[serde(skip_serializing_if = "Option::is_none")]
125    pub(super) note: Option<String>,
126    pub(super) pricing_stub: String,
127}
128
129#[derive(Clone, Debug, Serialize, PartialEq)]
130#[serde(rename_all = "camelCase")]
131pub struct UsageDiagnostics {
132    pub scan_all_files: bool,
133    pub scanned_directories: i64,
134    pub skipped_directories: i64,
135    pub read_files: i64,
136    pub skipped_files: i64,
137    pub prefiltered_files: i64,
138    pub read_lines: i64,
139    pub invalid_json_lines: i64,
140    pub token_count_events: i64,
141    pub included_usage_events: i64,
142    pub skipped_events: SkippedEvents,
143    pub file_read_concurrency: i64,
144}
145
146#[derive(Clone, Debug, Default, Serialize, PartialEq)]
147#[serde(rename_all = "camelCase")]
148pub struct SkippedEvents {
149    pub missing_metadata: i64,
150    pub missing_usage: i64,
151    pub empty_usage: i64,
152    pub out_of_range: i64,
153    pub account_mismatch: i64,
154}
155
156impl UsageDiagnostics {
157    pub(super) fn new(file_read_concurrency: i64, scan_all_files: bool) -> Self {
158        Self {
159            scan_all_files,
160            scanned_directories: 0,
161            skipped_directories: 0,
162            read_files: 0,
163            skipped_files: 0,
164            prefiltered_files: 0,
165            read_lines: 0,
166            invalid_json_lines: 0,
167            token_count_events: 0,
168            included_usage_events: 0,
169            skipped_events: SkippedEvents::default(),
170            file_read_concurrency,
171        }
172    }
173
174    pub(super) fn merge_file_scan(&mut self, other: &UsageDiagnostics) {
175        self.prefiltered_files += other.prefiltered_files;
176        self.read_lines += other.read_lines;
177        self.invalid_json_lines += other.invalid_json_lines;
178        self.token_count_events += other.token_count_events;
179        self.included_usage_events += other.included_usage_events;
180        self.skipped_events.missing_metadata += other.skipped_events.missing_metadata;
181        self.skipped_events.missing_usage += other.skipped_events.missing_usage;
182        self.skipped_events.empty_usage += other.skipped_events.empty_usage;
183        self.skipped_events.out_of_range += other.skipped_events.out_of_range;
184        self.skipped_events.account_mismatch += other.skipped_events.account_mismatch;
185    }
186}
187
188#[derive(Clone, Debug)]
189pub(super) struct UsageStatsReport {
190    pub(super) start: DateTime<Utc>,
191    pub(super) end: DateTime<Utc>,
192    pub(super) group_by: StatGroupBy,
193    pub(super) include_reasoning_effort: bool,
194    pub(super) sort_by: Option<StatSort>,
195    pub(super) limit: Option<usize>,
196    pub(super) sessions_dir: String,
197    pub(super) rows: Vec<UsageStatRow>,
198    pub(super) totals: UsageStatRow,
199    pub(super) unpriced_models: Vec<UsageUnpricedModelRow>,
200    pub(super) diagnostics: Option<UsageDiagnostics>,
201}
202
203#[derive(Clone, Debug)]
204pub(super) struct UsageSessionRow {
205    pub(super) session_id: String,
206    pub(super) model: String,
207    pub(super) cwd: String,
208    pub(super) first_seen: DateTime<Utc>,
209    pub(super) last_seen: DateTime<Utc>,
210    pub(super) calls: i64,
211    pub(super) usage: TokenUsage,
212    pub(super) credits: f64,
213    pub(super) usd: f64,
214    pub(super) priced_calls: i64,
215    pub(super) unpriced_calls: i64,
216    pub(super) file_path: String,
217}
218
219#[derive(Clone, Debug)]
220pub(super) struct UsageSessionEventRow {
221    pub(super) timestamp: DateTime<Utc>,
222    pub(super) model: String,
223    pub(super) reasoning_effort: Option<String>,
224    pub(super) cwd: String,
225    pub(super) usage: TokenUsage,
226    pub(super) credits: f64,
227    pub(super) usd: f64,
228    pub(super) priced: bool,
229    pub(super) file_path: String,
230}
231
232#[derive(Clone, Debug)]
233pub(super) struct UsageSessionCompactRow {
234    pub(super) start: DateTime<Utc>,
235    pub(super) end: DateTime<Utc>,
236    pub(super) events: usize,
237    pub(super) model: String,
238    pub(super) reasoning_effort: Option<String>,
239    pub(super) usage: TokenUsage,
240    pub(super) credits: f64,
241    pub(super) usd: f64,
242    pub(super) unpriced_calls: i64,
243}
244
245#[derive(Clone, Debug)]
246pub(super) struct UsageSessionsReport {
247    pub(super) start: DateTime<Utc>,
248    pub(super) end: DateTime<Utc>,
249    pub(super) sort_by: Option<StatSort>,
250    pub(super) limit: usize,
251    pub(super) sessions_dir: String,
252    pub(super) rows: Vec<UsageSessionRow>,
253    pub(super) totals: UsageStatRow,
254    pub(super) unpriced_models: Vec<UsageUnpricedModelRow>,
255    pub(super) diagnostics: Option<UsageDiagnostics>,
256}
257
258#[derive(Clone, Debug)]
259pub(super) struct UsageSessionDetailReport {
260    pub(super) start: DateTime<Utc>,
261    pub(super) end: DateTime<Utc>,
262    pub(super) session_id: String,
263    pub(super) limit: Option<usize>,
264    pub(super) sessions_dir: String,
265    pub(super) summary: Option<UsageSessionRow>,
266    pub(super) rows: Vec<UsageSessionEventRow>,
267    pub(super) by_model: Vec<UsageStatRow>,
268    pub(super) by_cwd: Vec<UsageStatRow>,
269    pub(super) by_reasoning_effort: Vec<UsageStatRow>,
270    pub(super) model_switches: i64,
271    pub(super) cwd_switches: i64,
272    pub(super) reasoning_effort_switches: i64,
273    pub(super) totals: UsageStatRow,
274    pub(super) unpriced_models: Vec<UsageUnpricedModelRow>,
275    pub(super) diagnostics: Option<UsageDiagnostics>,
276}
277
278#[derive(Serialize)]
279#[serde(rename_all = "camelCase")]
280pub(super) struct UsageStatsJson<'a> {
281    start: String,
282    end: String,
283    group_by: &'static str,
284    include_reasoning_effort: bool,
285    #[serde(skip_serializing_if = "Option::is_none")]
286    sort_by: Option<&'static str>,
287    #[serde(skip_serializing_if = "Option::is_none")]
288    limit: Option<usize>,
289    sessions_dir: &'a str,
290    rows: &'a [UsageStatRow],
291    totals: &'a UsageStatRow,
292    unpriced_models: &'a [UsageUnpricedModelRow],
293    warnings: Vec<String>,
294    #[serde(skip_serializing_if = "Option::is_none")]
295    diagnostics: Option<&'a UsageDiagnostics>,
296}
297
298#[derive(Serialize)]
299#[serde(rename_all = "camelCase")]
300pub(super) struct UsageSessionsJson<'a> {
301    start: String,
302    end: String,
303    #[serde(skip_serializing_if = "Option::is_none")]
304    sort_by: Option<&'static str>,
305    limit: usize,
306    sessions_dir: &'a str,
307    rows: Vec<UsageSessionRowJson<'a>>,
308    totals: &'a UsageStatRow,
309    unpriced_models: &'a [UsageUnpricedModelRow],
310    warnings: Vec<String>,
311    #[serde(skip_serializing_if = "Option::is_none")]
312    diagnostics: Option<&'a UsageDiagnostics>,
313}
314
315#[derive(Serialize)]
316#[serde(rename_all = "camelCase")]
317pub(super) struct UsageSessionRowJson<'a> {
318    session_id: &'a str,
319    model: &'a str,
320    cwd: &'a str,
321    first_seen: String,
322    last_seen: String,
323    calls: i64,
324    usage: &'a TokenUsage,
325    credits: f64,
326    usd: f64,
327    priced_calls: i64,
328    unpriced_calls: i64,
329    file_path: &'a str,
330}
331
332#[derive(Serialize)]
333#[serde(rename_all = "camelCase")]
334pub(super) struct UsageSessionDetailJson<'a> {
335    start: String,
336    end: String,
337    session_id: &'a str,
338    #[serde(skip_serializing_if = "Option::is_none")]
339    limit: Option<usize>,
340    sessions_dir: &'a str,
341    #[serde(skip_serializing_if = "Option::is_none")]
342    summary: Option<UsageSessionRowJson<'a>>,
343    rows: Vec<UsageSessionEventRowJson<'a>>,
344    by_model: &'a [UsageStatRow],
345    by_cwd: &'a [UsageStatRow],
346    by_reasoning_effort: &'a [UsageStatRow],
347    model_switches: i64,
348    cwd_switches: i64,
349    reasoning_effort_switches: i64,
350    totals: &'a UsageStatRow,
351    unpriced_models: &'a [UsageUnpricedModelRow],
352    warnings: Vec<String>,
353    #[serde(skip_serializing_if = "Option::is_none")]
354    diagnostics: Option<&'a UsageDiagnostics>,
355}
356
357#[derive(Serialize)]
358#[serde(rename_all = "camelCase")]
359pub(super) struct UsageSessionEventRowJson<'a> {
360    timestamp: String,
361    model: &'a str,
362    #[serde(skip_serializing_if = "Option::is_none")]
363    reasoning_effort: Option<&'a str>,
364    cwd: &'a str,
365    usage: &'a TokenUsage,
366    credits: f64,
367    usd: f64,
368    priced: bool,
369    file_path: &'a str,
370}
371
372pub(super) fn to_usage_stats_json(report: &UsageStatsReport) -> UsageStatsJson<'_> {
373    UsageStatsJson {
374        start: iso_string(report.start),
375        end: iso_string(report.end),
376        group_by: report.group_by.as_str(),
377        include_reasoning_effort: report.include_reasoning_effort,
378        sort_by: report.sort_by.map(StatSort::as_str),
379        limit: report.limit,
380        sessions_dir: &report.sessions_dir,
381        rows: &report.rows,
382        totals: &report.totals,
383        unpriced_models: &report.unpriced_models,
384        warnings: usage_warnings(report.start, report.end, report.diagnostics.as_ref()),
385        diagnostics: report.diagnostics.as_ref(),
386    }
387}
388
389pub(super) fn to_usage_sessions_json(report: &UsageSessionsReport) -> UsageSessionsJson<'_> {
390    UsageSessionsJson {
391        start: iso_string(report.start),
392        end: iso_string(report.end),
393        sort_by: report.sort_by.map(StatSort::as_str),
394        limit: report.limit,
395        sessions_dir: &report.sessions_dir,
396        rows: report.rows.iter().map(to_session_row_json).collect(),
397        totals: &report.totals,
398        unpriced_models: &report.unpriced_models,
399        warnings: usage_warnings(report.start, report.end, report.diagnostics.as_ref()),
400        diagnostics: report.diagnostics.as_ref(),
401    }
402}
403
404pub(super) fn to_usage_session_detail_json(
405    report: &UsageSessionDetailReport,
406) -> UsageSessionDetailJson<'_> {
407    UsageSessionDetailJson {
408        start: iso_string(report.start),
409        end: iso_string(report.end),
410        session_id: &report.session_id,
411        limit: report.limit,
412        sessions_dir: &report.sessions_dir,
413        summary: report.summary.as_ref().map(to_session_row_json),
414        rows: report.rows.iter().map(to_session_event_row_json).collect(),
415        by_model: &report.by_model,
416        by_cwd: &report.by_cwd,
417        by_reasoning_effort: &report.by_reasoning_effort,
418        model_switches: report.model_switches,
419        cwd_switches: report.cwd_switches,
420        reasoning_effort_switches: report.reasoning_effort_switches,
421        totals: &report.totals,
422        unpriced_models: &report.unpriced_models,
423        warnings: usage_warnings(report.start, report.end, report.diagnostics.as_ref()),
424        diagnostics: report.diagnostics.as_ref(),
425    }
426}
427
428fn to_session_row_json(row: &UsageSessionRow) -> UsageSessionRowJson<'_> {
429    UsageSessionRowJson {
430        session_id: &row.session_id,
431        model: &row.model,
432        cwd: &row.cwd,
433        first_seen: iso_string(row.first_seen),
434        last_seen: iso_string(row.last_seen),
435        calls: row.calls,
436        usage: &row.usage,
437        credits: row.credits,
438        usd: row.usd,
439        priced_calls: row.priced_calls,
440        unpriced_calls: row.unpriced_calls,
441        file_path: &row.file_path,
442    }
443}
444
445fn to_session_event_row_json(row: &UsageSessionEventRow) -> UsageSessionEventRowJson<'_> {
446    UsageSessionEventRowJson {
447        timestamp: iso_string(row.timestamp),
448        model: &row.model,
449        reasoning_effort: row.reasoning_effort.as_deref(),
450        cwd: &row.cwd,
451        usage: &row.usage,
452        credits: row.credits,
453        usd: row.usd,
454        priced: row.priced,
455        file_path: &row.file_path,
456    }
457}
458
459pub(super) fn usage_warnings(
460    start: DateTime<Utc>,
461    end: DateTime<Utc>,
462    diagnostics: Option<&UsageDiagnostics>,
463) -> Vec<String> {
464    if should_suggest_full_scan(start, end, diagnostics) {
465        vec![FULL_SCAN_ACCURACY_NOTE.to_string()]
466    } else {
467        Vec::new()
468    }
469}
470
471pub(super) fn should_suggest_full_scan(
472    start: DateTime<Utc>,
473    end: DateTime<Utc>,
474    diagnostics: Option<&UsageDiagnostics>,
475) -> bool {
476    diagnostics
477        .is_some_and(|diagnostics| !diagnostics.scan_all_files && !is_all_usage_range(start, end))
478}
479
480pub(super) fn is_all_usage_range(start: DateTime<Utc>, end: DateTime<Utc>) -> bool {
481    start == local_to_utc(1900, 1, 1, 0, 0, 0, 0)
482        && end == local_to_utc(9999, 12, 31, 23, 59, 59, 999)
483}
484
485pub(super) fn format_report_range(start: DateTime<Utc>, end: DateTime<Utc>) -> String {
486    if is_all_usage_range(start, end) {
487        "all".to_string()
488    } else {
489        format!("{} to {}", format_date_time(start), format_date_time(end))
490    }
491}
492
493pub(super) fn format_group_by(report: &UsageStatsReport) -> String {
494    if report.group_by == StatGroupBy::Model && report.include_reasoning_effort {
495        "model + reasoning_effort".to_string()
496    } else {
497        report.group_by.as_str().to_string()
498    }
499}
500
501pub(super) fn format_date_time(date: DateTime<Utc>) -> String {
502    let local = date.with_timezone(&Local);
503    format!(
504        "{}-{:02}-{:02} {:02}:{:02}:{:02}",
505        local.year(),
506        local.month(),
507        local.day(),
508        local.hour(),
509        local.minute(),
510        local.second()
511    )
512}
513
514fn iso_string(value: DateTime<Utc>) -> String {
515    value.to_rfc3339_opts(SecondsFormat::Millis, true)
516}