Skip to main content

entelix_core/ir/
usage.rs

1//! `Usage` — token and safety accounting reported by the provider.
2
3use serde::{Deserialize, Serialize};
4
5use crate::ir::safety::SafetyRating;
6
/// Per-call accounting from the vendor.
///
/// Token fields are `u32` with default `0` — every shipping codec
/// populates them, and `0` is the natural "no cache hit" /
/// "no reasoning" value. Whether the field is *meaningful* for the
/// (codec, model) pair is governed by [`Capabilities`](crate::ir::Capabilities)
/// flags, not by an option/null distinction here.
///
/// # Serialized form
///
/// `input_tokens` and `output_tokens` carry no `#[serde(default)]`, so
/// both keys must be present when deserializing. Every other field is
/// marked `#[serde(default)]` and falls back to `0` / an empty `Vec`
/// when its key is absent.
///
/// # Construction
///
/// The struct is `#[non_exhaustive]`, so code outside this crate cannot
/// build it with a struct literal; go through [`Usage::new`] and the
/// `with_*` setters (or `Usage::default()`) instead.
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
#[non_exhaustive]
pub struct Usage {
    /// Tokens consumed from the prompt this call.
    ///
    /// Required when deserializing (no `#[serde(default)]`).
    pub input_tokens: u32,
    /// Tokens produced as output (assistant content).
    ///
    /// Required when deserializing (no `#[serde(default)]`).
    pub output_tokens: u32,
    /// Tokens served from the prompt cache (typically discounted).
    ///
    /// Defaults to `0` when the key is missing from the input.
    #[serde(default)]
    pub cached_input_tokens: u32,
    /// Tokens written to the prompt cache (typically billed at a premium).
    ///
    /// Defaults to `0` when the key is missing from the input.
    #[serde(default)]
    pub cache_creation_input_tokens: u32,
    /// Tokens spent on internal reasoning (Anthropic thinking, OpenAI
    /// o-series reasoning, Gemini thinking budget).
    ///
    /// Defaults to `0` when the key is missing from the input.
    #[serde(default)]
    pub reasoning_tokens: u32,
    /// Per-category safety scores reported by the vendor (Gemini today;
    /// empty otherwise).
    ///
    /// Defaults to an empty `Vec` when the key is missing from the input.
    #[serde(default)]
    pub safety_ratings: Vec<SafetyRating>,
}
36
37impl Usage {
38    /// Construct a `Usage` from the two universally-populated token
39    /// counts; cache, reasoning, and safety fields stay at their
40    /// defaults (`0` / `Vec::new()`). Use the `with_*` setters to
41    /// override the rest.
42    #[must_use]
43    pub fn new(input_tokens: u32, output_tokens: u32) -> Self {
44        Self {
45            input_tokens,
46            output_tokens,
47            ..Self::default()
48        }
49    }
50
51    /// Override `cached_input_tokens` (prompt-cache reads).
52    #[must_use]
53    pub const fn with_cached_input_tokens(mut self, tokens: u32) -> Self {
54        self.cached_input_tokens = tokens;
55        self
56    }
57
58    /// Override `cache_creation_input_tokens` (prompt-cache writes).
59    #[must_use]
60    pub const fn with_cache_creation_input_tokens(mut self, tokens: u32) -> Self {
61        self.cache_creation_input_tokens = tokens;
62        self
63    }
64
65    /// Override `reasoning_tokens` (Anthropic thinking, OpenAI o-series
66    /// reasoning, Gemini thinking budget).
67    #[must_use]
68    pub const fn with_reasoning_tokens(mut self, tokens: u32) -> Self {
69        self.reasoning_tokens = tokens;
70        self
71    }
72
73    /// Attach the vendor-reported per-category safety ratings.
74    #[must_use]
75    pub fn with_safety_ratings(mut self, ratings: Vec<SafetyRating>) -> Self {
76        self.safety_ratings = ratings;
77        self
78    }
79
80    /// Billable input tokens — fresh prompt input plus cache writes (which
81    /// vendors typically charge at a premium). Cache *reads* are excluded
82    /// because vendors discount them, often heavily.
83    #[must_use]
84    pub const fn billable_input(&self) -> u32 {
85        self.input_tokens
86            .saturating_add(self.cache_creation_input_tokens)
87    }
88
89    /// Sum of input + output tokens (rough cost proxy when no per-bucket
90    /// pricing is configured).
91    #[must_use]
92    pub const fn total(&self) -> u32 {
93        self.input_tokens.saturating_add(self.output_tokens)
94    }
95}