Skip to main content

lean_ctx/core/stats/
model.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
/// Persistent store for all-time token savings, command stats, and daily history.
///
/// The type derives `Serialize`/`Deserialize`, so the field names below are part of
/// the persisted format; renaming or reordering them changes what is written out.
#[derive(Serialize, Deserialize, Default, Clone)]
pub struct StatsStore {
    /// All-time command count. `CostModel::calculate` multiplies it by the per-call
    /// output averages to estimate output tokens.
    pub total_commands: u64,
    /// All-time input token total. `CostModel::calculate` prices this as the input
    /// cost *without* compression.
    pub total_input_tokens: u64,
    /// All-time output token total. `CostModel::calculate` prices this as the input
    /// cost *with* compression and subtracts it from `total_input_tokens` to obtain
    /// the tokens saved.
    pub total_output_tokens: u64,
    /// First-use marker; `None` in a default store.
    /// NOTE(review): presumably a timestamp string — the format is not visible here.
    pub first_use: Option<String>,
    /// Last-use marker; `None` in a default store.
    /// NOTE(review): presumably a timestamp string — the format is not visible here.
    pub last_use: Option<String>,
    /// Per-command statistics.
    /// NOTE(review): the key is presumably the command name — confirm against the writer.
    pub commands: HashMap<String, CommandStats>,
    /// Daily aggregates, one `DayStats` value per element.
    pub daily: Vec<DayStats>,
    /// CEP metrics. `#[serde(default)]` lets data that was serialized without this
    /// field still deserialize; it then falls back to `CepStats::default()`.
    #[serde(default)]
    pub cep: CepStats,
}
17
/// Aggregated CEP (Cognitive Efficiency Protocol) metrics across sessions.
///
/// The three `last_session_*` fields carry `#[serde(default)]`, so serialized data
/// that lacks them still deserializes and leaves them as `None`.
#[derive(Serialize, Deserialize, Clone, Default)]
pub struct CepStats {
    /// Number of sessions aggregated into these metrics.
    pub sessions: u64,
    /// Running total of cache hits.
    pub total_cache_hits: u64,
    /// Running total of cache reads.
    /// NOTE(review): presumably the denominator for a hit rate — confirm at the call site.
    pub total_cache_reads: u64,
    /// Running total of token counts before compression (per the field name).
    pub total_tokens_original: u64,
    /// Running total of token counts after compression (per the field name).
    pub total_tokens_compressed: u64,
    /// Counter per mode.
    /// NOTE(review): the key is presumably a mode name and the value a usage count — confirm.
    pub modes: HashMap<String, u64>,
    /// Recorded per-session score snapshots.
    pub scores: Vec<CepSessionSnapshot>,
    /// Process id associated with the last recorded session, if any.
    /// NOTE(review): how this is used (e.g. de-duplicating sessions) is not visible here.
    #[serde(default)]
    pub last_session_pid: Option<u32>,
    /// Original-token figure of the last recorded session, if any.
    #[serde(default)]
    pub last_session_original: Option<u64>,
    /// Compressed-token figure of the last recorded session, if any.
    #[serde(default)]
    pub last_session_compressed: Option<u64>,
}
35
36/// Point-in-time snapshot of CEP scores for a single session.
37#[derive(Serialize, Deserialize, Clone)]
38pub struct CepSessionSnapshot {
39    pub timestamp: String,
40    pub score: u32,
41    pub cache_hit_rate: u32,
42    pub mode_diversity: u32,
43    pub compression_rate: u32,
44    pub tool_calls: u64,
45    pub tokens_saved: u64,
46    pub complexity: String,
47}
48
/// Per-command token statistics: invocation count and input/output totals.
#[derive(Serialize, Deserialize, Clone, Default, Debug)]
pub struct CommandStats {
    /// Number of invocations recorded for the command.
    pub count: u64,
    /// Input token total accumulated for the command.
    pub input_tokens: u64,
    /// Output token total accumulated for the command.
    pub output_tokens: u64,
}
56
57/// Daily aggregate: command count and token totals for one calendar day.
58#[derive(Serialize, Deserialize, Clone)]
59pub struct DayStats {
60    pub date: String,
61    pub commands: u64,
62    pub input_tokens: u64,
63    pub output_tokens: u64,
64}
65
/// High-level token savings summary for display.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so a summary can be logged,
/// duplicated and compared directly in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GainSummary {
    /// Total amount saved.
    /// NOTE(review): presumably a token count — the unit is not visible in this file.
    pub total_saved: u64,
    /// Number of calls the summary covers.
    pub total_calls: u64,
}
71
/// Average LLM pricing per 1M tokens (blended across Claude, GPT, Gemini).
///
/// Default price for one million input tokens.
/// NOTE(review): `CostModel::default` in this file does not read this constant; it
/// takes its prices from `ModelPricing` instead — confirm where this is still used.
pub const DEFAULT_INPUT_PRICE_PER_M: f64 = 2.50;
/// Default price for one million output tokens (same blended average as
/// `DEFAULT_INPUT_PRICE_PER_M`).
pub const DEFAULT_OUTPUT_PRICE_PER_M: f64 = 10.0;
75
/// LLM pricing model for estimating dollar savings from token compression.
///
/// Derives `Debug`, `Clone` and `PartialEq` so a model can be logged, duplicated and
/// compared in assertions. `Eq` is not derived because the prices are `f64`.
#[derive(Debug, Clone, PartialEq)]
pub struct CostModel {
    /// Price per one million input tokens.
    pub input_price_per_m: f64,
    /// Price per one million output tokens.
    pub output_price_per_m: f64,
    /// Assumed output tokens per command without lean-ctx; `calculate` multiplies it
    /// by the command count to estimate the uncompressed output volume.
    pub avg_verbose_output_per_call: u64,
    /// Assumed output tokens per command with lean-ctx; `calculate` uses it only when
    /// the measured input compression rate exceeds 1%.
    pub avg_concise_output_per_call: u64,
}
83
84impl Default for CostModel {
85    fn default() -> Self {
86        let env_model = std::env::var("LEAN_CTX_MODEL")
87            .or_else(|_| std::env::var("LCTX_MODEL"))
88            .ok();
89        let pricing = crate::core::gain::model_pricing::ModelPricing::load();
90        let quote = pricing.quote(env_model.as_deref());
91        Self {
92            input_price_per_m: quote.cost.input_per_m,
93            output_price_per_m: quote.cost.output_per_m,
94            avg_verbose_output_per_call: 180,
95            avg_concise_output_per_call: 120,
96        }
97    }
98}
99
/// Detailed cost comparison: with vs. without lean-ctx compression.
///
/// Derives `Debug`, `Clone` and `PartialEq` so a breakdown can be logged, duplicated
/// and compared in assertions. `Eq` is not derived because the costs are `f64`.
#[derive(Debug, Clone, PartialEq)]
pub struct CostBreakdown {
    /// Input cost of the uncompressed token total.
    pub input_cost_without: f64,
    /// Input cost of the compressed token total.
    pub input_cost_with: f64,
    /// Output cost of the estimated verbose output volume.
    pub output_cost_without: f64,
    /// Output cost of the estimated output volume with compression.
    pub output_cost_with: f64,
    /// `input_cost_without + output_cost_without`.
    pub total_cost_without: f64,
    /// `input_cost_with + output_cost_with`.
    pub total_cost_with: f64,
    /// `total_cost_without - total_cost_with`; negative when compression cost more.
    pub total_saved: f64,
    /// Estimated output tokens without compression.
    pub estimated_output_tokens_without: u64,
    /// Estimated output tokens with compression.
    pub estimated_output_tokens_with: u64,
    /// Estimated output tokens saved (never below zero).
    pub output_tokens_saved: u64,
}
113
114impl CostModel {
115    /// Calculates the full cost breakdown from the stats store.
116    pub fn calculate(&self, store: &StatsStore) -> CostBreakdown {
117        let input_cost_without =
118            store.total_input_tokens as f64 / 1_000_000.0 * self.input_price_per_m;
119        let input_cost_with =
120            store.total_output_tokens as f64 / 1_000_000.0 * self.input_price_per_m;
121
122        let input_saved = store
123            .total_input_tokens
124            .saturating_sub(store.total_output_tokens);
125        let compression_rate = if store.total_input_tokens > 0 {
126            input_saved as f64 / store.total_input_tokens as f64
127        } else {
128            0.0
129        };
130        let est_output_without = store.total_commands * self.avg_verbose_output_per_call;
131        let est_output_with = if compression_rate > 0.01 {
132            store.total_commands * self.avg_concise_output_per_call
133        } else {
134            est_output_without
135        };
136        let output_saved = est_output_without.saturating_sub(est_output_with);
137
138        let output_cost_without = est_output_without as f64 / 1_000_000.0 * self.output_price_per_m;
139        let output_cost_with = est_output_with as f64 / 1_000_000.0 * self.output_price_per_m;
140
141        let total_without = input_cost_without + output_cost_without;
142        let total_with = input_cost_with + output_cost_with;
143
144        CostBreakdown {
145            input_cost_without,
146            input_cost_with,
147            output_cost_without,
148            output_cost_with,
149            total_cost_without: total_without,
150            total_cost_with: total_with,
151            total_saved: total_without - total_with,
152            estimated_output_tokens_without: est_output_without,
153            estimated_output_tokens_with: est_output_with,
154            output_tokens_saved: output_saved,
155        }
156    }
157}