// ccboard_core/models/stats.rs
1//! Stats cache model from ~/.claude/stats-cache.json
2//!
3//! Note: The actual Claude Code stats-cache.json format differs from initial assumptions.
4//! Key fields: dailyActivity (array), dailyModelTokens (array), modelUsage (object),
5//! totalSessions, totalMessages, hourCounts.
6
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
/// Top-level stats cache structure matching actual Claude Code format
///
/// Deserialized from `~/.claude/stats-cache.json` (see module docs). Every
/// field carries `#[serde(default)]`, so partial or older cache files still
/// parse into a usable struct.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct StatsCache {
    /// Version of the stats format
    #[serde(default)]
    pub version: u32,

    /// Last computed date (YYYY-MM-DD)
    #[serde(default)]
    pub last_computed_date: Option<String>,

    /// Daily activity entries
    #[serde(default)]
    pub daily_activity: Vec<DailyActivityEntry>,

    /// Daily model token usage
    #[serde(default)]
    pub daily_model_tokens: Vec<DailyModelTokens>,

    /// Model usage breakdown, keyed by model name (e.g. "claude-opus-4-5")
    #[serde(default)]
    pub model_usage: HashMap<String, ModelUsage>,

    /// Total sessions
    #[serde(default)]
    pub total_sessions: u64,

    /// Total messages
    #[serde(default)]
    pub total_messages: u64,

    /// Longest session info
    #[serde(default)]
    pub longest_session: Option<LongestSession>,

    /// First session date
    #[serde(default)]
    pub first_session_date: Option<String>,

    /// Hour counts for heatmap (hour-of-day 0-23, serialized as string keys)
    #[serde(default)]
    pub hour_counts: HashMap<String, u64>,

    /// Total speculation time saved in ms
    #[serde(default)]
    pub total_speculation_time_saved_ms: u64,
}
58
/// Daily activity entry
///
/// One element of `StatsCache::daily_activity`; `date` is a calendar day
/// string (e.g. "2026-01-30"). Counts default to 0 when absent.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct DailyActivityEntry {
    // Required field: entries without a date fail to deserialize.
    pub date: String,
    #[serde(default)]
    pub message_count: u64,
    #[serde(default)]
    pub session_count: u64,
    #[serde(default)]
    pub tool_call_count: u64,
}
71
/// Daily model tokens entry
///
/// One element of `StatsCache::daily_model_tokens`: for a single day,
/// token totals broken down by model name.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct DailyModelTokens {
    // Required field: entries without a date fail to deserialize.
    pub date: String,
    #[serde(default)]
    pub tokens_by_model: HashMap<String, u64>,
}
80
/// Per-model usage statistics
///
/// Stored per model name in `StatsCache::model_usage`. `cost_usd` can be
/// recomputed from the token counts via `StatsCache::recalculate_costs`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ModelUsage {
    // Input tokens; cache reads are tracked separately (see cache fields).
    #[serde(default)]
    pub input_tokens: u64,
    #[serde(default)]
    pub output_tokens: u64,
    // Tokens served from prompt cache on read.
    #[serde(default)]
    pub cache_read_input_tokens: u64,
    // Tokens written when creating prompt-cache entries.
    #[serde(default)]
    pub cache_creation_input_tokens: u64,
    #[serde(default)]
    pub web_search_requests: u64,
    // Estimated cost in USD; refreshed by `StatsCache::recalculate_costs`.
    #[serde(default)]
    pub cost_usd: f64,
    #[serde(default)]
    pub context_window: u64,
    #[serde(default)]
    pub max_output_tokens: u64,
}
102
103impl ModelUsage {
104    pub fn total_tokens(&self) -> u64 {
105        self.input_tokens + self.output_tokens
106    }
107
108    pub fn total_with_cache(&self) -> u64 {
109        self.input_tokens
110            + self.output_tokens
111            + self.cache_read_input_tokens
112            + self.cache_creation_input_tokens
113    }
114}
115
/// Longest session info
///
/// Every field is optional/defaulted, so caches that omit this data
/// still deserialize.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct LongestSession {
    #[serde(default)]
    pub session_id: Option<String>,
    #[serde(default)]
    pub message_count: u64,
    #[serde(default)]
    pub date: Option<String>,
}
127
/// Legacy daily activity format for compatibility
///
/// NOTE(review): not referenced elsewhere in this file; presumably kept so
/// older cache layouts still deserialize — confirm before removing.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct DailyActivity {
    #[serde(default)]
    pub tokens: u64,
    #[serde(default)]
    pub input_tokens: u64,
    #[serde(default)]
    pub output_tokens: u64,
    #[serde(default)]
    pub messages: u64,
    #[serde(default)]
    pub sessions: u64,
}
143
144impl StatsCache {
145    /// Calculate total input tokens across all models
146    pub fn total_input_tokens(&self) -> u64 {
147        self.model_usage.values().map(|m| m.input_tokens).sum()
148    }
149
150    /// Calculate total output tokens across all models
151    pub fn total_output_tokens(&self) -> u64 {
152        self.model_usage.values().map(|m| m.output_tokens).sum()
153    }
154
155    /// Calculate total tokens (input + output)
156    pub fn total_tokens(&self) -> u64 {
157        self.total_input_tokens() + self.total_output_tokens()
158    }
159
160    /// Calculate total cache read tokens
161    pub fn total_cache_read_tokens(&self) -> u64 {
162        self.model_usage
163            .values()
164            .map(|m| m.cache_read_input_tokens)
165            .sum()
166    }
167
168    /// Calculate total cache write tokens
169    pub fn total_cache_write_tokens(&self) -> u64 {
170        self.model_usage
171            .values()
172            .map(|m| m.cache_creation_input_tokens)
173            .sum()
174    }
175
176    /// Recalculate costs for all models using accurate pricing
177    ///
178    /// This should be called after loading stats from stats-cache.json to ensure
179    /// cost_usd fields are populated with accurate pricing data.
180    pub fn recalculate_costs(&mut self) {
181        for (model_name, usage) in self.model_usage.iter_mut() {
182            usage.cost_usd = crate::pricing::calculate_cost(
183                model_name,
184                usage.input_tokens,
185                usage.output_tokens,
186                usage.cache_creation_input_tokens,
187                usage.cache_read_input_tokens,
188            );
189        }
190    }
191
192    /// Get session count
193    pub fn session_count(&self) -> u64 {
194        self.total_sessions
195    }
196
197    /// Get message count
198    pub fn message_count(&self) -> u64 {
199        self.total_messages
200    }
201
202    /// Get top N models by token usage
203    pub fn top_models(&self, n: usize) -> Vec<(&str, &ModelUsage)> {
204        let mut models: Vec<_> = self
205            .model_usage
206            .iter()
207            .filter(|(_, usage)| usage.total_tokens() > 0)
208            .map(|(k, v)| (k.as_str(), v))
209            .collect();
210        models.sort_by(|a, b| b.1.total_tokens().cmp(&a.1.total_tokens()));
211        models.truncate(n);
212        models
213    }
214
215    /// Get recent N days of activity
216    pub fn recent_daily(&self, n: usize) -> Vec<&DailyActivityEntry> {
217        let len = self.daily_activity.len();
218        if len <= n {
219            self.daily_activity.iter().collect()
220        } else {
221            self.daily_activity[len - n..].iter().collect()
222        }
223    }
224
225    /// Calculate cache hit ratio
226    pub fn cache_ratio(&self) -> f64 {
227        let cache_read = self.total_cache_read_tokens();
228        let total_input = self.total_input_tokens() + cache_read;
229        if total_input == 0 {
230            return 0.0;
231        }
232        cache_read as f64 / total_input as f64
233    }
234
235    /// Context window size for Sonnet 4.5 (200K tokens)
236    pub const CONTEXT_WINDOW: u64 = 200_000;
237
238    /// Calculate context window saturation from session metadata
239    ///
240    /// NOTE: Requires session metadata to be passed from DataStore
241    /// since StatsCache doesn't have direct access to sessions.
242    pub fn calculate_context_saturation(
243        session_metadata: &[&crate::models::SessionMetadata],
244        last_n: usize,
245    ) -> ContextWindowStats {
246        if session_metadata.is_empty() {
247            return ContextWindowStats::default();
248        }
249
250        // Sort by last_timestamp descending (most recent first)
251        let mut sorted: Vec<_> = session_metadata
252            .iter()
253            .filter(|s| s.last_timestamp.is_some() && s.total_tokens > 0)
254            .collect();
255        sorted.sort_by(|a, b| b.last_timestamp.cmp(&a.last_timestamp));
256
257        // Take last N sessions
258        let recent: Vec<_> = sorted.into_iter().take(last_n).collect();
259
260        if recent.is_empty() {
261            return ContextWindowStats::default();
262        }
263
264        // Calculate saturation percentages (recent is most-recent-first; reverse for chronological)
265        let n = recent.len();
266        let pct_values: Vec<f64> = recent
267            .iter()
268            .rev() // oldest first for regression index ordering
269            .map(|s| (s.total_tokens as f64 / Self::CONTEXT_WINDOW as f64) * 100.0)
270            .collect();
271
272        let total_pct: f64 = pct_values.iter().sum();
273        let avg_pct = total_pct / n as f64;
274        let high_load_count = pct_values.iter().filter(|&&p| p > 85.0).count();
275        let peak_pct = pct_values.iter().cloned().fold(0.0f64, f64::max);
276
277        // Linear regression: slope of saturation over session index
278        // x[i] = i, y[i] = pct_values[i]
279        let trend_slope = if n >= 3 {
280            let n_f = n as f64;
281            let sum_x: f64 = (0..n).map(|i| i as f64).sum();
282            let sum_y: f64 = total_pct;
283            let sum_xy: f64 = pct_values
284                .iter()
285                .enumerate()
286                .map(|(i, &y)| i as f64 * y)
287                .sum();
288            let sum_x2: f64 = (0..n).map(|i| (i * i) as f64).sum();
289            let denom = n_f * sum_x2 - sum_x * sum_x;
290            if denom.abs() > f64::EPSILON {
291                (n_f * sum_xy - sum_x * sum_y) / denom
292            } else {
293                0.0
294            }
295        } else {
296            0.0
297        };
298
299        // Predict sessions until avg crosses 85% (only if trending up and currently below)
300        let sessions_until_high = if trend_slope > 0.1 && avg_pct < 85.0 {
301            let sessions_remaining = (85.0 - avg_pct) / trend_slope;
302            if sessions_remaining > 0.0 && sessions_remaining < 1000.0 {
303                Some(sessions_remaining.ceil() as usize)
304            } else {
305                None
306            }
307        } else {
308            None
309        };
310
311        ContextWindowStats {
312            avg_saturation_pct: avg_pct,
313            high_load_count,
314            peak_saturation_pct: peak_pct,
315            trend_slope,
316            sessions_until_high,
317        }
318    }
319}
320
/// Context window saturation statistics
///
/// Produced by `StatsCache::calculate_context_saturation`.
#[derive(Debug, Clone, Default)]
pub struct ContextWindowStats {
    /// Average saturation percentage across last N sessions (0.0-100.0)
    pub avg_saturation_pct: f64,

    /// Count of sessions exceeding 85% saturation (high-load)
    pub high_load_count: usize,

    /// Peak saturation percentage (max session, for future use)
    pub peak_saturation_pct: f64,

    /// Linear regression slope over recent sessions (percentage points per session).
    /// Positive = trending higher, negative = declining.
    pub trend_slope: f64,

    /// Predicted sessions until avg saturation crosses 85%.
    /// `None` if slope is flat/negative or already above 85%.
    pub sessions_until_high: Option<usize>,
}
341
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_stats_cache_defaults() {
        let cache = StatsCache::default();
        assert_eq!(cache.total_tokens(), 0);
        assert!(cache.model_usage.is_empty());
    }

    #[test]
    fn test_model_usage_total() {
        let usage = ModelUsage {
            output_tokens: 500,
            input_tokens: 1000,
            ..ModelUsage::default()
        };
        assert_eq!(usage.total_tokens(), 1500);
    }

    #[test]
    fn test_cache_ratio() {
        let mut cache = StatsCache::default();
        let usage = ModelUsage {
            input_tokens: 800,
            cache_read_input_tokens: 200,
            ..Default::default()
        };
        cache.model_usage.insert("test".into(), usage);
        // 200 cache reads out of 1000 effective input tokens = 0.2.
        assert!((cache.cache_ratio() - 0.2).abs() < 0.001);
    }

    #[test]
    fn test_top_models() {
        let mut cache = StatsCache::default();
        for (name, input, output) in [("opus", 1000u64, 500u64), ("sonnet", 2000, 1000)] {
            cache.model_usage.insert(
                name.to_string(),
                ModelUsage {
                    input_tokens: input,
                    output_tokens: output,
                    ..Default::default()
                },
            );
        }

        let top = cache.top_models(2);
        assert_eq!(top[0].0, "sonnet");
        assert_eq!(top[1].0, "opus");
    }

    #[test]
    fn test_parse_real_format() {
        let json = r#"{
            "version": 2,
            "lastComputedDate": "2026-01-31",
            "dailyActivity": [
                {"date": "2026-01-30", "messageCount": 100, "sessionCount": 5, "toolCallCount": 20}
            ],
            "modelUsage": {
                "claude-opus-4-5": {
                    "inputTokens": 1000,
                    "outputTokens": 500,
                    "cacheReadInputTokens": 200,
                    "cacheCreationInputTokens": 100
                }
            },
            "totalSessions": 10,
            "totalMessages": 1000,
            "hourCounts": {"10": 50, "14": 100}
        }"#;

        let parsed: StatsCache = serde_json::from_str(json).unwrap();
        assert_eq!(parsed.version, 2);
        assert_eq!(parsed.total_sessions, 10);
        assert_eq!(parsed.total_messages, 1000);
        assert_eq!(parsed.daily_activity.len(), 1);
        assert_eq!(parsed.total_input_tokens(), 1000);
        assert_eq!(parsed.total_output_tokens(), 500);
    }

    #[test]
    fn test_context_saturation_calculation() {
        use crate::models::SessionMetadata;
        use chrono::Utc;
        use std::path::PathBuf;

        let now = Utc::now();
        let token_counts = [50_000u64, 100_000, 150_000, 170_000, 190_000];

        // Five sessions with rising token counts, one minute apart, oldest first.
        let sessions: Vec<SessionMetadata> = token_counts
            .iter()
            .enumerate()
            .map(|(i, &tokens)| {
                let mut meta = SessionMetadata::from_path(
                    PathBuf::from(format!("/test{}.jsonl", i)),
                    "test".into(),
                );
                meta.total_tokens = tokens;
                meta.last_timestamp = Some(now - chrono::Duration::seconds((4 - i) as i64 * 60));
                meta
            })
            .collect();

        let refs: Vec<_> = sessions.iter().collect();
        let stats = StatsCache::calculate_context_saturation(&refs, 30);

        // Average: (25% + 50% + 75% + 85% + 95%) / 5 = 66%.
        assert!((stats.avg_saturation_pct - 66.0).abs() < 1.0);

        // Exactly one session exceeds 85% (190K tokens = 95%).
        assert_eq!(stats.high_load_count, 1);

        // Peak saturation: 95%.
        assert!((stats.peak_saturation_pct - 95.0).abs() < 1.0);
    }

    #[test]
    fn test_context_saturation_empty_sessions() {
        let stats = StatsCache::calculate_context_saturation(&[], 30);
        assert_eq!(stats.avg_saturation_pct, 0.0);
        assert_eq!(stats.high_load_count, 0);
    }

    #[test]
    fn test_context_saturation_fewer_than_requested() {
        use crate::models::SessionMetadata;
        use chrono::Utc;
        use std::path::PathBuf;

        let now = Utc::now();

        // Only 3 sessions while requesting the last 30.
        let sessions: Vec<SessionMetadata> = [60_000u64, 80_000, 120_000]
            .iter()
            .enumerate()
            .map(|(i, &tokens)| {
                let mut meta = SessionMetadata::from_path(
                    PathBuf::from(format!("/test{}.jsonl", i)),
                    "test".into(),
                );
                meta.total_tokens = tokens;
                meta.last_timestamp = Some(now - chrono::Duration::seconds((2 - i) as i64 * 60));
                meta
            })
            .collect();

        let refs: Vec<_> = sessions.iter().collect();
        let stats = StatsCache::calculate_context_saturation(&refs, 30);

        // Average over the 3 available sessions: (30% + 40% + 60%) / 3 = 43.33%.
        assert!((stats.avg_saturation_pct - 43.33).abs() < 0.1);
    }

    #[test]
    fn test_context_saturation_trend_increasing() {
        use crate::models::SessionMetadata;
        use chrono::Utc;
        use std::path::PathBuf;

        // Saturation climbing 20% → 60% in 10-point steps:
        // avg = 40%, slope ≈ +10%/session → roughly 5 sessions to 85%.
        let now = Utc::now();
        let targets = [20.0f64, 30.0, 40.0, 50.0, 60.0];

        let sessions: Vec<SessionMetadata> = targets
            .iter()
            .enumerate()
            .map(|(i, &pct)| {
                let mut meta = SessionMetadata::from_path(
                    PathBuf::from(format!("/trend{}.jsonl", i)),
                    "test".into(),
                );
                meta.total_tokens = (pct / 100.0 * 200_000.0) as u64;
                meta.last_timestamp = Some(now - chrono::Duration::seconds((4 - i) as i64 * 60));
                meta
            })
            .collect();

        let refs: Vec<_> = sessions.iter().collect();
        let stats = StatsCache::calculate_context_saturation(&refs, 30);

        assert!(stats.trend_slope > 0.0, "slope should be positive");
        assert!(
            stats.sessions_until_high.is_some(),
            "should predict sessions until 85%"
        );
        let predicted = stats.sessions_until_high.unwrap();
        assert!(
            (3..=8).contains(&predicted),
            "predicted {} sessions to 85%, expected ~5",
            predicted
        );
    }

    #[test]
    fn test_context_saturation_trend_flat_no_prediction() {
        use crate::models::SessionMetadata;
        use chrono::Utc;
        use std::path::PathBuf;

        // Constant 40% saturation: no upward trend, so no breach forecast.
        let now = Utc::now();
        let tokens = (0.40 * 200_000.0) as u64;

        let sessions: Vec<SessionMetadata> = (0..5)
            .map(|i| {
                let mut meta = SessionMetadata::from_path(
                    PathBuf::from(format!("/flat{}.jsonl", i)),
                    "test".into(),
                );
                meta.total_tokens = tokens;
                meta.last_timestamp = Some(now - chrono::Duration::seconds((4 - i) as i64 * 60));
                meta
            })
            .collect();

        let refs: Vec<_> = sessions.iter().collect();
        let stats = StatsCache::calculate_context_saturation(&refs, 30);

        assert!(
            stats.sessions_until_high.is_none(),
            "flat trend should not predict a breach"
        );
    }
}