Skip to main content

ccboard_core/analytics/
mod.rs

//! Advanced analytics module for Claude Code usage analysis
//!
//! Provides time series trends, forecasting, usage pattern detection,
//! and actionable insights to optimize costs and productivity.
5
6use chrono::{DateTime, Utc};
7use std::sync::Arc;
8
9use crate::models::config::AnomalyThresholds;
10use crate::models::session::SessionMetadata;
11
12pub mod anomalies;
13pub mod discover;
14pub mod discover_llm;
15pub mod forecasting;
16pub mod insights;
17pub mod optimization;
18pub mod patterns;
19pub mod plugin_usage;
20pub mod tool_chains;
21pub mod trends;
22
23#[cfg(test)]
24mod tests;
25
26pub use anomalies::{
27    detect_anomalies, detect_daily_cost_spikes, Anomaly, AnomalyMetric, AnomalySeverity,
28    DailyCostAnomaly,
29};
30pub use discover::{
31    collect_sessions_data as discover_collect_sessions, discover_patterns, run_discover,
32    DiscoverConfig, DiscoverSuggestion, SessionData as DiscoverSessionData, SuggestionCategory,
33};
34pub use discover_llm::{call_claude_cli as discover_call_llm, LlmSuggestion};
35pub use forecasting::{forecast_usage, ForecastData, TrendDirection};
36pub use insights::{generate_budget_alerts, generate_insights, Alert};
37pub use optimization::{
38    generate_cost_suggestions, generate_model_recommendations, CostSuggestion, OptimizationCategory,
39};
40pub use patterns::{detect_patterns, UsagePatterns};
41pub use plugin_usage::{aggregate_plugin_usage, PluginAnalytics, PluginType, PluginUsage};
42pub use tool_chains::{analyze_tool_chains, ToolChain, ToolChainAnalysis};
43pub use trends::{compute_trends, SessionDurationStats, TrendsData};
44
/// Time window over which analytics are computed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Period {
    /// The most recent N days, counted back from now
    Days(usize),
    /// Every session currently loaded (honest: not "all time", limited by DataStore)
    Available,
}

impl Period {
    /// Day count standing in for "no limit" when filtering (100 years, effectively all).
    const UNBOUNDED_DAYS: usize = 36_500;

    /// Window covering the last 7 days.
    pub fn last_7d() -> Self {
        Period::Days(7)
    }

    /// Window covering the last 30 days.
    pub fn last_30d() -> Self {
        Period::Days(30)
    }

    /// Window covering the last 90 days.
    pub fn last_90d() -> Self {
        Period::Days(90)
    }

    /// Window covering every available (loaded) session.
    pub fn available() -> Self {
        Period::Available
    }

    /// Length of the window in days, suitable for date filtering.
    ///
    /// `Available` has no real bound, so it maps to a 100-year window.
    pub fn days(&self) -> usize {
        match *self {
            Self::Days(count) => count,
            Self::Available => Self::UNBOUNDED_DAYS,
        }
    }

    /// Human-readable label for the window.
    ///
    /// `total_loaded` is only used by `Available`, which shows it as the
    /// number of loaded sessions.
    pub fn display(&self, total_loaded: usize) -> String {
        match *self {
            Self::Days(count) => format!("Last {} days", count),
            Self::Available => format!("All loaded ({} sessions)", total_loaded),
        }
    }
}
91
/// Token and cost attribution for a single tool within a period.
#[derive(Debug, Clone)]
pub struct ToolTokenStat {
    /// Name of the tool these figures belong to
    pub tool_name: String,
    /// How many times the tool was called
    pub call_count: usize,
    /// Tokens attributed to the tool
    pub tokens: u64,
    /// Share of all tool tokens held by this tool, as a fraction (0.0..1.0)
    pub pct_of_total: f64,
    /// Estimated cost: the period's total cost scaled by `pct_of_total`
    pub est_cost_usd: f64,
    /// `est_cost_usd` divided by `call_count` (0 when `call_count` is 0)
    pub cost_per_call: f64,
}
105
106/// Complete analytics data for a period
107#[derive(Debug, Clone)]
108pub struct AnalyticsData {
109    /// Time series trends
110    pub trends: TrendsData,
111    /// Usage forecasting
112    pub forecast: ForecastData,
113    /// Behavioral patterns
114    pub patterns: UsagePatterns,
115    /// Actionable insights
116    pub insights: Vec<String>,
117    /// Tool chain bigram/trigram analysis
118    pub tool_chains: Option<ToolChainAnalysis>,
119    /// Cost optimization suggestions
120    pub cost_suggestions: Vec<optimization::CostSuggestion>,
121    /// Session-level anomalies (Z-score based)
122    pub anomalies: Vec<anomalies::Anomaly>,
123    /// Daily cost spikes
124    pub daily_spikes: Vec<anomalies::DailyCostAnomaly>,
125    /// Per-tool token and cost breakdown for the period
126    pub tool_token_stats: Vec<ToolTokenStat>,
127    /// Number of sessions in the analyzed period
128    pub sessions_in_period: usize,
129    /// Timestamp of computation
130    pub computed_at: DateTime<Utc>,
131    /// Period analyzed
132    pub period: Period,
133    /// Effective anomaly thresholds used for this computation
134    pub anomaly_thresholds: AnomalyThresholds,
135}
136
137impl AnalyticsData {
138    /// Compute analytics from sessions (sync function)
139    ///
140    /// This is a sync function for simplicity. If computation exceeds 16ms
141    /// (render deadline), caller should offload to `tokio::task::spawn_blocking`.
142    ///
143    /// # Performance
144    /// Target: <100ms for 1000 sessions over 30 days
145    pub fn compute(sessions: &[Arc<SessionMetadata>], period: Period) -> Self {
146        Self::compute_inner(sessions, period, &AnomalyThresholds::default())
147    }
148
149    /// Compute analytics using custom anomaly thresholds from settings.json.
150    pub fn compute_with_thresholds(
151        sessions: &[Arc<SessionMetadata>],
152        period: Period,
153        thresholds: &AnomalyThresholds,
154    ) -> Self {
155        Self::compute_inner(sessions, period, thresholds)
156    }
157
158    fn compute_inner(
159        sessions: &[Arc<SessionMetadata>],
160        period: Period,
161        thresholds: &AnomalyThresholds,
162    ) -> Self {
163        use chrono::Local;
164
165        let trends = compute_trends(sessions, period.days());
166        let forecast = forecast_usage(&trends);
167        let patterns = detect_patterns(sessions, period.days());
168        let insights = generate_insights(&trends, &patterns, &forecast);
169
170        let cutoff = Local::now() - chrono::Duration::days(period.days() as i64);
171        let period_sessions: Vec<Arc<SessionMetadata>> = sessions
172            .iter()
173            .filter(|s| {
174                s.first_timestamp
175                    .map(|ts| ts.with_timezone(&Local) >= cutoff)
176                    .unwrap_or(false)
177            })
178            .cloned()
179            .collect();
180
181        let sessions_in_period = period_sessions.len();
182        let anomalies_detected =
183            anomalies::detect_anomalies_with_thresholds(&period_sessions, thresholds);
184        let daily_spikes_detected = anomalies::detect_daily_cost_spikes_with_thresholds(
185            &period_sessions,
186            period.days(),
187            thresholds,
188        );
189
190        // Aggregate per-tool token usage across all sessions
191        let mut aggregated_tool_tokens: std::collections::HashMap<String, u64> =
192            std::collections::HashMap::new();
193        for session in sessions {
194            for (tool, &tokens) in &session.tool_token_usage {
195                *aggregated_tool_tokens.entry(tool.clone()).or_default() += tokens;
196            }
197        }
198
199        // Aggregate per-tool call counts and token usage for the period only
200        let mut period_tool_calls: std::collections::HashMap<String, usize> =
201            std::collections::HashMap::new();
202        let mut period_tool_tokens: std::collections::HashMap<String, u64> =
203            std::collections::HashMap::new();
204        for session in &period_sessions {
205            for (tool, &calls) in &session.tool_usage {
206                *period_tool_calls.entry(tool.clone()).or_default() += calls;
207            }
208            for (tool, &tokens) in &session.tool_token_usage {
209                *period_tool_tokens.entry(tool.clone()).or_default() += tokens;
210            }
211        }
212
213        // Estimate period cost from trend data
214        let total_cost_estimate: f64 = trends.daily_cost.iter().sum();
215
216        // Build per-tool stats sorted by token usage descending
217        let total_tool_tokens: u64 = period_tool_tokens.values().sum();
218        let mut tool_token_stats: Vec<ToolTokenStat> = period_tool_tokens
219            .iter()
220            .map(|(name, &tokens)| {
221                let pct = if total_tool_tokens > 0 {
222                    tokens as f64 / total_tool_tokens as f64
223                } else {
224                    0.0
225                };
226                let est_cost = total_cost_estimate * pct;
227                let calls = *period_tool_calls.get(name).unwrap_or(&0);
228                let cost_per_call = if calls > 0 {
229                    est_cost / calls as f64
230                } else {
231                    0.0
232                };
233                ToolTokenStat {
234                    tool_name: name.clone(),
235                    call_count: calls,
236                    tokens,
237                    pct_of_total: pct,
238                    est_cost_usd: est_cost,
239                    cost_per_call,
240                }
241            })
242            .collect();
243        tool_token_stats.sort_by(|a, b| b.tokens.cmp(&a.tokens));
244
245        // Generate cost suggestions (plugin_analytics populated with empty data here;
246        // full plugin analytics with dead-code detection requires skill/command lists
247        // which are provided by DataStore when calling the analytics tab)
248        let mut cost_suggestions = optimization::generate_cost_suggestions(
249            &plugin_usage::PluginAnalytics::empty(),
250            &aggregated_tool_tokens,
251            total_cost_estimate,
252        );
253
254        // Append model downgrade recommendations
255        let model_recs =
256            optimization::generate_model_recommendations(sessions, total_cost_estimate);
257        cost_suggestions.extend(model_recs);
258        // Re-sort by potential savings descending after merge
259        cost_suggestions.sort_by(|a, b| {
260            b.potential_savings
261                .partial_cmp(&a.potential_savings)
262                .unwrap_or(std::cmp::Ordering::Equal)
263        });
264
265        Self {
266            trends,
267            forecast,
268            patterns,
269            insights,
270            tool_chains: Some(analyze_tool_chains(sessions)),
271            cost_suggestions,
272            anomalies: anomalies_detected,
273            daily_spikes: daily_spikes_detected,
274            tool_token_stats,
275            sessions_in_period,
276            computed_at: Utc::now(),
277            period,
278            anomaly_thresholds: thresholds.clone(),
279        }
280    }
281
282    /// Graceful fallback if stats-cache.json missing
283    ///
284    /// Cost forecasting requires pricing data from StatsCache.
285    /// If unavailable, returns limited analytics with warning.
286    pub fn from_sessions_only(sessions: &[Arc<SessionMetadata>], period: Period) -> Self {
287        tracing::warn!("Stats cache missing, computing analytics from sessions only");
288
289        Self {
290            trends: compute_trends(sessions, period.days()),
291            forecast: ForecastData::unavailable("Stats cache required for cost forecasting"),
292            patterns: detect_patterns(sessions, period.days()),
293            insights: vec!["Limited insights: stats cache unavailable".to_string()],
294            tool_chains: Some(analyze_tool_chains(sessions)),
295            cost_suggestions: Vec::new(),
296            anomalies: Vec::new(),
297            daily_spikes: Vec::new(),
298            tool_token_stats: Vec::new(),
299            sessions_in_period: sessions.len(),
300            computed_at: Utc::now(),
301            period,
302            anomaly_thresholds: AnomalyThresholds::default(),
303        }
304    }
305}