Skip to main content

ccboard_core/analytics/
forecasting.rs

1//! Usage forecasting with linear regression
2//!
3//! Predicts future token usage and costs based on historical trends,
4//! with R² confidence metric to assess prediction reliability.
5
6use super::trends::TrendsData;
7
8/// Forecast data with predictions
9#[derive(Debug, Clone)]
10pub struct ForecastData {
11    /// Predicted tokens for next 30 days
12    pub next_30_days_tokens: u64,
13    /// Predicted cost for next 30 days
14    pub next_30_days_cost: f64,
15    /// Monthly cost estimate (extrapolated)
16    pub monthly_cost_estimate: f64,
17    /// Confidence (R² coefficient, 0.0-1.0)
18    pub confidence: f64,
19    /// Trend direction
20    pub trend_direction: TrendDirection,
21    /// Reason if unavailable
22    pub unavailable_reason: Option<String>,
23}
24
/// Direction of the fitted usage trend, with the size of the change.
///
/// The payload of `Up` / `Down` is the projected change over 30 days expressed
/// as a percentage of the regression baseline; it is always non-negative, the
/// variant itself carries the sign.
///
/// `PartialEq` is derived so callers and tests can compare trend results
/// directly (`Eq` is not available because of the `f64` payload).
#[derive(Debug, Clone, PartialEq)]
pub enum TrendDirection {
    /// Increasing trend (percentage, non-negative)
    Up(f64),
    /// Decreasing trend (percentage, non-negative)
    Down(f64),
    /// No significant trend: the fitted daily slope is below 1% of the
    /// regression baseline.
    Stable,
}
35
36impl ForecastData {
37    /// Create unavailable forecast with reason
38    pub fn unavailable(reason: &str) -> Self {
39        Self {
40            next_30_days_tokens: 0,
41            next_30_days_cost: 0.0,
42            monthly_cost_estimate: 0.0,
43            confidence: 0.0,
44            trend_direction: TrendDirection::Stable,
45            unavailable_reason: Some(reason.to_string()),
46        }
47    }
48}
49
50/// Forecast usage with linear regression
51///
52/// Uses simple linear regression (y = slope * x + intercept) to extrapolate
53/// token usage 30 days into the future. Confidence is measured using R²
54/// (coefficient of determination), not sample size.
55///
56/// # Performance
57/// Target: <20ms
58///
59/// # Returns
60/// - `ForecastData::unavailable()` if <7 days of data
61/// - `ForecastData` with R² confidence otherwise
62pub fn forecast_usage(trends: &TrendsData) -> ForecastData {
63    if trends.dates.len() < 7 {
64        return ForecastData::unavailable("Insufficient data (<7 days)");
65    }
66
67    // Prepare data points (x = day index, y = tokens)
68    let points: Vec<_> = trends
69        .daily_tokens
70        .iter()
71        .enumerate()
72        .map(|(i, &tokens)| (i as f64, tokens as f64))
73        .collect();
74
75    // Linear regression: y = slope * x + intercept
76    let (slope, intercept, r_squared) = linear_regression(&points);
77
78    // R² = coefficient of determination (0.0-1.0)
79    // 1.0 = perfect fit, 0.0 = no correlation
80    let confidence = r_squared.clamp(0.0, 1.0);
81
82    // Extrapolate 30 days ahead
83    let last_x = points.len() as f64;
84    let next_30_x = last_x + 30.0;
85    let next_30_days_tokens = (slope * next_30_x + intercept).max(0.0) as u64;
86
87    // Estimate cost (using current avg cost/token)
88    let total_cost: f64 = trends.daily_cost.iter().sum();
89    let total_tokens: u64 = trends.daily_tokens.iter().sum();
90    let cost_per_token = if total_tokens > 0 {
91        total_cost / total_tokens as f64
92    } else {
93        0.01 / 1000.0 // Default: $0.01/1K tokens
94    };
95    let next_30_days_cost = next_30_days_tokens as f64 * cost_per_token;
96
97    // Monthly estimate (extrapolate to 30 days from current average)
98    let days_in_period = trends.dates.len() as f64;
99    let monthly_cost_estimate = (total_cost / days_in_period) * 30.0;
100
101    // Trend direction (slope-based)
102    let trend_direction = if slope.abs() < 0.01 * intercept.abs() {
103        TrendDirection::Stable
104    } else if slope > 0.0 {
105        let increase_pct = (slope * 30.0 / intercept.abs() * 100.0).abs();
106        TrendDirection::Up(increase_pct)
107    } else {
108        let decrease_pct = (slope * 30.0 / intercept.abs() * 100.0).abs();
109        TrendDirection::Down(decrease_pct)
110    };
111
112    ForecastData {
113        next_30_days_tokens,
114        next_30_days_cost,
115        monthly_cost_estimate,
116        confidence,
117        trend_direction,
118        unavailable_reason: None,
119    }
120}
121
/// Simple linear regression with R² calculation
///
/// Computes the least-squares line y = slope * x + intercept and its R²
/// coefficient, using the mean-centred formulation
/// (slope = Σ(x-x̄)(y-ȳ) / Σ(x-x̄)²).
///
/// Degenerate inputs never produce NaN:
/// - empty input returns `(0.0, 0.0, 0.0)`;
/// - when all x values are identical (including a single point) the slope is
///   undefined, so a horizontal line through the mean of y is returned;
/// - when all y values are identical R² is undefined and reported as 0.0.
///
/// R² is not clamped here; rounding can push it marginally outside 0.0-1.0.
///
/// # Returns
/// (slope, intercept, r_squared)
fn linear_regression(points: &[(f64, f64)]) -> (f64, f64, f64) {
    if points.is_empty() {
        return (0.0, 0.0, 0.0);
    }

    let n = points.len() as f64;
    let mean_x = points.iter().map(|&(x, _)| x).sum::<f64>() / n;
    let mean_y = points.iter().map(|&(_, y)| y).sum::<f64>() / n;

    // Centred sums of squares / cross-products in a single pass.
    let (ss_xx, ss_xy, ss_tot) = points.iter().fold(
        (0.0_f64, 0.0_f64, 0.0_f64),
        |(xx, xy, yy), &(x, y)| {
            let dx = x - mean_x;
            let dy = y - mean_y;
            (xx + dx * dx, xy + dx * dy, yy + dy * dy)
        },
    );

    // No spread in x means no defined slope: fall back to a flat line.
    let slope = if ss_xx > 0.0 { ss_xy / ss_xx } else { 0.0 };
    let intercept = mean_y - slope * mean_x;

    // R² (coefficient of determination)
    let ss_res: f64 = points
        .iter()
        .map(|&(x, y)| (y - (slope * x + intercept)).powi(2))
        .sum();

    let r_squared = if ss_tot > 0.0 {
        1.0 - (ss_res / ss_tot)
    } else {
        0.0
    };

    (slope, intercept, r_squared)
}