Skip to main content

ccboard_types/analytics/
patterns.rs

//! Usage pattern detection
//!
//! Identifies behavioral patterns: peak hours, productive days,
//! model distribution, and session duration analytics.
5
6use chrono::{Datelike, Timelike, Weekday};
7use std::collections::HashMap;
8use std::sync::Arc;
9use std::time::Duration;
10
11use crate::models::session::SessionMetadata;
12
13/// Usage patterns
14#[derive(Debug, Clone)]
15pub struct UsagePatterns {
16    /// Most productive hour (0-23)
17    pub most_productive_hour: u8,
18    /// Most productive weekday
19    pub most_productive_day: Weekday,
20    /// Average session duration
21    pub avg_session_duration: Duration,
22    /// Most used model (by token count)
23    pub most_used_model: String,
24    /// Model distribution by token count (percentages)
25    pub model_distribution: HashMap<String, f64>,
26    /// Model distribution by cost (percentages)
27    pub model_cost_distribution: HashMap<String, f64>,
28    /// Peak hours (above 80th percentile)
29    pub peak_hours: Vec<u8>,
30    /// Hourly distribution (sessions per hour, 0-23)
31    pub hourly_distribution: [usize; 24],
32    /// Weekday distribution (sessions per weekday, 0-6)
33    pub weekday_distribution: [usize; 7],
34    /// Activity heatmap: [weekday][hour] = session count
35    /// weekday: 0-6 (Mon-Sun), hour: 0-23
36    pub activity_heatmap: [[usize; 24]; 7],
37    /// Tool usage statistics: tool name -> call count
38    pub tool_usage: HashMap<String, usize>,
39}
40
41impl UsagePatterns {
42    /// Empty placeholder
43    pub fn empty() -> Self {
44        Self {
45            most_productive_hour: 0,
46            most_productive_day: Weekday::Mon,
47            avg_session_duration: Duration::from_secs(0),
48            most_used_model: "unknown".to_string(),
49            model_distribution: HashMap::new(),
50            model_cost_distribution: HashMap::new(),
51            peak_hours: Vec::new(),
52            hourly_distribution: [0; 24],
53            weekday_distribution: [0; 7],
54            activity_heatmap: [[0; 24]; 7],
55            tool_usage: HashMap::new(),
56        }
57    }
58}
59
60/// Estimate cost from session (same as trends.rs)
61///
62/// TODO: Deduplicate with trends.rs estimate_cost()
63fn estimate_cost(session: &SessionMetadata) -> f64 {
64    (session.total_tokens as f64 / 1000.0) * 0.01
65}
66
67/// Detect usage patterns
68///
69/// Analyzes hourly/weekday distributions, model usage (token + cost weighted),
70/// session duration, and peak hours (80th percentile threshold).
71///
72/// # Performance
73/// Target: <30ms for 1000 sessions
74///
75/// # Graceful Degradation
76/// - Empty sessions: Returns UsagePatterns::empty()
77/// - Missing timestamps: Session skipped with warning
78/// - No duration data: avg_session_duration = 0
79pub fn detect_patterns(sessions: &[Arc<SessionMetadata>], days: usize) -> UsagePatterns {
80    use chrono::Local;
81
82    if sessions.is_empty() {
83        return UsagePatterns::empty();
84    }
85
86    let mut hourly_counts = [0usize; 24];
87    let mut weekday_counts = [0usize; 7];
88    let mut activity_heatmap = [[0usize; 24]; 7];
89    let mut tool_usage: HashMap<String, usize> = HashMap::new();
90    let mut total_duration = Duration::from_secs(0);
91    let mut duration_count = 0usize;
92    let mut model_tokens: HashMap<String, u64> = HashMap::new();
93    let mut model_costs: HashMap<String, f64> = HashMap::new();
94
95    // Filter by period (same logic as compute_trends)
96    let now = Local::now();
97    let cutoff = now - chrono::Duration::days(days as i64);
98
99    for session in sessions {
100        // Apply period filter check
101        let passes_filter = if let Some(ts) = session.first_timestamp {
102            let local_ts = ts.with_timezone(&Local);
103            local_ts >= cutoff
104        } else {
105            false
106        };
107
108        if !passes_filter {
109            continue;
110        }
111
112        // Hourly distribution & heatmap
113        if let Some(ts) = session.first_timestamp {
114            let local_ts = ts.with_timezone(&Local);
115            let hour = local_ts.hour() as usize;
116            let weekday = local_ts.weekday().num_days_from_monday() as usize;
117
118            hourly_counts[hour] += 1;
119            weekday_counts[weekday] += 1;
120            activity_heatmap[weekday][hour] += 1;
121        }
122
123        // Tool usage stats - extracted from session metadata
124        for (tool_name, count) in &session.tool_usage {
125            *tool_usage.entry(tool_name.clone()).or_default() += count;
126        }
127
128        // Session duration
129        if let (Some(start), Some(end)) = (session.first_timestamp, session.last_timestamp) {
130            if let Ok(duration) = (end - start).to_std() {
131                total_duration += duration;
132                duration_count += 1;
133            }
134        }
135
136        // Model distribution (tokens + cost)
137        // Divide tokens equally among models used in this session
138        if session.models_used.is_empty() {
139            // No model info: attribute to "unknown"
140            *model_tokens.entry("unknown".to_string()).or_default() += session.total_tokens;
141            *model_costs.entry("unknown".to_string()).or_default() += estimate_cost(session);
142        } else {
143            let models_count = session.models_used.len() as u64;
144            let tokens_per_model = session.total_tokens / models_count;
145            let cost = estimate_cost(session);
146            let cost_per_model = cost / models_count as f64;
147
148            for model in &session.models_used {
149                *model_tokens.entry(model.clone()).or_default() += tokens_per_model;
150                *model_costs.entry(model.clone()).or_default() += cost_per_model;
151            }
152        }
153    }
154
155    // Most productive hour
156    let most_productive_hour = hourly_counts
157        .iter()
158        .enumerate()
159        .max_by_key(|(_, count)| *count)
160        .map(|(hour, _)| hour as u8)
161        .unwrap_or(0);
162
163    // Most productive day
164    let most_productive_day = weekday_counts
165        .iter()
166        .enumerate()
167        .max_by_key(|(_, count)| *count)
168        .and_then(|(idx, _)| Weekday::try_from(idx as u8).ok())
169        .unwrap_or(Weekday::Mon);
170
171    // Average duration
172    let avg_session_duration = if duration_count > 0 {
173        total_duration / duration_count as u32
174    } else {
175        Duration::from_secs(0)
176    };
177
178    // Peak hours (80th percentile threshold)
179    let total_sessions: usize = hourly_counts.iter().sum();
180    let threshold = (total_sessions as f64 * 0.8 / 24.0) as usize;
181    let peak_hours: Vec<u8> = hourly_counts
182        .iter()
183        .enumerate()
184        .filter(|(_, count)| **count > threshold)
185        .map(|(hour, _)| hour as u8)
186        .collect();
187
188    // Model distribution (by tokens)
189    let total_tokens: u64 = model_tokens.values().sum();
190    let model_distribution: HashMap<String, f64> = if total_tokens > 0 {
191        model_tokens
192            .into_iter()
193            .map(|(model, tokens)| (model, tokens as f64 / total_tokens as f64))
194            .collect()
195    } else {
196        HashMap::new()
197    };
198
199    // Most used model
200    let most_used_model = model_distribution
201        .iter()
202        .max_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal))
203        .map(|(model, _)| model.clone())
204        .unwrap_or_else(|| "unknown".to_string());
205
206    // Model cost distribution (NEW: cost-weighted)
207    let total_cost: f64 = model_costs.values().sum();
208    let model_cost_distribution: HashMap<String, f64> = if total_cost > 0.0 {
209        model_costs
210            .into_iter()
211            .map(|(model, cost)| (model, cost / total_cost))
212            .collect()
213    } else {
214        HashMap::new()
215    };
216
217    UsagePatterns {
218        most_productive_hour,
219        most_productive_day,
220        avg_session_duration,
221        most_used_model,
222        model_distribution,
223        model_cost_distribution,
224        peak_hours,
225        hourly_distribution: hourly_counts,
226        weekday_distribution: weekday_counts,
227        activity_heatmap,
228        tool_usage,
229    }
230}