use crate::models::session::{SessionId, SessionMetadata};
use chrono::{Local, NaiveDate};
use std::collections::BTreeMap;
use std::sync::Arc;
/// How far outside the norm a detected anomaly is.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AnomalySeverity {
    /// The stronger of the two levels.
    Critical,
    /// The milder of the two levels.
    Warning,
}

impl AnomalySeverity {
    /// Single lookup table backing the public accessors: `(icon, color name)`.
    fn presentation(self) -> (&'static str, &'static str) {
        match self {
            AnomalySeverity::Critical => ("🚨", "red"),
            AnomalySeverity::Warning => ("⚠️", "yellow"),
        }
    }

    /// Emoji shown next to an anomaly of this severity.
    pub fn icon(&self) -> &'static str {
        let (icon, _) = self.presentation();
        icon
    }

    /// Name of the color associated with this severity (`"red"` or `"yellow"`).
    pub fn color_name(&self) -> &'static str {
        let (_, color) = self.presentation();
        color
    }
}
/// The quantity an anomaly was measured on.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AnomalyMetric {
    /// Token count of a session.
    Tokens,
    /// Monetary cost.
    Cost,
}

impl AnomalyMetric {
    /// Human-readable label for the metric.
    pub fn name(&self) -> &'static str {
        match *self {
            AnomalyMetric::Cost => "Cost",
            AnomalyMetric::Tokens => "Tokens",
        }
    }
}
/// A single session whose metric deviates strongly from the rest.
#[derive(Debug, Clone)]
pub struct Anomaly {
    /// Identifier of the flagged session.
    pub session_id: SessionId,
    /// Display timestamp of the session; `detect_anomalies` fills this with
    /// `%Y-%m-%d %H:%M` or the literal `"unknown"`.
    pub date: String,
    /// Which metric `value` refers to.
    pub metric: AnomalyMetric,
    /// Raw metric value of the session.
    pub value: f64,
    /// Signed number of standard deviations between `value` and the mean.
    pub z_score: f64,
    /// Signed deviation from the mean, in percent.
    pub deviation_pct: f64,
    /// Severity bucket derived from the z-score.
    pub severity: AnomalySeverity,
}

impl Anomaly {
    /// Renders `value` for display: a whole number for tokens, `$x.xx` for cost.
    pub fn format_value(&self) -> String {
        let value = self.value;
        match self.metric {
            AnomalyMetric::Cost => format!("${:.2}", value),
            AnomalyMetric::Tokens => format!("{:.0}", value),
        }
    }

    /// Renders `deviation_pct` as a rounded percentage, with an explicit `+`
    /// prefix when the value compares `>= 0.0`.
    pub fn format_deviation(&self) -> String {
        let pct = self.deviation_pct;
        let sign = match pct >= 0.0 {
            true => "+",
            false => "",
        };
        format!("{}{:.0}%", sign, pct)
    }
}
/// Summary statistics of a sample, used as the baseline for anomaly scoring.
#[derive(Debug, Clone)]
struct Statistics {
    /// Arithmetic mean of the sample.
    mean: f64,
    /// Population standard deviation (variance is divided by `count`).
    std_dev: f64,
    /// Number of samples; stored but exempted from the dead-code lint.
    #[allow(dead_code)]
    count: usize,
}

impl Statistics {
    /// Computes mean and population standard deviation of `values`.
    ///
    /// Returns `None` for an empty slice.
    fn compute(values: &[f64]) -> Option<Self> {
        let count = values.len();
        if count == 0 {
            return None;
        }

        let n = count as f64;
        let total: f64 = values.iter().sum();
        let mean = total / n;

        let squared_deviations: f64 = values.iter().map(|v| (v - mean).powi(2)).sum();
        let std_dev = (squared_deviations / n).sqrt();

        Some(Statistics {
            mean,
            std_dev,
            count,
        })
    }

    /// Number of standard deviations `value` lies from the mean.
    ///
    /// Yields `0.0` when the standard deviation is below `f64::EPSILON`, so a
    /// (near-)constant sample never divides by zero.
    fn z_score(&self, value: f64) -> f64 {
        if self.std_dev < f64::EPSILON {
            0.0
        } else {
            (value - self.mean) / self.std_dev
        }
    }

    /// Signed deviation of `value` from the mean, in percent of the mean.
    ///
    /// Yields `0.0` when the mean is exactly zero.
    fn deviation_pct(&self, value: f64) -> f64 {
        if self.mean == 0.0 {
            0.0
        } else {
            let relative = (value - self.mean) / self.mean;
            relative * 100.0
        }
    }
}
pub fn detect_anomalies(sessions: &[Arc<SessionMetadata>]) -> Vec<Anomaly> {
const MIN_SESSIONS: usize = 10;
const WARNING_THRESHOLD: f64 = 2.0;
const CRITICAL_THRESHOLD: f64 = 3.0;
if sessions.len() < MIN_SESSIONS {
return vec![];
}
let mut anomalies = Vec::new();
let token_values: Vec<f64> = sessions.iter().map(|s| s.total_tokens as f64).collect();
if let Some(stats) = Statistics::compute(&token_values) {
if stats.std_dev > 0.0 {
for (idx, session) in sessions.iter().enumerate() {
let value = token_values[idx];
let z_score = stats.z_score(value);
let abs_z = z_score.abs();
if abs_z > WARNING_THRESHOLD {
let severity = if abs_z > CRITICAL_THRESHOLD {
AnomalySeverity::Critical
} else {
AnomalySeverity::Warning
};
anomalies.push(Anomaly {
session_id: session.id.clone(),
date: session
.first_timestamp
.as_ref()
.map(|ts| ts.format("%Y-%m-%d %H:%M").to_string())
.unwrap_or_else(|| "unknown".to_string()),
metric: AnomalyMetric::Tokens,
value,
z_score,
deviation_pct: stats.deviation_pct(value),
severity,
});
}
}
}
}
anomalies.sort_by(|a, b| {
match (a.severity, b.severity) {
(AnomalySeverity::Critical, AnomalySeverity::Warning) => std::cmp::Ordering::Less,
(AnomalySeverity::Warning, AnomalySeverity::Critical) => std::cmp::Ordering::Greater,
_ => {
b.z_score
.abs()
.partial_cmp(&a.z_score.abs())
.unwrap_or(std::cmp::Ordering::Equal)
}
}
});
anomalies
}
/// A calendar day whose estimated cost is well above the average day.
#[derive(Debug, Clone)]
pub struct DailyCostAnomaly {
    /// The day the cost was accumulated on.
    pub date: NaiveDate,
    /// Estimated total cost for that day.
    pub cost_estimate: f64,
    /// Average daily cost the day was compared against.
    pub avg_cost: f64,
    /// `cost_estimate` divided by `avg_cost`.
    pub ratio: f64,
    /// Severity bucket derived from `ratio`.
    pub severity: AnomalySeverity,
}

impl DailyCostAnomaly {
    /// Renders the spike factor with one decimal, e.g. `3.2× avg`.
    pub fn format_ratio(&self) -> String {
        let Self { ratio, .. } = self;
        format!("{:.1}× avg", ratio)
    }

    /// Renders the day's estimated cost in dollars with three decimals.
    pub fn format_cost(&self) -> String {
        let Self { cost_estimate, .. } = self;
        format!("${:.3}", cost_estimate)
    }
}
/// Flags calendar days whose estimated cost is a multiple of the average day.
///
/// Sessions are grouped by the local-timezone date of their `first_timestamp`;
/// sessions without a timestamp, or dated before `now - window_days`, are
/// ignored. Each session's cost is estimated at a flat 0.01 per 1,000 tokens.
/// The average is taken over every day that has data, including spike days.
///
/// A day is reported as `Warning` when its cost is at least 2× the average and
/// as `Critical` at 3× or more.
///
/// # Parameters
/// - `sessions`: sessions to analyse.
/// - `window_days`: how many days back from now to consider. A window too
///   large to express as a date is treated as unbounded.
///
/// # Returns
/// Spike days ordered newest first. Empty when fewer than 7 distinct days have
/// data or when the average daily cost is below 0.01.
pub fn detect_daily_cost_spikes(
    sessions: &[Arc<SessionMetadata>],
    window_days: usize,
) -> Vec<DailyCostAnomaly> {
    const MIN_DAYS: usize = 7;
    const SPIKE_2X: f64 = 2.0;
    const SPIKE_3X: f64 = 3.0;
    const COST_PER_1K_TOKENS: f64 = 0.01;

    // Clamp before converting: a bare `as i64` wraps huge values to a negative
    // window, which would place the cutoff in the future and discard every
    // session. After the clamp the cast is lossless.
    let window = window_days.min(u32::MAX as usize) as i64;
    // `None` means the cutoff falls outside the representable date range, i.e.
    // the window effectively covers all time; the `-` operator would panic here.
    let cutoff_date = Local::now()
        .checked_sub_signed(chrono::Duration::days(window))
        .map(|cutoff| cutoff.date_naive());

    let mut daily_costs: BTreeMap<NaiveDate, f64> = BTreeMap::new();
    for session in sessions {
        let Some(ts) = session.first_timestamp else {
            continue;
        };
        let local_date = ts.with_timezone(&Local).date_naive();
        if matches!(cutoff_date, Some(cutoff) if local_date < cutoff) {
            continue;
        }
        let cost = (session.total_tokens as f64 / 1000.0) * COST_PER_1K_TOKENS;
        *daily_costs.entry(local_date).or_default() += cost;
    }

    if daily_costs.len() < MIN_DAYS {
        return Vec::new();
    }

    let costs: Vec<f64> = daily_costs.values().copied().collect();
    let Some(stats) = Statistics::compute(&costs) else {
        return Vec::new();
    };
    // A near-zero average would turn any activity into a huge ratio.
    if stats.mean < 0.01 {
        return Vec::new();
    }

    // The map iterates in ascending date order with unique keys, so walking it
    // backwards yields newest-first output without a separate sort.
    daily_costs
        .iter()
        .rev()
        .filter_map(|(&date, &cost)| {
            let ratio = cost / stats.mean;
            if ratio >= SPIKE_2X {
                let severity = if ratio >= SPIKE_3X {
                    AnomalySeverity::Critical
                } else {
                    AnomalySeverity::Warning
                };
                Some(DailyCostAnomaly {
                    date,
                    cost_estimate: cost,
                    avg_cost: stats.mean,
                    ratio,
                    severity,
                })
            } else {
                None
            }
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Utc;
    use std::path::PathBuf;

    /// Builds a session stamped "now" with the given id and total token count.
    fn create_test_session(id: &str, tokens: u64) -> Arc<SessionMetadata> {
        Arc::new(SessionMetadata {
            id: id.into(),
            file_path: PathBuf::from(format!("/tmp/{}.jsonl", id)),
            project_path: "test".into(),
            first_timestamp: Some(Utc::now()),
            last_timestamp: Some(Utc::now()),
            message_count: 10,
            total_tokens: tokens,
            input_tokens: tokens / 2,
            output_tokens: tokens / 2,
            cache_creation_tokens: 0,
            cache_read_tokens: 0,
            models_used: vec!["test-model".to_string()],
            file_size_bytes: 1024,
            first_user_message: Some("test message".to_string()),
            has_subagents: false,
            duration_seconds: Some(60),
            branch: Some("main".to_string()),
            tool_usage: std::collections::HashMap::new(),
            tool_token_usage: std::collections::HashMap::new(),
        })
    }

    /// Fewer than 10 sessions must never produce anomalies.
    #[test]
    fn test_insufficient_data() {
        let sessions = vec![
            create_test_session("s1", 1000),
            create_test_session("s2", 2000),
        ];
        let anomalies = detect_anomalies(&sessions);
        assert_eq!(
            anomalies.len(),
            0,
            "Should return empty vec for <10 sessions"
        );
    }

    /// Identical token counts have zero spread, so nothing can be an outlier.
    #[test]
    fn test_no_variance() {
        let sessions: Vec<_> = (0..15)
            .map(|i| create_test_session(&format!("s{}", i), 1000))
            .collect();
        let anomalies = detect_anomalies(&sessions);
        assert_eq!(
            anomalies.len(),
            0,
            "Should return empty vec when all values identical"
        );
    }

    /// A single extreme session among 11 is reported as critical.
    #[test]
    fn test_detect_token_outliers() {
        let mut sessions = vec![];
        for i in 0..10 {
            sessions.push(create_test_session(
                &format!("normal_{}", i),
                1000 + (i * 10) as u64,
            ));
        }
        sessions.push(create_test_session("critical", 20000));
        let anomalies = detect_anomalies(&sessions);
        assert!(
            !anomalies.is_empty(),
            "Should detect at least 1 anomaly with extreme outlier"
        );
        assert_eq!(
            anomalies[0].severity,
            AnomalySeverity::Critical,
            "Extreme outlier should be critical"
        );
        assert_eq!(anomalies[0].metric, AnomalyMetric::Tokens);
        assert!(
            anomalies[0].z_score.abs() > 3.0,
            "Critical anomaly should have |z| > 3"
        );
    }

    /// Critical anomalies are listed before warnings.
    ///
    /// The sample needs well over 10 sessions: with a population standard
    /// deviation, |z| can never exceed sqrt(n - 1), so with n = 10 the critical
    /// threshold (|z| > 3) is unreachable and nothing would be asserted.
    /// With these 32 values the 6000-token session scores |z| ≈ 2.3 (warning)
    /// and the 11000-token session |z| ≈ 5.0 (critical).
    #[test]
    fn test_severity_sorting() {
        let mut sessions = vec![];
        for i in 0..30u64 {
            sessions.push(create_test_session(
                &format!("normal_{}", i),
                1000 + i * 10,
            ));
        }
        // Deliberately insert the warning first so the sort has work to do.
        sessions.push(create_test_session("warning", 6000));
        sessions.push(create_test_session("critical", 11000));

        let anomalies = detect_anomalies(&sessions);
        assert_eq!(
            anomalies.len(),
            2,
            "Exactly the two outliers should be reported"
        );
        assert_eq!(
            anomalies[0].severity,
            AnomalySeverity::Critical,
            "Critical anomalies should be sorted first"
        );
        assert_eq!(anomalies[0].value, 11000.0);
        assert_eq!(
            anomalies[1].severity,
            AnomalySeverity::Warning,
            "Warnings should follow critical anomalies"
        );
        assert_eq!(anomalies[1].value, 6000.0);
    }

    /// The reported deviation percentage reflects the distance from the mean.
    #[test]
    fn test_deviation_percentage() {
        let mut sessions = vec![];
        for i in 0..10 {
            sessions.push(create_test_session(&format!("s{}", i), 1000));
        }
        sessions.push(create_test_session("outlier", 10000));
        let anomalies = detect_anomalies(&sessions);
        assert!(
            !anomalies.is_empty(),
            "Should detect anomaly with extreme outlier"
        );
        let outlier = &anomalies[0];
        assert!(
            outlier.deviation_pct.abs() > 100.0,
            "Deviation should be >100% for extreme outlier, got {}%",
            outlier.deviation_pct
        );
    }

    /// Builds a session whose timestamps lie `days_ago` days before now.
    fn create_session_on_date(id: &str, tokens: u64, days_ago: i64) -> Arc<SessionMetadata> {
        let ts = Utc::now() - chrono::Duration::days(days_ago);
        Arc::new(SessionMetadata {
            id: id.into(),
            file_path: PathBuf::from(format!("/tmp/{}.jsonl", id)),
            project_path: "test".into(),
            first_timestamp: Some(ts),
            last_timestamp: Some(ts),
            message_count: 5,
            total_tokens: tokens,
            input_tokens: tokens / 2,
            output_tokens: tokens / 2,
            cache_creation_tokens: 0,
            cache_read_tokens: 0,
            models_used: vec!["test-model".to_string()],
            file_size_bytes: 512,
            first_user_message: Some("test".to_string()),
            has_subagents: false,
            duration_seconds: Some(30),
            branch: Some("main".to_string()),
            tool_usage: std::collections::HashMap::new(),
            tool_token_usage: std::collections::HashMap::new(),
        })
    }

    /// Fewer than 7 distinct days of data must not produce spikes.
    #[test]
    fn test_daily_spikes_insufficient_data() {
        let sessions: Vec<_> = (0..3)
            .map(|i| create_session_on_date(&format!("s{}", i), 1000, i as i64))
            .collect();
        let spikes = detect_daily_cost_spikes(&sessions, 30);
        assert!(spikes.is_empty(), "Should return empty for <7 days data");
    }

    /// Eight ordinary days plus one very expensive day yields a critical spike.
    #[test]
    fn test_daily_spikes_detects_outlier_day() {
        let mut sessions = vec![];
        for day in 1..=8i64 {
            for j in 0..3 {
                sessions.push(create_session_on_date(
                    &format!("normal_d{}_s{}", day, j),
                    33_000,
                    day,
                ));
            }
        }
        sessions.push(create_session_on_date("spike", 1_000_000, 0));
        let spikes = detect_daily_cost_spikes(&sessions, 30);
        assert!(!spikes.is_empty(), "Should detect the spike day");
        assert_eq!(spikes[0].severity, AnomalySeverity::Critical);
        assert!(spikes[0].ratio > 3.0, "ratio should be >3x average");
    }
}