use crate::solvers::alm::{AlmDistribution, AlmRegressor};
use crate::solvers::lm_dynamic::InformationCriterion;
use crate::solvers::traits::{FittedRegressor, Regressor};
use faer::{Col, Mat};
use statrs::function::gamma::ln_gamma;
use std::collections::HashMap;
use std::f64::consts::PI;
use std::sync::LazyLock;
const LN_FACT_SIZE: usize = 257;
static LN_FACTORIAL: LazyLock<[f64; LN_FACT_SIZE]> = LazyLock::new(|| {
let mut table = [0.0; LN_FACT_SIZE];
for k in 1..LN_FACT_SIZE {
table[k] = table[k - 1] + (k as f64).ln();
}
table
});
#[inline(always)]
fn fast_ln_gamma_p1(v: f64) -> f64 {
let k = v as u32;
if (k as usize) < LN_FACT_SIZE && v == k as f64 {
LN_FACTORIAL[k as usize]
} else {
ln_gamma(v + 1.0)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DemandType {
Regular,
Intermittent,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DemandDistribution {
Poisson,
NegativeBinomial,
Geometric,
Normal,
Gamma,
LogNormal,
RectifiedNormal,
}
impl DemandDistribution {
pub fn to_alm_distribution(&self) -> AlmDistribution {
match self {
DemandDistribution::Poisson => AlmDistribution::Poisson,
DemandDistribution::NegativeBinomial => AlmDistribution::NegativeBinomial,
DemandDistribution::Geometric => AlmDistribution::Geometric,
DemandDistribution::Normal => AlmDistribution::Normal,
DemandDistribution::Gamma => AlmDistribution::Gamma,
DemandDistribution::LogNormal => AlmDistribution::LogNormal,
DemandDistribution::RectifiedNormal => AlmDistribution::RectifiedNormal,
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AnomalyType {
None,
Stockout,
NewProduct,
ObsoleteProduct,
HighOutlier,
LowOutlier,
}
#[derive(Debug, Clone)]
pub struct DistributionParameters {
pub mean: f64,
pub variance: f64,
pub shape: Option<f64>,
pub zero_prob: Option<f64>,
pub scale: Option<f64>,
}
#[derive(Debug, Clone)]
pub struct DemandClassification {
pub demand_type: DemandType,
pub is_fractional: bool,
pub distribution: DemandDistribution,
pub parameters: DistributionParameters,
pub anomalies: Vec<AnomalyType>,
pub ic_values: HashMap<DemandDistribution, f64>,
pub zero_proportion: f64,
pub n_observations: usize,
}
impl DemandClassification {
pub fn has_stockouts(&self) -> bool {
self.anomalies.contains(&AnomalyType::Stockout)
}
pub fn anomaly_counts(&self) -> HashMap<AnomalyType, usize> {
let mut counts = HashMap::new();
for anomaly in &self.anomalies {
*counts.entry(*anomaly).or_insert(0) += 1;
}
counts
}
pub fn is_new_product(&self) -> bool {
self.anomalies.contains(&AnomalyType::NewProduct)
}
pub fn is_obsolete_product(&self) -> bool {
self.anomalies.contains(&AnomalyType::ObsoleteProduct)
}
}
#[derive(Debug, Clone)]
pub struct AidClassifier {
#[allow(dead_code)]
anomaly_alpha: f64,
intermittent_threshold: f64,
detect_anomalies: bool,
ic_type: InformationCriterion,
}
impl Default for AidClassifier {
fn default() -> Self {
Self {
anomaly_alpha: 0.05,
intermittent_threshold: 0.3,
detect_anomalies: true,
ic_type: InformationCriterion::AICc,
}
}
}
impl AidClassifier {
pub fn new() -> Self {
Self::default()
}
pub fn builder() -> AidClassifierBuilder {
AidClassifierBuilder::default()
}
pub fn classify(&self, y: &Col<f64>) -> DemandClassification {
let n = y.nrows();
if n == 0 {
return DemandClassification {
demand_type: DemandType::Regular,
is_fractional: false,
distribution: DemandDistribution::Normal,
parameters: DistributionParameters {
mean: 0.0,
variance: 0.0,
shape: None,
zero_prob: None,
scale: None,
},
anomalies: Vec::new(),
ic_values: HashMap::new(),
zero_proportion: 0.0,
n_observations: 0,
};
}
let n_f = n as f64;
let mut sum = 0.0;
let mut sum_sq = 0.0;
let mut zero_count = 0u32;
let mut is_fractional = false;
let mut has_non_positive = false;
let mut n_positive = 0usize;
for &v in y.iter() {
sum += v;
sum_sq += v * v;
if v == 0.0 {
zero_count += 1;
}
if !is_fractional && v != v.round() {
is_fractional = true;
}
if v <= 0.0 {
has_non_positive = true;
} else {
n_positive += 1;
}
}
let y_mean = sum / n_f;
let zero_proportion = zero_count as f64 / n_f;
let sum_sq_dev = sum_sq - sum * sum / n_f;
let y_var = sum_sq_dev / n_f;
let demand_type = if zero_proportion > self.intermittent_threshold {
DemandType::Intermittent
} else {
DemandType::Regular
};
let mut sum_ln_gamma_yp1 = 0.0;
let mut sum_ln_y = 0.0;
let mut sum_ln_y_sq = 0.0;
if !is_fractional {
for &v in y.iter() {
sum_ln_gamma_yp1 += fast_ln_gamma_p1(v.round().max(0.0));
}
} else if !has_non_positive {
let needs_ln_sq = demand_type == DemandType::Regular; for &v in y.iter() {
let ln_v = v.ln();
sum_ln_y += ln_v;
if needs_ln_sq {
sum_ln_y_sq += ln_v * ln_v;
}
}
}
let log_mean_exp = if !has_non_positive && is_fractional && n_positive > 0 {
Some((sum_ln_y / n_positive as f64).exp())
} else {
None
};
let stats = PrecomputedStats {
n,
y_mean,
y_var,
zero_proportion,
has_non_positive,
sum_y: sum,
sum_sq_dev,
sum_ln_gamma_yp1,
sum_ln_y,
sum_ln_y_sq,
log_mean_exp,
};
let candidates = self.get_candidate_distributions(demand_type, is_fractional);
let mut ic_values = HashMap::with_capacity(3);
let mut best_dist = candidates[0];
let mut best_ic = f64::INFINITY;
let mut best_params = DistributionParameters {
mean: y_mean,
variance: y_var,
shape: None,
zero_prob: Some(zero_proportion),
scale: None,
};
for dist in candidates {
if let Some((ic, params)) = self.fit_distribution(y, dist, &stats) {
ic_values.insert(dist, ic);
if ic < best_ic {
best_ic = ic;
best_dist = dist;
best_params = params;
}
}
}
let anomalies = if self.detect_anomalies {
self.detect_anomalies_impl(y, best_dist, &best_params)
} else {
vec![AnomalyType::None; n]
};
DemandClassification {
demand_type,
is_fractional,
distribution: best_dist,
parameters: best_params,
anomalies,
ic_values,
zero_proportion,
n_observations: n,
}
}
#[inline]
fn get_candidate_distributions(
&self,
demand_type: DemandType,
is_fractional: bool,
) -> [DemandDistribution; 3] {
match (demand_type, is_fractional) {
(DemandType::Regular, false) => [
DemandDistribution::Poisson,
DemandDistribution::NegativeBinomial,
DemandDistribution::Normal,
],
(DemandType::Regular, true) => [
DemandDistribution::Normal,
DemandDistribution::Gamma,
DemandDistribution::LogNormal,
],
(DemandType::Intermittent, false) => [
DemandDistribution::NegativeBinomial,
DemandDistribution::Geometric,
DemandDistribution::Poisson,
],
(DemandType::Intermittent, true) => [
DemandDistribution::RectifiedNormal,
DemandDistribution::Gamma,
DemandDistribution::Normal,
],
}
}
fn fit_distribution(
&self,
y: &Col<f64>,
dist: DemandDistribution,
stats: &PrecomputedStats,
) -> Option<(f64, DistributionParameters)> {
let n = stats.n;
let n_f = n as f64;
let mu = stats.y_mean;
let var = stats.y_var;
if stats.has_non_positive
&& matches!(
dist,
DemandDistribution::Gamma | DemandDistribution::LogNormal
)
{
return None;
}
if dist == DemandDistribution::RectifiedNormal {
return self.fit_distribution_via_alm(y, dist);
}
let (ll, scale, shape, n_params, mu_out) = match dist {
DemandDistribution::Poisson => {
let lambda = mu.max(1e-10);
let ll = stats.sum_y * lambda.ln() - n_f * lambda - stats.sum_ln_gamma_yp1;
(ll, 1.0, None, 1, mu)
}
DemandDistribution::NegativeBinomial => {
let size = if var > mu && mu > 0.0 {
mu * mu / (var - mu)
} else {
1.0
};
let mu_c = mu.max(1e-10);
let p = size / (size + mu_c);
let ln_gamma_size = ln_gamma(size);
let sum_ln_gamma_y_plus_size: f64 = y
.iter()
.map(|&v| {
if v <= 0.0 {
ln_gamma_size
} else {
ln_gamma(v.round() + size)
}
})
.sum();
let ll = sum_ln_gamma_y_plus_size - n_f * ln_gamma_size - stats.sum_ln_gamma_yp1
+ n_f * size * p.ln()
+ stats.sum_y * (1.0 - p).ln();
(ll, 1.0, Some(size), 2, mu)
}
DemandDistribution::Geometric => {
let lambda = mu.max(1e-10);
let p = 1.0 / (1.0 + lambda);
let ll = n_f * p.ln() + stats.sum_y * (1.0 - p).ln();
(ll, 1.0, None, 1, mu)
}
DemandDistribution::Normal => {
let df = (n - 1) as f64;
let sigma = (stats.sum_sq_dev / df).sqrt();
let sigma2 = sigma * sigma;
let ll = -0.5 * n_f * (2.0 * PI * sigma2).ln() - stats.sum_sq_dev / (2.0 * sigma2);
(ll, sigma, None, 2, mu)
}
DemandDistribution::Gamma => {
let shape = if var > 0.0 { mu * mu / var } else { 1.0 };
let rate = shape / mu;
let ll = n_f * shape * rate.ln() + (shape - 1.0) * stats.sum_ln_y
- rate * stats.sum_y
- n_f * ln_gamma(shape);
(ll, 1.0, Some(shape), 2, mu)
}
DemandDistribution::LogNormal => {
let mu_resp = stats.log_mean_exp.unwrap_or(mu);
let log_mu = mu_resp.ln();
let df = (n - 1) as f64;
let log_rss =
stats.sum_ln_y_sq - 2.0 * log_mu * stats.sum_ln_y + n_f * log_mu * log_mu;
let sigma = (log_rss / df).sqrt();
let sigma2 = sigma * sigma;
let ll = -stats.sum_ln_y
- n_f * sigma.ln()
- 0.5 * n_f * (2.0 * PI).ln()
- log_rss / (2.0 * sigma2);
(ll, sigma, None, 2, mu_resp)
}
DemandDistribution::RectifiedNormal => unreachable!(),
};
if !ll.is_finite() {
return None;
}
let ic = self.ic_type.compute(ll, n_params, n);
let out_var = if (mu_out - mu).abs() < 1e-15 {
var
} else {
y.iter().map(|&v| (v - mu_out).powi(2)).sum::<f64>() / n_f
};
Some((
ic,
DistributionParameters {
mean: mu_out,
variance: out_var,
shape,
zero_prob: Some(stats.zero_proportion),
scale: Some(scale),
},
))
}
fn fit_distribution_via_alm(
&self,
y: &Col<f64>,
dist: DemandDistribution,
) -> Option<(f64, DistributionParameters)> {
let n = y.nrows();
let x = Mat::from_fn(n, 1, |_, _| 1.0);
let model = AlmRegressor::builder()
.distribution(dist.to_alm_distribution())
.with_intercept(false)
.compute_inference(false)
.build();
match model.fit(&x, y) {
Ok(fitted) => {
let result = fitted.result();
let scale = fitted.scale();
let ll = result.log_likelihood;
let k = result.n_parameters;
let ic = self.ic_type.compute(ll, k, n);
let mean = result.fitted_values.iter().sum::<f64>() / n as f64;
let variance: f64 =
result.residuals.iter().map(|&r| r.powi(2)).sum::<f64>() / n as f64;
let zero_count = y.iter().filter(|&&v| v == 0.0).count();
let zero_prob = zero_count as f64 / n as f64;
Some((
ic,
DistributionParameters {
mean,
variance,
shape: None,
zero_prob: Some(zero_prob),
scale: Some(scale),
},
))
}
Err(_) => None,
}
}
fn detect_anomalies_impl(
&self,
y: &Col<f64>,
_dist: DemandDistribution,
params: &DistributionParameters,
) -> Vec<AnomalyType> {
let n = y.nrows();
let mut anomalies = vec![AnomalyType::None; n];
if n == 0 {
return anomalies;
}
let mean = params.mean;
let std_dev = params.variance.sqrt();
let mut leading_zeros = 0;
for i in 0..n {
if y[i] == 0.0 {
leading_zeros += 1;
} else {
break;
}
}
if leading_zeros > 0 && (leading_zeros as f64 / n as f64) > 0.1 {
for anomaly in anomalies.iter_mut().take(leading_zeros) {
*anomaly = AnomalyType::NewProduct;
}
}
let mut trailing_zeros = 0;
for i in (0..n).rev() {
if y[i] == 0.0 {
trailing_zeros += 1;
} else {
break;
}
}
if trailing_zeros > 0 && (trailing_zeros as f64 / n as f64) > 0.1 {
for anomaly in anomalies.iter_mut().skip(n - trailing_zeros) {
if *anomaly == AnomalyType::None {
*anomaly = AnomalyType::ObsoleteProduct;
}
}
}
if mean > 0.0 {
let _z_threshold = 2.0;
for i in leading_zeros..(n - trailing_zeros) {
if y[i] == 0.0 && anomalies[i] == AnomalyType::None {
let before_nonzero = if i > 0 {
(0..i).rev().take(3).any(|j| y[j] > 0.0)
} else {
false
};
let after_nonzero = ((i + 1)..n).take(3).any(|j| y[j] > 0.0);
if before_nonzero && after_nonzero {
anomalies[i] = AnomalyType::Stockout;
}
}
}
}
if std_dev > 0.0 {
let high_threshold = mean + 3.0 * std_dev;
let low_threshold = (mean - 3.0 * std_dev).max(0.0);
for i in 0..n {
if anomalies[i] == AnomalyType::None {
if y[i] > high_threshold {
anomalies[i] = AnomalyType::HighOutlier;
} else if y[i] > 0.0 && y[i] < low_threshold && mean > 0.0 {
anomalies[i] = AnomalyType::LowOutlier;
}
}
}
}
anomalies
}
}
struct PrecomputedStats {
n: usize,
y_mean: f64,
y_var: f64,
zero_proportion: f64,
has_non_positive: bool,
sum_y: f64,
sum_sq_dev: f64,
sum_ln_gamma_yp1: f64,
sum_ln_y: f64,
sum_ln_y_sq: f64,
log_mean_exp: Option<f64>,
}
#[derive(Debug, Clone)]
pub struct AidClassifierBuilder {
anomaly_alpha: f64,
intermittent_threshold: f64,
detect_anomalies: bool,
ic_type: InformationCriterion,
}
impl Default for AidClassifierBuilder {
fn default() -> Self {
Self {
anomaly_alpha: 0.05,
intermittent_threshold: 0.3,
detect_anomalies: true,
ic_type: InformationCriterion::AICc,
}
}
}
impl AidClassifierBuilder {
pub fn new() -> Self {
Self::default()
}
pub fn anomaly_alpha(mut self, alpha: f64) -> Self {
self.anomaly_alpha = alpha.clamp(0.001, 0.5);
self
}
pub fn intermittent_threshold(mut self, threshold: f64) -> Self {
self.intermittent_threshold = threshold.clamp(0.0, 1.0);
self
}
pub fn detect_anomalies(mut self, detect: bool) -> Self {
self.detect_anomalies = detect;
self
}
pub fn ic(mut self, ic: InformationCriterion) -> Self {
self.ic_type = ic;
self
}
pub fn build(self) -> AidClassifier {
AidClassifier {
anomaly_alpha: self.anomaly_alpha,
intermittent_threshold: self.intermittent_threshold,
detect_anomalies: self.detect_anomalies,
ic_type: self.ic_type,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_builder_defaults() {
let classifier = AidClassifier::builder().build();
assert_eq!(classifier.ic_type, InformationCriterion::AICc);
assert!(classifier.detect_anomalies);
}
#[test]
fn test_builder_custom() {
let classifier = AidClassifier::builder()
.intermittent_threshold(0.5)
.detect_anomalies(false)
.ic(InformationCriterion::BIC)
.build();
assert!(!classifier.detect_anomalies);
assert_eq!(classifier.ic_type, InformationCriterion::BIC);
}
#[test]
fn test_regular_count_demand() {
let y = Col::from_fn(50, |i| ((i % 5) + 5) as f64);
let result = AidClassifier::new().classify(&y);
assert_eq!(result.demand_type, DemandType::Regular);
assert!(!result.is_fractional);
assert_eq!(result.zero_proportion, 0.0);
}
#[test]
fn test_intermittent_demand() {
let y = Col::from_fn(50, |i| if i % 3 == 0 { 5.0 } else { 0.0 });
let result = AidClassifier::builder()
.intermittent_threshold(0.3)
.build()
.classify(&y);
assert_eq!(result.demand_type, DemandType::Intermittent);
assert!(result.zero_proportion > 0.5);
}
#[test]
fn test_fractional_demand() {
let y = Col::from_fn(30, |i| 5.5 + (i as f64 * 0.1).sin());
let result = AidClassifier::new().classify(&y);
assert!(result.is_fractional);
assert_eq!(result.demand_type, DemandType::Regular);
}
#[test]
fn test_stockout_detection() {
let mut y = Col::from_fn(30, |_| 10.0);
y[10] = 0.0; y[15] = 0.0;
let result = AidClassifier::builder()
.detect_anomalies(true)
.build()
.classify(&y);
assert!(result.has_stockouts());
}
#[test]
fn test_new_product_detection() {
let y = Col::from_fn(30, |i| if i < 10 { 0.0 } else { 5.0 });
let result = AidClassifier::new().classify(&y);
assert!(result.is_new_product());
for i in 0..10 {
assert_eq!(result.anomalies[i], AnomalyType::NewProduct);
}
}
#[test]
fn test_obsolete_product_detection() {
let y = Col::from_fn(30, |i| if i < 20 { 5.0 } else { 0.0 });
let result = AidClassifier::new().classify(&y);
assert!(result.is_obsolete_product());
for i in 20..30 {
assert_eq!(result.anomalies[i], AnomalyType::ObsoleteProduct);
}
}
#[test]
fn test_empty_data() {
let y = Col::zeros(0);
let result = AidClassifier::new().classify(&y);
assert_eq!(result.n_observations, 0);
assert!(result.anomalies.is_empty());
}
#[test]
fn test_ic_values_populated() {
let y = Col::from_fn(50, |i| (i + 1) as f64);
let result = AidClassifier::new().classify(&y);
assert!(!result.ic_values.is_empty());
}
#[test]
fn test_demand_distribution_to_alm() {
assert_eq!(
DemandDistribution::Poisson.to_alm_distribution(),
AlmDistribution::Poisson
);
assert_eq!(
DemandDistribution::Normal.to_alm_distribution(),
AlmDistribution::Normal
);
assert_eq!(
DemandDistribution::Gamma.to_alm_distribution(),
AlmDistribution::Gamma
);
}
#[test]
fn test_anomaly_counts() {
let mut y = Col::from_fn(30, |_| 10.0);
y[10] = 0.0;
y[15] = 0.0;
y[25] = 100.0;
let result = AidClassifier::new().classify(&y);
let counts = result.anomaly_counts();
assert!(
counts.get(&AnomalyType::Stockout).unwrap_or(&0) > &0
|| counts.get(&AnomalyType::HighOutlier).unwrap_or(&0) > &0
);
}
}