use crate::core::error::{Error, Result};
use crate::dataframe::DataFrame;
use crate::ml::models::ModelEvaluator;
use crate::ml::models::ModelMetrics;
use crate::ml::models::UnsupervisedModel;
use std::collections::{HashMap, HashSet};
/// Isolation-forest anomaly detector: configuration plus fitted state.
///
/// NOTE(review): the current implementation is a placeholder — scoring is
/// randomized rather than tree-based (see the `fit`/`predict` bodies).
#[derive(Debug, Clone)]
pub struct IsolationForest {
// Intended number of isolation trees (default 100); not yet consumed by the placeholder.
pub n_estimators: usize,
// Rows sampled per tree; `None` = use all rows. Not yet consumed by the placeholder.
pub max_samples: Option<usize>,
// Maximum tree depth; `None` = unbounded. Not yet consumed by the placeholder.
pub max_depth: Option<usize>,
// Expected fraction of anomalous rows (default 0.1).
pub contamination: f64,
// RNG seed for reproducibility; NOTE(review): not consumed by the placeholder sampling yet.
pub random_seed: Option<u64>,
// Per-row anomaly scores produced by `fit`; `None` before fitting.
pub scores: Option<Vec<f64>>,
// Feature columns used; captured from the data during `fit` when not set explicitly.
pub feature_columns: Option<Vec<String>>,
}
impl IsolationForest {
pub fn new() -> Self {
IsolationForest {
n_estimators: 100,
max_samples: None,
max_depth: None,
contamination: 0.1,
random_seed: None,
scores: None,
feature_columns: None,
}
}
pub fn anomaly_scores(&self) -> &[f64] {
match &self.scores {
Some(scores) => scores,
None => &[], }
}
pub fn labels(&self) -> &[i64] {
&[]
}
pub fn n_estimators(mut self, n_estimators: usize) -> Self {
self.n_estimators = n_estimators;
self
}
pub fn max_samples(mut self, max_samples: usize) -> Self {
self.max_samples = Some(max_samples);
self
}
pub fn max_depth(mut self, max_depth: usize) -> Self {
self.max_depth = Some(max_depth);
self
}
pub fn contamination(mut self, contamination: f64) -> Self {
self.contamination = contamination;
self
}
pub fn random_seed(mut self, seed: u64) -> Self {
self.random_seed = Some(seed);
self
}
pub fn with_columns(mut self, columns: Vec<String>) -> Self {
self.feature_columns = Some(columns);
self
}
pub fn predict(&self, data: &DataFrame) -> Result<Vec<f64>> {
let n_samples = data.row_count();
let mut scores = vec![1.0; n_samples];
use rand::{Rng, RngExt};
let mut rng = rand::rng();
for _ in 0..((n_samples as f64 * 0.1) as usize) {
let idx = rng.random_range(0..n_samples);
scores[idx] = -1.0; }
Ok(scores)
}
pub fn decision_function(&self, data: &DataFrame) -> Result<Vec<f64>> {
let n_samples = data.row_count();
let mut scores = Vec::with_capacity(n_samples);
use rand::{Rng, RngExt};
let mut rng = rand::rng();
for _ in 0..n_samples {
scores.push(rng.random_range(-1.0..1.0));
}
Ok(scores)
}
}
impl UnsupervisedModel for IsolationForest {
    /// Fits the forest to `data`.
    ///
    /// Placeholder: draws one uniform score in `[-1, 1)` per row instead of
    /// building isolation trees, then records the feature columns used.
    fn fit(&mut self, data: &DataFrame) -> Result<()> {
        use rand::{Rng, RngExt};
        let mut rng = rand::rng();
        let drawn: Vec<f64> = (0..data.row_count())
            .map(|_| rng.random_range(-1.0..1.0))
            .collect();
        self.scores = Some(drawn);
        // Remember which columns were used, unless the caller pinned them.
        if self.feature_columns.is_none() {
            self.feature_columns = Some(data.column_names().into());
        }
        Ok(())
    }

    /// Returns a copy of `data` extended with an `anomaly_score` column
    /// holding the decision-function output.
    fn transform(&self, data: &DataFrame) -> Result<DataFrame> {
        let scores = self.decision_function(data)?;
        let column = crate::series::Series::new(scores, Some("anomaly_score".to_string()))?;
        let mut augmented = data.clone();
        augmented.add_column("anomaly_score".to_string(), column)?;
        Ok(augmented)
    }
}
impl ModelEvaluator for IsolationForest {
    /// Reports the configured contamination rate as `anomaly_ratio`.
    ///
    /// Both parameters are ignored (unsupervised model); they are renamed
    /// with a leading underscore to silence the unused-variable warning and
    /// match the sibling evaluator impls.
    fn evaluate(&self, _test_data: &DataFrame, _test_target: &str) -> Result<ModelMetrics> {
        let mut metrics = ModelMetrics::new();
        metrics.add_metric("anomaly_ratio", self.contamination);
        Ok(metrics)
    }

    /// Always fails: k-fold cross-validation needs labeled targets, which
    /// anomaly detection does not have.
    fn cross_validate(
        &self,
        _data: &DataFrame,
        _target: &str,
        _folds: usize,
    ) -> Result<Vec<ModelMetrics>> {
        Err(Error::InvalidOperation(
            "Cross-validation is not applicable for anomaly detection".into(),
        ))
    }
}
/// Local Outlier Factor (LOF) anomaly detector: configuration plus state.
///
/// NOTE(review): the current implementation is a placeholder — no neighbor
/// graph is built (see `fit`/`transform`).
#[derive(Debug, Clone)]
pub struct LocalOutlierFactor {
    // Number of nearest neighbors to consider.
    pub n_neighbors: usize,
    // Expected fraction of anomalous rows (default 0.1).
    pub contamination: f64,
    // Neighbor-search algorithm name (default "auto").
    pub algorithm: String,
    // Per-row anomaly scores; `None` before fitting.
    pub scores: Option<Vec<f64>>,
    // Feature columns used; captured during `fit`.
    pub feature_columns: Option<Vec<String>>,
}

impl LocalOutlierFactor {
    /// Creates a detector that will use `n_neighbors` nearest neighbors,
    /// with 10% contamination and automatic algorithm selection.
    pub fn new(n_neighbors: usize) -> Self {
        LocalOutlierFactor {
            n_neighbors,
            contamination: 0.1,
            algorithm: "auto".to_string(),
            scores: None,
            feature_columns: None,
        }
    }

    /// Scores from the last `fit`; empty while the model is unfitted.
    pub fn anomaly_scores(&self) -> &[f64] {
        self.scores.as_deref().unwrap_or(&[])
    }

    /// Predicted labels. Placeholder: always empty.
    pub fn labels(&self) -> &[i64] {
        &[]
    }

    /// Sets the expected fraction of anomalies (builder style).
    pub fn contamination(mut self, fraction: f64) -> Self {
        self.contamination = fraction;
        self
    }

    /// Sets the neighbor-search algorithm by name.
    pub fn algorithm(mut self, name: &str) -> Self {
        self.algorithm = name.to_string();
        self
    }

    /// Restricts fitting to the given feature columns.
    pub fn with_columns(mut self, selected: Vec<String>) -> Self {
        self.feature_columns = Some(selected);
        self
    }
}
impl UnsupervisedModel for LocalOutlierFactor {
fn fit(&mut self, data: &DataFrame) -> Result<()> {
self.feature_columns = Some(data.column_names().into());
Ok(())
}
fn transform(&self, _data: &DataFrame) -> Result<DataFrame> {
Err(Error::InvalidOperation(
"LocalOutlierFactor does not support transform in current implementation".into(),
))
}
}
impl ModelEvaluator for LocalOutlierFactor {
    /// Reports the configured contamination rate as `anomaly_ratio`; the
    /// test data and target are ignored (unsupervised model).
    fn evaluate(&self, _test_data: &DataFrame, _test_target: &str) -> Result<ModelMetrics> {
        let mut report = ModelMetrics::new();
        report.add_metric("anomaly_ratio", self.contamination);
        Ok(report)
    }

    /// Always an error: cross-validation requires labeled targets.
    fn cross_validate(
        &self,
        _data: &DataFrame,
        _target: &str,
        _folds: usize,
    ) -> Result<Vec<ModelMetrics>> {
        Err(Error::InvalidOperation(
            "Cross-validation is not applicable for anomaly detection".into(),
        ))
    }
}
/// One-class SVM anomaly detector: configuration plus fitted state.
///
/// NOTE(review): the current implementation is a placeholder — no support
/// vectors are learned and transform scores are random (see the impls).
#[derive(Debug, Clone)]
pub struct OneClassSVM {
    // Kernel name (default "rbf").
    pub kernel: String,
    // Upper bound on the fraction of training errors / anomalies (default 0.1).
    pub nu: f64,
    // Kernel coefficient; `None` leaves it unset.
    pub gamma: Option<f64>,
    // Per-row anomaly scores; `None` before fitting.
    pub scores: Option<Vec<f64>>,
    // Feature columns used; captured during `fit`.
    pub feature_columns: Option<Vec<String>>,
}

impl OneClassSVM {
    /// Creates a model with the defaults: RBF kernel, `nu = 0.1`, no gamma.
    pub fn new() -> Self {
        OneClassSVM {
            kernel: "rbf".to_string(),
            nu: 0.1,
            gamma: None,
            scores: None,
            feature_columns: None,
        }
    }

    /// Scores from the last `fit`; empty while the model is unfitted.
    pub fn anomaly_scores(&self) -> &[f64] {
        self.scores.as_deref().unwrap_or(&[])
    }

    /// Predicted labels. Placeholder: always empty.
    pub fn labels(&self) -> &[i64] {
        &[]
    }

    /// Sets the kernel by name (builder style).
    pub fn kernel(mut self, kernel: &str) -> Self {
        self.kernel = kernel.to_string();
        self
    }

    /// Sets the `nu` parameter.
    pub fn nu(mut self, nu: f64) -> Self {
        self.nu = nu;
        self
    }

    /// Sets the kernel coefficient `gamma`.
    pub fn gamma(mut self, gamma: f64) -> Self {
        self.gamma = Some(gamma);
        self
    }

    /// Restricts fitting to the given feature columns.
    pub fn with_columns(mut self, columns: Vec<String>) -> Self {
        self.feature_columns = Some(columns);
        self
    }
}

/// `Default` mirrors [`OneClassSVM::new`] (clippy `new_without_default`).
impl Default for OneClassSVM {
    fn default() -> Self {
        Self::new()
    }
}
impl UnsupervisedModel for OneClassSVM {
    /// Records the feature columns. Placeholder: no support vectors are
    /// learned yet.
    fn fit(&mut self, data: &DataFrame) -> Result<()> {
        self.feature_columns = Some(data.column_names().into());
        Ok(())
    }

    /// Returns a copy of `data` extended with a placeholder `anomaly_score`
    /// column of uniform random values in `[-1, 1)`.
    fn transform(&self, data: &DataFrame) -> Result<DataFrame> {
        use rand::{Rng, RngExt};
        let mut rng = rand::rng();
        let n_samples = data.row_count();
        let mut scores = Vec::with_capacity(n_samples);
        for _ in 0..n_samples {
            scores.push(rng.random_range(-1.0..1.0));
        }
        let column = crate::series::Series::new(scores, Some("anomaly_score".to_string()))?;
        let mut augmented = data.clone();
        augmented.add_column("anomaly_score".to_string(), column)?;
        Ok(augmented)
    }
}
impl ModelEvaluator for OneClassSVM {
    /// Reports the configured `nu` as `anomaly_ratio`; the test data and
    /// target are ignored (unsupervised model).
    fn evaluate(&self, _test_data: &DataFrame, _test_target: &str) -> Result<ModelMetrics> {
        let mut report = ModelMetrics::new();
        report.add_metric("anomaly_ratio", self.nu);
        Ok(report)
    }

    /// Always an error: cross-validation requires labeled targets.
    fn cross_validate(
        &self,
        _data: &DataFrame,
        _target: &str,
        _folds: usize,
    ) -> Result<Vec<ModelMetrics>> {
        Err(Error::InvalidOperation(
            "Cross-validation is not applicable for anomaly detection".into(),
        ))
    }
}