use serde::{Deserialize, Serialize};
/// Dense, row-major matrix of `f64` values: `n_samples` rows × `n_features` columns.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataMatrix {
/// Flattened cell values; element `(r, c)` lives at index `r * n_features + c`.
/// Invariant (asserted in `DataMatrix::new`): `data.len() == n_samples * n_features`.
pub data: Vec<f64>,
/// Number of rows (samples).
pub n_samples: usize,
/// Number of columns (features per sample).
pub n_features: usize,
}
impl DataMatrix {
    /// Wraps an already-flattened, row-major buffer as a matrix.
    ///
    /// # Panics
    /// Panics when `data.len() != n_samples * n_features`.
    #[must_use]
    pub fn new(data: Vec<f64>, n_samples: usize, n_features: usize) -> Self {
        assert_eq!(data.len(), n_samples * n_features);
        Self { data, n_samples, n_features }
    }

    /// Concatenates a set of row slices into one matrix. The feature count
    /// is taken from the first row (zero when `rows` is empty).
    ///
    /// # Panics
    /// Panics when the rows do not all share the same length.
    #[must_use]
    pub fn from_rows(rows: &[&[f64]]) -> Self {
        let n_samples = rows.len();
        let n_features = rows.first().map(|first| first.len()).unwrap_or(0);
        let mut data = Vec::with_capacity(n_samples * n_features);
        rows.iter().for_each(|r| {
            assert_eq!(r.len(), n_features, "All rows must have same length");
            data.extend_from_slice(r);
        });
        Self { data, n_samples, n_features }
    }

    /// Maps a `(row, col)` pair to its offset in the flat buffer.
    fn idx(&self, row: usize, col: usize) -> usize {
        row * self.n_features + col
    }

    /// Borrows row `idx` as a slice of length `n_features`.
    #[must_use]
    pub fn row(&self, idx: usize) -> &[f64] {
        &self.data[idx * self.n_features..][..self.n_features]
    }

    /// Mutably borrows row `idx` as a slice of length `n_features`.
    pub fn row_mut(&mut self, idx: usize) -> &mut [f64] {
        let offset = idx * self.n_features;
        &mut self.data[offset..][..self.n_features]
    }

    /// Reads the value at (`row`, `col`).
    #[must_use]
    pub fn get(&self, row: usize, col: usize) -> f64 {
        self.data[self.idx(row, col)]
    }

    /// Writes `value` at (`row`, `col`).
    pub fn set(&mut self, row: usize, col: usize, value: f64) {
        let at = self.idx(row, col);
        self.data[at] = value;
    }

    /// Builds an all-zero matrix of the given shape.
    #[must_use]
    pub fn zeros(n_samples: usize, n_features: usize) -> Self {
        Self {
            data: vec![0.0; n_samples * n_features],
            n_samples,
            n_features,
        }
    }
}
/// Pairwise distance functions for feature vectors; evaluated by
/// [`DistanceMetric::compute`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum DistanceMetric {
/// Straight-line (L2) distance. The default metric.
#[default]
Euclidean,
/// Sum of absolute coordinate differences (L1).
Manhattan,
/// `1 - cosine similarity`; zero-norm inputs map to distance `1.0`
/// (see the handling in `compute`).
Cosine,
/// Maximum absolute coordinate difference (L∞).
Chebyshev,
}
impl DistanceMetric {
#[must_use]
pub fn compute(&self, a: &[f64], b: &[f64]) -> f64 {
match self {
DistanceMetric::Euclidean => a
.iter()
.zip(b.iter())
.map(|(x, y)| (x - y).powi(2))
.sum::<f64>()
.sqrt(),
DistanceMetric::Manhattan => a.iter().zip(b.iter()).map(|(x, y)| (x - y).abs()).sum(),
DistanceMetric::Cosine => {
let dot: f64 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
let norm_a: f64 = a.iter().map(|x| x.powi(2)).sum::<f64>().sqrt();
let norm_b: f64 = b.iter().map(|x| x.powi(2)).sum::<f64>().sqrt();
if norm_a == 0.0 || norm_b == 0.0 {
1.0
} else {
1.0 - (dot / (norm_a * norm_b))
}
}
DistanceMetric::Chebyshev => a
.iter()
.zip(b.iter())
.map(|(x, y)| (x - y).abs())
.fold(0.0f64, f64::max),
}
}
}
/// In-memory dataset of `f32` feature rows with optional integer labels.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Dataset {
/// One inner `Vec` per sample. Rows are expected to share the same length;
/// this is not validated here (ragged rows surface later in `to_data_matrix`).
pub features: Vec<Vec<f32>>,
/// Number of rows in `features`.
pub n_samples: usize,
/// Length of the first row, or 0 when the dataset is empty (see `Dataset::new`).
pub n_features: usize,
/// Optional per-sample labels; value semantics depend on the producing task
/// — NOTE(review): not determinable from this file, confirm with producers.
pub labels: Option<Vec<i32>>,
}
impl Dataset {
    /// Wraps raw feature rows in an unlabeled `Dataset`.
    ///
    /// `n_features` is derived from the first row (0 for an empty dataset).
    /// Row lengths are not validated here, but ragged rows will make
    /// [`Dataset::to_data_matrix`] panic.
    #[must_use]
    pub fn new(features: Vec<Vec<f32>>) -> Self {
        let n_samples = features.len();
        // map_or replaces the map(..).unwrap_or(0) chain (clippy::map_unwrap_or).
        let n_features = features.first().map_or(0, Vec::len);
        Self {
            features,
            n_samples,
            n_features,
            labels: None,
        }
    }

    /// Flattens the `f32` rows into a row-major `f64` [`DataMatrix`].
    ///
    /// # Panics
    /// Panics (via the assertion in `DataMatrix::new`) if the rows are ragged,
    /// i.e. the total element count differs from `n_samples * n_features`.
    #[must_use]
    pub fn to_data_matrix(&self) -> DataMatrix {
        let data: Vec<f64> = self
            .features
            .iter()
            // f64::from makes the widening f32 -> f64 conversion explicit and
            // provably lossless (clippy::cast_lossless), unlike a bare `as` cast.
            .flat_map(|row| row.iter().copied().map(f64::from))
            .collect();
        DataMatrix::new(data, self.n_samples, self.n_features)
    }
}
/// Output of a clustering run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClusteringResult {
/// Cluster assignment for each sample, parallel to the input rows.
pub labels: Vec<usize>,
/// Number of clusters produced.
pub n_clusters: usize,
/// Cluster centers — presumably flattened row-major (`n_clusters` ×
/// `n_features`) like `DataMatrix.data`; TODO confirm with the producer.
pub centroids: Vec<f64>,
/// Final objective value — presumably sum of squared distances to assigned
/// centroids; confirm the exact definition with the producing algorithm.
pub inertia: f64,
/// Number of iterations executed before stopping.
pub iterations: u32,
/// Whether the stopping criterion was convergence (criterion is defined by
/// the producing algorithm, not visible here).
pub converged: bool,
}
/// Output of an anomaly-detection run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnomalyResult {
/// Per-sample anomaly score; score polarity (higher vs. lower = more
/// anomalous) is producer-defined — TODO confirm before comparing to `threshold`.
pub scores: Vec<f64>,
/// Per-sample class flags; the `i32` encoding (e.g. inlier vs. outlier values)
/// is producer-defined — TODO confirm with the detector that fills this in.
pub labels: Vec<i32>,
/// Score cutoff — presumably the one used to derive `labels`; verify against
/// the producing detector.
pub threshold: f64,
}
/// Output of fitting a regression model.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegressionResult {
/// Fitted coefficients — presumably one per feature, in feature order;
/// TODO confirm with the fitting routine.
pub coefficients: Vec<f64>,
/// Fitted intercept (bias) term.
pub intercept: f64,
/// Goodness-of-fit score — presumably R², the coefficient of determination;
/// confirm the exact definition (and evaluation split) with the producer.
pub r2_score: f64,
}