use serde::{Deserialize, Serialize};

/// Dense numeric matrix stored contiguously in row-major order.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataMatrix {
    /// Flattened matrix values, laid out row by row.
    pub data: Vec<f64>,
    /// Number of rows (samples).
    pub n_samples: usize,
    /// Number of columns (features).
    pub n_features: usize,
}

impl DataMatrix {
    /// Builds a matrix from an already-flattened, row-major buffer.
    ///
    /// # Panics
    /// Panics if `data.len() != n_samples * n_features`.
    #[must_use]
    pub fn new(data: Vec<f64>, n_samples: usize, n_features: usize) -> Self {
        assert_eq!(data.len(), n_samples * n_features);
        Self {
            data,
            n_samples,
            n_features,
        }
    }

    /// Builds a matrix by copying a slice of equal-length rows.
    ///
    /// # Panics
    /// Panics if the rows do not all have the same length.
    #[must_use]
    pub fn from_rows(rows: &[&[f64]]) -> Self {
        let n_samples = rows.len();
        let n_features = rows.first().map(|r| r.len()).unwrap_or(0);
        let mut data = Vec::with_capacity(n_samples * n_features);

        for row in rows {
            assert_eq!(row.len(), n_features, "All rows must have same length");
            data.extend_from_slice(row);
        }

        Self {
            data,
            n_samples,
            n_features,
        }
    }

    /// Returns the row at `idx` as an immutable slice.
    #[must_use]
    pub fn row(&self, idx: usize) -> &[f64] {
        let start = idx * self.n_features;
        &self.data[start..start + self.n_features]
    }

    /// Returns the row at `idx` as a mutable slice.
    pub fn row_mut(&mut self, idx: usize) -> &mut [f64] {
        let start = idx * self.n_features;
        let end = start + self.n_features;
        &mut self.data[start..end]
    }

    /// Returns the value at (`row`, `col`).
    #[must_use]
    pub fn get(&self, row: usize, col: usize) -> f64 {
        self.data[row * self.n_features + col]
    }

    /// Sets the value at (`row`, `col`).
    pub fn set(&mut self, row: usize, col: usize, value: f64) {
        self.data[row * self.n_features + col] = value;
    }

    /// Creates an `n_samples` x `n_features` matrix filled with zeros.
    #[must_use]
    pub fn zeros(n_samples: usize, n_features: usize) -> Self {
        Self {
            data: vec![0.0; n_samples * n_features],
            n_samples,
            n_features,
        }
    }
}
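
// Illustrative tests for `DataMatrix` (a minimal sketch): they exercise the
// row-major layout used by `row`, `get`, and `set` above.
#[cfg(test)]
mod data_matrix_tests {
    use super::*;

    #[test]
    fn row_major_layout_round_trip() {
        // A 2 x 3 matrix built from rows; `data` is flattened row by row.
        let rows: [&[f64]; 2] = [&[1.0, 2.0, 3.0], &[4.0, 5.0, 6.0]];
        let m = DataMatrix::from_rows(&rows);
        assert_eq!(m.n_samples, 2);
        assert_eq!(m.n_features, 3);
        assert_eq!(m.row(1), &[4.0, 5.0, 6.0]);
        assert_eq!(m.get(0, 2), 3.0);
    }

    #[test]
    fn zeros_then_set() {
        let mut m = DataMatrix::zeros(2, 2);
        m.set(1, 0, 7.5);
        assert_eq!(m.get(1, 0), 7.5);
        // The flat buffer reflects the same row-major position: 1 * 2 + 0 = 2.
        assert_eq!(m.data[2], 7.5);
    }
}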

/// Distance metrics for comparing pairs of feature vectors.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum DistanceMetric {
    /// Straight-line (L2) distance.
    #[default]
    Euclidean,
    /// Sum of absolute differences (L1).
    Manhattan,
    /// One minus the cosine similarity.
    Cosine,
    /// Maximum absolute difference over any single dimension (L-infinity).
    Chebyshev,
}

impl DistanceMetric {
    /// Computes the distance between two equal-length vectors `a` and `b`.
    #[must_use]
    pub fn compute(&self, a: &[f64], b: &[f64]) -> f64 {
        match self {
            DistanceMetric::Euclidean => a
                .iter()
                .zip(b.iter())
                .map(|(x, y)| (x - y).powi(2))
                .sum::<f64>()
                .sqrt(),
            DistanceMetric::Manhattan => a.iter().zip(b.iter()).map(|(x, y)| (x - y).abs()).sum(),
            DistanceMetric::Cosine => {
                let dot: f64 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
                let norm_a: f64 = a.iter().map(|x| x.powi(2)).sum::<f64>().sqrt();
                let norm_b: f64 = b.iter().map(|x| x.powi(2)).sum::<f64>().sqrt();
                // Define the distance involving a zero-norm vector as 1.0
                // instead of dividing by zero.
                if norm_a == 0.0 || norm_b == 0.0 {
                    1.0
                } else {
                    1.0 - (dot / (norm_a * norm_b))
                }
            }
            DistanceMetric::Chebyshev => a
                .iter()
                .zip(b.iter())
                .map(|(x, y)| (x - y).abs())
                .fold(0.0f64, f64::max),
        }
    }
}
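
// Illustrative tests for `DistanceMetric::compute` (a minimal sketch), using
// small vectors whose distances are easy to verify by hand.
#[cfg(test)]
mod distance_metric_tests {
    use super::*;

    #[test]
    fn metrics_on_simple_vectors() {
        let a = [0.0, 0.0];
        let b = [3.0, 4.0];
        // sqrt(3^2 + 4^2) = 5
        assert!((DistanceMetric::Euclidean.compute(&a, &b) - 5.0).abs() < 1e-12);
        // |3| + |4| = 7
        assert!((DistanceMetric::Manhattan.compute(&a, &b) - 7.0).abs() < 1e-12);
        // max(|3|, |4|) = 4
        assert!((DistanceMetric::Chebyshev.compute(&a, &b) - 4.0).abs() < 1e-12);
    }

    #[test]
    fn cosine_distance_edge_cases() {
        // Orthogonal vectors have cosine distance 1, parallel vectors 0.
        assert!((DistanceMetric::Cosine.compute(&[1.0, 0.0], &[0.0, 1.0]) - 1.0).abs() < 1e-12);
        assert!(DistanceMetric::Cosine.compute(&[2.0, 0.0], &[4.0, 0.0]).abs() < 1e-12);
        // A zero-norm vector takes the fallback distance of 1.0.
        assert!((DistanceMetric::Cosine.compute(&[0.0, 0.0], &[1.0, 2.0]) - 1.0).abs() < 1e-12);
    }
}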

/// Raw feature matrix stored as per-sample rows of `f32`, with optional labels.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Dataset {
    /// One feature vector per sample.
    pub features: Vec<Vec<f32>>,
    /// Number of samples (rows).
    pub n_samples: usize,
    /// Number of features per sample (columns).
    pub n_features: usize,
    /// Optional per-sample labels.
    pub labels: Option<Vec<i32>>,
}

impl Dataset {
    /// Builds a dataset from per-sample feature rows, with no labels attached.
    #[must_use]
    pub fn new(features: Vec<Vec<f32>>) -> Self {
        let n_samples = features.len();
        let n_features = features.first().map(|f| f.len()).unwrap_or(0);
        Self {
            features,
            n_samples,
            n_features,
            labels: None,
        }
    }

    /// Converts the features into a row-major `DataMatrix`, widening `f32` to `f64`.
    #[must_use]
    pub fn to_data_matrix(&self) -> DataMatrix {
        let data: Vec<f64> = self
            .features
            .iter()
            .flat_map(|row| row.iter().map(|&x| x as f64))
            .collect();
        DataMatrix::new(data, self.n_samples, self.n_features)
    }
}
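
// Illustrative test for `Dataset` (a minimal sketch): conversion to
// `DataMatrix` widens `f32` features to `f64` and preserves row order.
#[cfg(test)]
mod dataset_tests {
    use super::*;

    #[test]
    fn to_data_matrix_flattens_rows() {
        let ds = Dataset::new(vec![vec![1.0_f32, 2.0], vec![3.0, 4.0]]);
        assert_eq!(ds.n_samples, 2);
        assert_eq!(ds.n_features, 2);
        assert!(ds.labels.is_none());

        let m = ds.to_data_matrix();
        assert_eq!(m.data, vec![1.0, 2.0, 3.0, 4.0]);
        assert_eq!(m.get(1, 1), 4.0);
    }
}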

/// Output of a clustering run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClusteringResult {
    /// Cluster index assigned to each sample.
    pub labels: Vec<usize>,
    /// Number of clusters.
    pub n_clusters: usize,
    /// Flattened cluster centroids.
    pub centroids: Vec<f64>,
    /// Final inertia (within-cluster sum of squared distances).
    pub inertia: f64,
    /// Number of iterations performed.
    pub iterations: u32,
    /// Whether the algorithm converged.
    pub converged: bool,
}

/// Output of an anomaly-detection run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnomalyResult {
    /// Anomaly score for each sample.
    pub scores: Vec<f64>,
    /// Per-sample anomaly labels.
    pub labels: Vec<i32>,
    /// Score threshold separating anomalies from normal points.
    pub threshold: f64,
}

/// Output of a regression fit.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegressionResult {
    /// Fitted coefficient for each feature.
    pub coefficients: Vec<f64>,
    /// Fitted intercept term.
    pub intercept: f64,
    /// Coefficient of determination (R²) of the fit.
    pub r2_score: f64,
}