rustkernel_ml/
types.rs

1//! Common ML types and data structures.
2
3use serde::{Deserialize, Serialize};
4
5// ============================================================================
6// Data Matrix
7// ============================================================================
8
/// A dense matrix for ML data (row-major storage).
///
/// Each row represents a sample, each column represents a feature.
///
/// Invariant (checked by [`DataMatrix::new`]): `data.len() == n_samples * n_features`.
/// Note the fields are public, so direct construction can bypass this check.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataMatrix {
    /// Flat storage of all values (row-major): element `(r, c)` lives at
    /// index `r * n_features + c`.
    pub data: Vec<f64>,
    /// Number of samples (rows).
    pub n_samples: usize,
    /// Number of features (columns).
    pub n_features: usize,
}
21
22impl DataMatrix {
23    /// Create a new data matrix from flat data.
24    #[must_use]
25    pub fn new(data: Vec<f64>, n_samples: usize, n_features: usize) -> Self {
26        assert_eq!(data.len(), n_samples * n_features);
27        Self {
28            data,
29            n_samples,
30            n_features,
31        }
32    }
33
34    /// Create a data matrix from row vectors.
35    #[must_use]
36    pub fn from_rows(rows: &[&[f64]]) -> Self {
37        let n_samples = rows.len();
38        let n_features = rows.first().map(|r| r.len()).unwrap_or(0);
39        let mut data = Vec::with_capacity(n_samples * n_features);
40
41        for row in rows {
42            assert_eq!(row.len(), n_features, "All rows must have same length");
43            data.extend_from_slice(row);
44        }
45
46        Self {
47            data,
48            n_samples,
49            n_features,
50        }
51    }
52
53    /// Get a row (sample) as a slice.
54    #[must_use]
55    pub fn row(&self, idx: usize) -> &[f64] {
56        let start = idx * self.n_features;
57        &self.data[start..start + self.n_features]
58    }
59
60    /// Get a mutable row.
61    pub fn row_mut(&mut self, idx: usize) -> &mut [f64] {
62        let start = idx * self.n_features;
63        let end = start + self.n_features;
64        &mut self.data[start..end]
65    }
66
67    /// Get element at (row, col).
68    #[must_use]
69    pub fn get(&self, row: usize, col: usize) -> f64 {
70        self.data[row * self.n_features + col]
71    }
72
73    /// Set element at (row, col).
74    pub fn set(&mut self, row: usize, col: usize, value: f64) {
75        self.data[row * self.n_features + col] = value;
76    }
77
78    /// Create a zero matrix.
79    #[must_use]
80    pub fn zeros(n_samples: usize, n_features: usize) -> Self {
81        Self {
82            data: vec![0.0; n_samples * n_features],
83            n_samples,
84            n_features,
85        }
86    }
87}
88
89// ============================================================================
90// Distance Metrics
91// ============================================================================
92
/// Distance metric for similarity calculations.
///
/// All variants are computed element-wise over two equal-length `f64` slices
/// by [`DistanceMetric::compute`]. Defaults to [`DistanceMetric::Euclidean`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum DistanceMetric {
    /// Euclidean distance (L2 norm).
    #[default]
    Euclidean,
    /// Manhattan distance (L1 norm).
    Manhattan,
    /// Cosine distance (1 - cosine similarity); ranges over [0, 2].
    Cosine,
    /// Chebyshev distance (L∞ norm): maximum absolute component difference.
    Chebyshev,
}
106
107impl DistanceMetric {
108    /// Compute distance between two vectors.
109    #[must_use]
110    pub fn compute(&self, a: &[f64], b: &[f64]) -> f64 {
111        match self {
112            DistanceMetric::Euclidean => a
113                .iter()
114                .zip(b.iter())
115                .map(|(x, y)| (x - y).powi(2))
116                .sum::<f64>()
117                .sqrt(),
118            DistanceMetric::Manhattan => a.iter().zip(b.iter()).map(|(x, y)| (x - y).abs()).sum(),
119            DistanceMetric::Cosine => {
120                let dot: f64 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
121                let norm_a: f64 = a.iter().map(|x| x.powi(2)).sum::<f64>().sqrt();
122                let norm_b: f64 = b.iter().map(|x| x.powi(2)).sum::<f64>().sqrt();
123                if norm_a == 0.0 || norm_b == 0.0 {
124                    1.0
125                } else {
126                    1.0 - (dot / (norm_a * norm_b))
127                }
128            }
129            DistanceMetric::Chebyshev => a
130                .iter()
131                .zip(b.iter())
132                .map(|(x, y)| (x - y).abs())
133                .fold(0.0f64, f64::max),
134        }
135    }
136}
137
138// ============================================================================
139// Dataset (Legacy/Compatibility)
140// ============================================================================
141
/// Dataset for ML operations (f32 version for compatibility).
///
/// Invariant (established by [`Dataset::new`]): `n_samples == features.len()`
/// and every inner vector has length `n_features`. The row-length invariant
/// is not checked here; it is enforced later by `DataMatrix::new` when
/// converting via [`Dataset::to_data_matrix`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Dataset {
    /// Feature matrix (rows: samples, cols: features).
    pub features: Vec<Vec<f32>>,
    /// Number of samples.
    pub n_samples: usize,
    /// Number of features.
    pub n_features: usize,
    /// Optional labels, one per sample.
    // NOTE(review): label encoding (class ids vs. -1/1 flags) is not visible
    // here — confirm against the consumers of this type.
    pub labels: Option<Vec<i32>>,
}
154
155impl Dataset {
156    /// Create a new dataset.
157    #[must_use]
158    pub fn new(features: Vec<Vec<f32>>) -> Self {
159        let n_samples = features.len();
160        let n_features = features.first().map(|f| f.len()).unwrap_or(0);
161        Self {
162            features,
163            n_samples,
164            n_features,
165            labels: None,
166        }
167    }
168
169    /// Convert to DataMatrix (f64).
170    #[must_use]
171    pub fn to_data_matrix(&self) -> DataMatrix {
172        let data: Vec<f64> = self
173            .features
174            .iter()
175            .flat_map(|row| row.iter().map(|&x| x as f64))
176            .collect();
177        DataMatrix::new(data, self.n_samples, self.n_features)
178    }
179}
180
181// ============================================================================
182// Clustering Result
183// ============================================================================
184
/// Clustering result.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClusteringResult {
    /// Cluster assignment per sample (index into the centroid list).
    pub labels: Vec<usize>,
    /// Number of clusters found.
    pub n_clusters: usize,
    /// Cluster centroids (flattened, row-major).
    // NOTE(review): presumably length is n_clusters * n_features — confirm
    // against the producing algorithm; the feature count is not stored here.
    pub centroids: Vec<f64>,
    /// Inertia/WCSS (within-cluster sum of squares); lower is tighter.
    pub inertia: f64,
    /// Number of iterations run.
    pub iterations: u32,
    /// Whether the algorithm converged (vs. hitting the iteration limit).
    pub converged: bool,
}
201
/// Anomaly detection result.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnomalyResult {
    /// Anomaly scores per sample.
    // NOTE(review): score orientation (higher = more anomalous?) is not
    // visible here — confirm against the detector that fills this in.
    pub scores: Vec<f64>,
    /// Binary labels (-1 for anomaly, 1 for normal), one per sample.
    pub labels: Vec<i32>,
    /// Threshold applied to `scores` to produce `labels`.
    pub threshold: f64,
}
212
/// Regression result.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegressionResult {
    /// Model coefficients, one per feature.
    pub coefficients: Vec<f64>,
    /// Intercept (bias) term.
    pub intercept: f64,
    /// R² score (coefficient of determination) on the training data.
    // NOTE(review): whether this is train- or holdout-set R² is not visible
    // here — confirm against the fitting code.
    pub r2_score: f64,
}