1use scirs2_core::ndarray::{Array2, ArrayView2};
6use scirs2_linalg::compat::{ArrayLinalgExt, UPLO};
7use sklears_core::{
8 error::{Result as SklResult, SklearsError},
9 traits::{Estimator, Fit, Transform, Untrained},
10 types::Float,
11};
12
13#[derive(Debug, Clone)]
46pub struct DiffusionMaps<S = Untrained> {
47 state: S,
48 n_components: usize,
49 diffusion_time: usize,
50 epsilon: Option<f64>,
51 alpha: f64,
52 eigen_solver: String,
53 random_state: Option<u64>,
54 n_jobs: Option<i32>,
55}
56
57#[derive(Debug, Clone)]
59pub struct DiffusionMapsTrained {
60 pub embedding: Array2<f64>,
62 pub eigenvalues: Array2<f64>,
64 pub eigenvectors: Array2<f64>,
66 pub affinity_matrix: Array2<f64>,
68 pub epsilon: f64,
70}
71
72impl DiffusionMaps<Untrained> {
73 pub fn new() -> Self {
75 Self {
76 state: Untrained,
77 n_components: 2,
78 diffusion_time: 1,
79 epsilon: None,
80 alpha: 0.5,
81 eigen_solver: "auto".to_string(),
82 random_state: None,
83 n_jobs: None,
84 }
85 }
86
87 pub fn n_components(mut self, n_components: usize) -> Self {
89 self.n_components = n_components;
90 self
91 }
92
93 pub fn diffusion_time(mut self, diffusion_time: usize) -> Self {
95 self.diffusion_time = diffusion_time;
96 self
97 }
98
99 pub fn epsilon(mut self, epsilon: f64) -> Self {
101 self.epsilon = Some(epsilon);
102 self
103 }
104
105 pub fn alpha(mut self, alpha: f64) -> Self {
107 self.alpha = alpha;
108 self
109 }
110
111 pub fn eigen_solver(mut self, eigen_solver: &str) -> Self {
113 self.eigen_solver = eigen_solver.to_string();
114 self
115 }
116
117 pub fn random_state(mut self, random_state: Option<u64>) -> Self {
119 self.random_state = random_state;
120 self
121 }
122
123 pub fn n_jobs(mut self, n_jobs: Option<i32>) -> Self {
125 self.n_jobs = n_jobs;
126 self
127 }
128}
129
130impl Default for DiffusionMaps<Untrained> {
131 fn default() -> Self {
132 Self::new()
133 }
134}
135
136impl Estimator for DiffusionMaps<Untrained> {
137 type Config = ();
138 type Error = SklearsError;
139 type Float = Float;
140
141 fn config(&self) -> &Self::Config {
142 &()
143 }
144}
145
146impl Fit<ArrayView2<'_, Float>, ()> for DiffusionMaps<Untrained> {
147 type Fitted = DiffusionMaps<DiffusionMapsTrained>;
148
149 fn fit(self, x: &ArrayView2<'_, Float>, _y: &()) -> SklResult<Self::Fitted> {
150 let x = x.mapv(|x| x);
151 let (n_samples, _) = x.dim();
152
153 if n_samples <= self.n_components {
154 return Err(SklearsError::InvalidInput(
155 "Number of samples must be greater than n_components".to_string(),
156 ));
157 }
158
159 let epsilon = self.epsilon.unwrap_or_else(|| self.estimate_epsilon(&x));
161 let affinity = self.compute_affinity_matrix(&x, epsilon)?;
162
163 let transition_matrix = self.normalize_affinity_matrix(&affinity)?;
165
166 let (eigenvalues, eigenvectors) = self.compute_eigenvectors(&transition_matrix)?;
168
169 let embedding = self.create_embedding(&eigenvalues, &eigenvectors)?;
171
172 Ok(DiffusionMaps {
173 state: DiffusionMapsTrained {
174 embedding,
175 eigenvalues,
176 eigenvectors,
177 affinity_matrix: affinity,
178 epsilon,
179 },
180 n_components: self.n_components,
181 diffusion_time: self.diffusion_time,
182 epsilon: Some(epsilon),
183 alpha: self.alpha,
184 eigen_solver: self.eigen_solver,
185 random_state: self.random_state,
186 n_jobs: self.n_jobs,
187 })
188 }
189}
190
191impl DiffusionMaps<Untrained> {
192 fn estimate_epsilon(&self, x: &Array2<f64>) -> f64 {
193 let n_samples = x.nrows();
194 let mut distances = Vec::new();
195
196 for i in 0..n_samples {
198 for j in i + 1..n_samples {
199 let diff = &x.row(i) - &x.row(j);
200 let dist = diff.mapv(|x| x * x).sum().sqrt();
201 distances.push(dist);
202 }
203 }
204
205 distances.sort_by(|a, b| a.partial_cmp(b).unwrap());
207 distances[distances.len() / 2]
208 }
209
210 fn compute_affinity_matrix(&self, x: &Array2<f64>, epsilon: f64) -> SklResult<Array2<f64>> {
211 let n_samples = x.nrows();
212 let mut affinity = Array2::zeros((n_samples, n_samples));
213
214 for i in 0..n_samples {
216 for j in 0..n_samples {
217 if i != j {
218 let diff = &x.row(i) - &x.row(j);
219 let dist_sq = diff.mapv(|x| x * x).sum();
220 let weight = (-dist_sq / (2.0 * epsilon * epsilon)).exp();
221 affinity[[i, j]] = weight;
222 } else {
223 affinity[[i, j]] = 1.0;
224 }
225 }
226 }
227
228 Ok(affinity)
229 }
230
231 fn normalize_affinity_matrix(&self, affinity: &Array2<f64>) -> SklResult<Array2<f64>> {
232 let n_samples = affinity.nrows();
233 let mut normalized = affinity.clone();
234
235 let mut degrees = Array2::zeros((n_samples, n_samples));
237 for i in 0..n_samples {
238 let degree_sum: f64 = affinity.row(i).sum();
239 degrees[[i, i]] = degree_sum;
240 }
241
242 for i in 0..n_samples {
245 for j in 0..n_samples {
246 if degrees[[i, i]] > 1e-10 && degrees[[j, j]] > 1e-10 {
247 let degree_factor_i = degrees[[i, i]].powf(-self.alpha);
248 let degree_factor_j = degrees[[j, j]].powf(-self.alpha);
249 normalized[[i, j]] = degree_factor_i * affinity[[i, j]] * degree_factor_j;
250 }
251 }
252 }
253
254 for i in 0..n_samples {
256 let row_sum: f64 = normalized.row(i).sum();
257 if row_sum > 1e-10 {
258 for j in 0..n_samples {
259 normalized[[i, j]] /= row_sum;
260 }
261 }
262 }
263
264 Ok(normalized)
265 }
266
267 fn compute_eigenvectors(
268 &self,
269 transition_matrix: &Array2<f64>,
270 ) -> SklResult<(Array2<f64>, Array2<f64>)> {
271 let symmetric_matrix = (transition_matrix + &transition_matrix.t()) / 2.0;
274
275 let (eigenvals, eigenvecs) = symmetric_matrix
277 .eigh(UPLO::Lower)
278 .map_err(|e| SklearsError::InvalidInput(format!("Eigendecomposition failed: {e}")))?;
279
280 let mut eigen_pairs: Vec<(f64, usize)> = eigenvals
282 .iter()
283 .enumerate()
284 .map(|(i, &val)| (val, i))
285 .collect();
286 eigen_pairs.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap());
287
288 let n = eigenvals.len();
290 let mut sorted_eigenvals = Array2::zeros((n, 1));
291 let mut sorted_eigenvecs = Array2::zeros((n, n));
292
293 for (new_idx, &(eigenval, old_idx)) in eigen_pairs.iter().enumerate() {
294 sorted_eigenvals[[new_idx, 0]] = eigenval;
295 for i in 0..n {
296 sorted_eigenvecs[[i, new_idx]] = eigenvecs[[i, old_idx]];
297 }
298 }
299
300 Ok((sorted_eigenvals, sorted_eigenvecs))
301 }
302
303 fn create_embedding(
304 &self,
305 eigenvalues: &Array2<f64>,
306 eigenvectors: &Array2<f64>,
307 ) -> SklResult<Array2<f64>> {
308 let n_samples = eigenvectors.nrows();
309 let mut embedding = Array2::zeros((n_samples, self.n_components));
310
311 for comp_idx in 0..self.n_components {
313 let eigen_idx = comp_idx + 1; if eigen_idx < eigenvalues.nrows() {
315 let eigenval = eigenvalues[[eigen_idx, 0]];
316
317 let diffusion_weight = eigenval.powf(self.diffusion_time as f64);
319
320 for i in 0..n_samples {
321 embedding[[i, comp_idx]] = eigenvectors[[i, eigen_idx]] * diffusion_weight;
322 }
323 }
324 }
325
326 Ok(embedding)
327 }
328}
329
330impl Transform<ArrayView2<'_, Float>, Array2<f64>> for DiffusionMaps<DiffusionMapsTrained> {
331 fn transform(&self, _x: &ArrayView2<'_, Float>) -> SklResult<Array2<f64>> {
332 Ok(self.state.embedding.clone())
334 }
335}
336
337impl DiffusionMaps<DiffusionMapsTrained> {
338 pub fn embedding(&self) -> &Array2<f64> {
340 &self.state.embedding
341 }
342
343 pub fn eigenvalues(&self) -> &Array2<f64> {
345 &self.state.eigenvalues
346 }
347
348 pub fn eigenvectors(&self) -> &Array2<f64> {
350 &self.state.eigenvectors
351 }
352
353 pub fn affinity_matrix(&self) -> &Array2<f64> {
355 &self.state.affinity_matrix
356 }
357
358 pub fn epsilon(&self) -> f64 {
360 self.state.epsilon
361 }
362}