use scirs2_core::ndarray::{Array1, Array2};
use scirs2_core::random::prelude::*;
use scirs2_core::random::Rng;

use crate::error::{Result, TransformError};
use scirs2_linalg::eigh;

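/// Spectral embedding of a graph from the eigenvectors of its Laplacian.
///
/// The embedding coordinates are the eigenvectors associated with the
/// smallest non-trivial eigenvalues of the chosen graph Laplacian.
///
/// A minimal usage sketch (the crate path and a tiny triangle graph are
/// assumed here, adjust to your setup):
///
/// ```ignore
/// let adjacency = edge_list_to_adjacency(&[(0, 1), (1, 2), (2, 0)], None);
/// let embedding = SpectralEmbedding::new(2)
///     .with_laplacian_type(LaplacianType::NormalizedSymmetric)
///     .fit_transform(&adjacency)?;
/// assert_eq!(embedding.shape(), &[3, 2]);
/// ```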
pub struct SpectralEmbedding {
    /// Number of embedding dimensions to keep.
    n_components: usize,
    /// Which graph Laplacian to decompose.
    laplacian_type: LaplacianType,
    /// Optional seed, kept for API symmetry with the walk-based models
    /// (the eigendecomposition itself is deterministic).
    random_state: Option<u64>,
}

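/// Graph Laplacian variants, for adjacency `A` with degree matrix `D`:
///
/// * `Unnormalized`: `L = D - A`
/// * `NormalizedSymmetric`: `L_sym = I - D^{-1/2} A D^{-1/2}`
/// * `NormalizedRandomWalk`: `L_rw = I - D^{-1} A`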
#[derive(Clone, Copy, Debug)]
pub enum LaplacianType {
    Unnormalized,
    NormalizedSymmetric,
    NormalizedRandomWalk,
}

impl SpectralEmbedding {
    /// Create a new spectral embedding with `n_components` output dimensions.
    pub fn new(n_components: usize) -> Self {
        SpectralEmbedding {
            n_components,
            laplacian_type: LaplacianType::NormalizedSymmetric,
            random_state: None,
        }
    }

    /// Select the Laplacian variant to decompose.
    pub fn with_laplacian_type(mut self, laplacian_type: LaplacianType) -> Self {
        self.laplacian_type = laplacian_type;
        self
    }

    /// Fix the random seed.
    pub fn with_random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }

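    /// Build the selected graph Laplacian from a square adjacency matrix.
    ///
    /// With degrees `d_i = sum_j A_ij`, this computes `L = D - A`,
    /// `L_sym = I - D^{-1/2} A D^{-1/2}`, or `L_rw = I - D^{-1} A`;
    /// isolated nodes (degree zero) contribute zero off-diagonal entries.
    /// Note that `fit_transform` uses a symmetric eigensolver, which is
    /// exact for the first two variants; `L_rw` is not symmetric in general.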
    fn compute_laplacian(&self, adjacency: &Array2<f64>) -> Result<Array2<f64>> {
        let n = adjacency.shape()[0];
        if adjacency.shape()[1] != n {
            return Err(TransformError::InvalidInput(
                "Adjacency matrix must be square".into(),
            ));
        }

        // Node degrees: row sums of the adjacency matrix.
        let degrees: Array1<f64> = adjacency.sum_axis(scirs2_core::ndarray::Axis(1));

        match self.laplacian_type {
            LaplacianType::Unnormalized => {
                // L = D - A
                let mut laplacian = -adjacency.clone();
                for i in 0..n {
                    laplacian[[i, i]] += degrees[i];
                }
                Ok(laplacian)
            }
            LaplacianType::NormalizedSymmetric => {
                // L_sym = I - D^{-1/2} A D^{-1/2}
                let mut laplacian = Array2::eye(n);
                let d_sqrt_inv = degrees.mapv(|d| if d > 0.0 { 1.0 / d.sqrt() } else { 0.0 });

                for i in 0..n {
                    for j in 0..n {
                        if adjacency[[i, j]] != 0.0 {
                            laplacian[[i, j]] -= d_sqrt_inv[i] * adjacency[[i, j]] * d_sqrt_inv[j];
                        }
                    }
                }
                Ok(laplacian)
            }
            LaplacianType::NormalizedRandomWalk => {
                // L_rw = I - D^{-1} A
                let mut laplacian = Array2::eye(n);
                let d_inv = degrees.mapv(|d| if d > 0.0 { 1.0 / d } else { 0.0 });

                for i in 0..n {
                    for j in 0..n {
                        if adjacency[[i, j]] != 0.0 {
                            laplacian[[i, j]] -= d_inv[i] * adjacency[[i, j]];
                        }
                    }
                }
                Ok(laplacian)
            }
        }
    }

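    /// Compute the spectral embedding of a graph given its adjacency matrix.
    ///
    /// The eigenvectors of the Laplacian are sorted by ascending eigenvalue;
    /// the trivial eigenvector (eigenvalue ~ 0) is skipped, and the next
    /// `n_components` eigenvectors become the embedding columns.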
    pub fn fit_transform(&self, adjacency: &Array2<f64>) -> Result<Array2<f64>> {
        let laplacian = self.compute_laplacian(adjacency)?;

        let (eigenvalues, eigenvectors) = eigh(&laplacian.view(), None).map_err(|e| {
            TransformError::ComputationError(format!("Eigendecomposition failed: {e}"))
        })?;

        // Pair each eigenvalue with its eigenvector and sort ascending.
        let mut eigen_pairs: Vec<(f64, Array1<f64>)> = eigenvalues
            .iter()
            .zip(eigenvectors.columns())
            .map(|(&val, vec)| (val, vec.to_owned()))
            .collect();

        // Eigenvalues of a symmetric matrix are real, so partial_cmp cannot fail.
        eigen_pairs.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());

        // Skip the trivial constant eigenvector (eigenvalue ~ 0) if present.
        let start_idx = if eigen_pairs[0].0.abs() < 1e-10 { 1 } else { 0 };
        let end_idx = (start_idx + self.n_components).min(eigen_pairs.len());

        let nnodes = adjacency.shape()[0];
        let actual_components = end_idx - start_idx;
        let mut embedding = Array2::zeros((nnodes, actual_components));

        for (col_idx, idx) in (start_idx..end_idx).enumerate() {
            embedding.column_mut(col_idx).assign(&eigen_pairs[idx].1);
        }

        Ok(embedding)
    }
}

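/// DeepWalk: node embeddings from uniform random walks trained with
/// skip-gram and negative sampling (Perozzi et al., 2014).
///
/// A minimal usage sketch (the crate path and the small example graph are
/// assumed here):
///
/// ```ignore
/// let adjacency = edge_list_to_adjacency(&[(0, 1), (1, 2), (2, 3)], None);
/// let embeddings = DeepWalk::new(16)
///     .with_n_walks(10)
///     .with_walk_length(40)
///     .fit_transform(&adjacency)?;
/// assert_eq!(embeddings.shape(), &[4, 16]);
/// ```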
pub struct DeepWalk {
    /// Dimensionality of the learned embeddings.
    embedding_dim: usize,
    /// Number of walks started from each node.
    n_walks: usize,
    /// Maximum length of each walk.
    walk_length: usize,
    /// Skip-gram context window size.
    window_size: usize,
    /// Negative samples drawn per positive pair.
    negative_samples: usize,
    /// Initial learning rate (decayed linearly over epochs).
    learning_rate: f64,
    /// Number of training epochs.
    n_epochs: usize,
    /// Optional seed for reproducible walks.
    random_state: Option<u64>,
}

impl DeepWalk {
    /// Create a DeepWalk model with sensible defaults.
    pub fn new(embedding_dim: usize) -> Self {
        DeepWalk {
            embedding_dim,
            n_walks: 10,
            walk_length: 80,
            window_size: 5,
            negative_samples: 5,
            learning_rate: 0.025,
            n_epochs: 1,
            random_state: None,
        }
    }

    /// Set the number of walks started from each node.
    pub fn with_n_walks(mut self, n_walks: usize) -> Self {
        self.n_walks = n_walks;
        self
    }

    /// Set the maximum walk length.
    pub fn with_walk_length(mut self, walk_length: usize) -> Self {
        self.walk_length = walk_length;
        self
    }

    /// Set the skip-gram window size.
    pub fn with_window_size(mut self, window_size: usize) -> Self {
        self.window_size = window_size;
        self
    }

    /// Fix the random seed.
    pub fn with_random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }

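    /// Generate `n_walks` uniform random walks from every node.
    ///
    /// Each walk follows uniformly random neighbours for up to `walk_length`
    /// steps and stops early at nodes with no outgoing edges.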
    fn generate_walks(&self, adjacency: &Array2<f64>) -> Vec<Vec<usize>> {
        let nnodes = adjacency.shape()[0];
        let mut walks = Vec::with_capacity(nnodes * self.n_walks);

        // Honour the configured seed so walks are reproducible.
        let mut rng = if let Some(seed) = self.random_state {
            StdRng::seed_from_u64(seed)
        } else {
            StdRng::seed_from_u64(scirs2_core::random::random::<u64>())
        };

        // Precompute the neighbour list of every node.
        let mut neighbors: Vec<Vec<usize>> = vec![Vec::new(); nnodes];
        for i in 0..nnodes {
            for j in 0..nnodes {
                if adjacency[[i, j]] > 0.0 {
                    neighbors[i].push(j);
                }
            }
        }

        for start_node in 0..nnodes {
            for _ in 0..self.n_walks {
                let mut walk = vec![start_node];
                let mut current = start_node;

                for _ in 1..self.walk_length {
                    let node_neighbors = &neighbors[current];
                    if node_neighbors.is_empty() {
                        break;
                    }

                    // Step to a uniformly random neighbour.
                    let next_idx = rng.gen_range(0..node_neighbors.len());
                    current = node_neighbors[next_idx];
                    walk.push(current);
                }

                if walk.len() > 1 {
                    walks.push(walk);
                }
            }
        }

        walks
    }

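    /// Train skip-gram embeddings with negative sampling over the walks.
    ///
    /// For a (center, context) pair the positive-sample update follows the
    /// gradient of `log sigmoid(u_center . v_context)`, and each negative
    /// sample follows the gradient of `log sigmoid(-u_center . v_neg)`,
    /// with the learning rate decayed linearly across epochs.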
    fn train_embeddings(&self, walks: &[Vec<usize>], nnodes: usize) -> Array2<f64> {
        let mut rng = if let Some(seed) = self.random_state {
            StdRng::seed_from_u64(seed)
        } else {
            StdRng::seed_from_u64(scirs2_core::random::random::<u64>())
        };

        // Small random initialization, scaled by the embedding dimension.
        let mut embeddings = Array2::zeros((nnodes, self.embedding_dim));
        for i in 0..nnodes {
            for j in 0..self.embedding_dim {
                embeddings[[i, j]] = rng.gen_range(-0.5..0.5) / self.embedding_dim as f64;
            }
        }

        let mut context_embeddings = embeddings.clone();

        for epoch in 0..self.n_epochs {
            let mut _total_loss = 0.0;
            // Linear learning-rate decay over epochs.
            let lr = self.learning_rate * (1.0 - epoch as f64 / self.n_epochs as f64);

            for walk in walks {
                for (idx, &center) in walk.iter().enumerate() {
                    let window_start = idx.saturating_sub(self.window_size);
                    let window_end = (idx + self.window_size + 1).min(walk.len());

                    #[allow(clippy::needless_range_loop)]
                    for context_idx in window_start..window_end {
                        if context_idx == idx {
                            continue;
                        }

                        let context = walk[context_idx];

                        // Positive pair: push the two embeddings together.
                        let center_vec = embeddings.row(center).to_owned();
                        let context_vec = context_embeddings.row(context).to_owned();
                        let dot_product = center_vec.dot(&context_vec);
                        let sigmoid = 1.0 / (1.0 + (-dot_product).exp());

                        let grad_coef = lr * (1.0 - sigmoid);
                        let center_grad = &context_vec * grad_coef;
                        let context_grad = &center_vec * grad_coef;

                        embeddings.row_mut(center).scaled_add(1.0, &center_grad);
                        context_embeddings
                            .row_mut(context)
                            .scaled_add(1.0, &context_grad);

                        _total_loss += -(sigmoid.ln());

                        // Negative samples: push random nodes apart.
                        for _ in 0..self.negative_samples {
                            let negative = rng.gen_range(0..nnodes);
                            if negative == context {
                                continue;
                            }

                            let neg_vec = context_embeddings.row(negative).to_owned();
                            let neg_dot = center_vec.dot(&neg_vec);
                            let neg_sigmoid = 1.0 / (1.0 + (-neg_dot).exp());

                            let neg_grad_coef = -lr * neg_sigmoid;
                            let center_neg_grad = &neg_vec * neg_grad_coef;
                            let neg_grad = &center_vec * neg_grad_coef;

                            embeddings.row_mut(center).scaled_add(1.0, &center_neg_grad);
                            context_embeddings
                                .row_mut(negative)
                                .scaled_add(1.0, &neg_grad);

                            _total_loss += -((1.0 - neg_sigmoid).ln());
                        }
                    }
                }
            }
        }

        embeddings
    }

    /// Generate random walks over `adjacency` and train node embeddings.
    pub fn fit_transform(&self, adjacency: &Array2<f64>) -> Result<Array2<f64>> {
        let walks = self.generate_walks(adjacency);
        if walks.is_empty() {
            return Err(TransformError::InvalidInput(
                "No valid walks generated".into(),
            ));
        }

        let embeddings = self.train_embeddings(&walks, adjacency.shape()[0]);
        Ok(embeddings)
    }
}

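/// Node2Vec: DeepWalk with second-order biased walks (Grover & Leskovec, 2016).
///
/// The return parameter `p` and in-out parameter `q` interpolate between
/// breadth-first (`q > 1`) and depth-first (`q < 1`) exploration.
///
/// A minimal usage sketch (the crate path and example graph are assumed here):
///
/// ```ignore
/// let adjacency = edge_list_to_adjacency(&[(0, 1), (1, 2), (2, 0), (2, 3)], None);
/// let embeddings = Node2Vec::new(16, 1.0, 0.5)
///     .configure_base(|dw| dw.with_n_walks(10).with_random_state(42))
///     .fit_transform(&adjacency)?;
/// assert_eq!(embeddings.shape(), &[4, 16]);
/// ```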
pub struct Node2Vec {
    /// The underlying DeepWalk model used for training.
    base_model: DeepWalk,
    /// Return parameter: higher `p` discourages revisiting the previous node.
    p: f64,
    /// In-out parameter: higher `q` keeps walks close to the previous node.
    q: f64,
}

impl Node2Vec {
    /// Create a Node2Vec model with the given dimensionality and bias parameters.
    pub fn new(embedding_dim: usize, p: f64, q: f64) -> Self {
        Node2Vec {
            base_model: DeepWalk::new(embedding_dim),
            p,
            q,
        }
    }

    /// Configure the underlying DeepWalk model (walks, window, seed, ...).
    pub fn configure_base<F>(mut self, f: F) -> Self
    where
        F: FnOnce(DeepWalk) -> DeepWalk,
    {
        self.base_model = f(self.base_model);
        self
    }

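    /// Generate second-order biased random walks.
    ///
    /// Given the previous node `t` and current node `v`, a candidate
    /// neighbour `x` is weighted `1/p` if `x == t`, `1` if `x` is also a
    /// neighbour of `t`, and `1/q` otherwise, then sampled from the
    /// normalized weights.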
    fn generate_biased_walks(&self, adjacency: &Array2<f64>) -> Vec<Vec<usize>> {
        let nnodes = adjacency.shape()[0];
        let mut walks = Vec::with_capacity(nnodes * self.base_model.n_walks);

        let mut rng = if let Some(seed) = self.base_model.random_state {
            StdRng::seed_from_u64(seed)
        } else {
            StdRng::seed_from_u64(scirs2_core::random::random::<u64>())
        };

        // Precompute the neighbour list of every node.
        let mut neighbors: Vec<Vec<usize>> = vec![Vec::new(); nnodes];
        for i in 0..nnodes {
            for j in 0..nnodes {
                if adjacency[[i, j]] > 0.0 {
                    neighbors[i].push(j);
                }
            }
        }

        for start_node in 0..nnodes {
            for _ in 0..self.base_model.n_walks {
                let mut walk = vec![start_node];

                if neighbors[start_node].is_empty() {
                    continue;
                }

                // The first step is unbiased: pick a uniform random neighbour.
                let first_step =
                    neighbors[start_node][rng.gen_range(0..neighbors[start_node].len())];
                walk.push(first_step);

                for _ in 2..self.base_model.walk_length {
                    let current = *walk.last().unwrap();
                    let prev = walk[walk.len() - 2];

                    let current_neighbors = &neighbors[current];
                    if current_neighbors.is_empty() {
                        break;
                    }

                    // Second-order transition weights relative to `prev`.
                    let mut probs = Vec::with_capacity(current_neighbors.len());
                    let mut total_prob = 0.0;

                    for &next in current_neighbors {
                        let prob = if next == prev {
                            // Returning to the previous node.
                            1.0 / self.p
                        } else if adjacency[[next, prev]] > 0.0 {
                            // `next` is also a neighbour of `prev`.
                            1.0
                        } else {
                            // Moving away from `prev`.
                            1.0 / self.q
                        };

                        probs.push(prob);
                        total_prob += prob;
                    }

                    // Normalize to a probability distribution.
                    for p in &mut probs {
                        *p /= total_prob;
                    }

                    // Sample the next node by inverse CDF.
                    let rand_val: f64 = rng.random();
                    let mut cumsum = 0.0;
                    let mut next_node = current_neighbors[0];

                    for (idx, &prob) in probs.iter().enumerate() {
                        cumsum += prob;
                        if rand_val <= cumsum {
                            next_node = current_neighbors[idx];
                            break;
                        }
                    }

                    walk.push(next_node);
                }

                if walk.len() > 1 {
                    walks.push(walk);
                }
            }
        }

        walks
    }

    /// Generate biased walks over `adjacency` and train node embeddings.
    pub fn fit_transform(&self, adjacency: &Array2<f64>) -> Result<Array2<f64>> {
        let walks = self.generate_biased_walks(adjacency);
        if walks.is_empty() {
            return Err(TransformError::InvalidInput(
                "No valid walks generated".into(),
            ));
        }

        let embeddings = self
            .base_model
            .train_embeddings(&walks, adjacency.shape()[0]);
        Ok(embeddings)
    }
}

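/// A simple graph autoencoder: a symmetric MLP that encodes each node's
/// adjacency row into a low-dimensional embedding and reconstructs the
/// adjacency through a sigmoid output trained with binary cross-entropy.
///
/// A minimal usage sketch (the crate path and example graph are assumed
/// here). The first encoder dimension must equal the number of nodes:
///
/// ```ignore
/// let adjacency = edge_list_to_adjacency(&[(0, 1), (1, 2), (2, 3), (3, 0)], None);
/// let embeddings = GraphAutoencoder::new(vec![4, 8, 2])
///     .with_activation(ActivationType::ReLU)
///     .with_n_epochs(100)
///     .fit_transform(&adjacency)?;
/// assert_eq!(embeddings.shape(), &[4, 2]);
/// ```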
pub struct GraphAutoencoder {
    /// Layer widths of the encoder; the decoder mirrors them in reverse.
    encoder_dims: Vec<usize>,
    /// Activation applied between hidden layers.
    activation: ActivationType,
    /// Gradient-descent step size.
    learning_rate: f64,
    /// Number of training epochs.
    n_epochs: usize,
}

#[derive(Clone, Copy, Debug)]
pub enum ActivationType {
    ReLU,
    Sigmoid,
    Tanh,
}

impl GraphAutoencoder {
    /// Create an autoencoder with the given encoder layer widths.
    pub fn new(encoder_dims: Vec<usize>) -> Self {
        GraphAutoencoder {
            encoder_dims,
            activation: ActivationType::ReLU,
            learning_rate: 0.01,
            n_epochs: 200,
        }
    }

    /// Set the hidden-layer activation.
    pub fn with_activation(mut self, activation: ActivationType) -> Self {
        self.activation = activation;
        self
    }

    /// Set the learning rate.
    pub fn with_learning_rate(mut self, lr: f64) -> Self {
        self.learning_rate = lr;
        self
    }

    /// Set the number of training epochs.
    pub fn with_n_epochs(mut self, n_epochs: usize) -> Self {
        self.n_epochs = n_epochs;
        self
    }

    /// Apply the configured activation elementwise.
    fn activate(&self, x: &Array2<f64>) -> Array2<f64> {
        match self.activation {
            ActivationType::ReLU => x.mapv(|v| v.max(0.0)),
            ActivationType::Sigmoid => x.mapv(|v| 1.0 / (1.0 + (-v).exp())),
            ActivationType::Tanh => x.mapv(|v| v.tanh()),
        }
    }

    /// Elementwise derivative of the activation, evaluated at `x`.
    fn activate_derivative(&self, x: &Array2<f64>) -> Array2<f64> {
        match self.activation {
            ActivationType::ReLU => x.mapv(|v| if v > 0.0 { 1.0 } else { 0.0 }),
            ActivationType::Sigmoid => {
                let sig = self.activate(x);
                &sig * &(1.0 - &sig)
            }
            ActivationType::Tanh => {
                let tanh = x.mapv(|v| v.tanh());
                1.0 - &tanh * &tanh
            }
        }
    }

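    /// Train the autoencoder on the adjacency matrix and return the encoder
    /// output (one embedding row per node).
    ///
    /// Each epoch runs a forward pass, a sigmoid reconstruction of the
    /// adjacency, a binary cross-entropy loss, and a gradient-descent update
    /// of the decoder and encoder weights. Decoder activations are not
    /// cached, so the mirrored encoder activations stand in for them during
    /// backpropagation; this is an approximation whose shapes stay consistent
    /// because the decoder mirrors the encoder.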
    pub fn fit_transform(&self, adjacency: &Array2<f64>) -> Result<Array2<f64>> {
        let nnodes = adjacency.shape()[0];

        if self.encoder_dims.is_empty() || self.encoder_dims[0] != nnodes {
            return Err(TransformError::InvalidInput(format!(
                "First encoder dimension must match number of nodes ({nnodes})"
            )));
        }

        let mut rng = scirs2_core::random::rng();

        // He-style initialization of the encoder weights.
        let mut encoder_weights = Vec::new();
        for i in 0..self.encoder_dims.len() - 1 {
            let (n_in, n_out) = (self.encoder_dims[i], self.encoder_dims[i + 1]);
            let scale = (2.0 / n_in as f64).sqrt();
            let mut w = Array2::zeros((n_in, n_out));

            for j in 0..n_in {
                for k in 0..n_out {
                    w[[j, k]] = rng.gen_range(-scale..scale);
                }
            }
            encoder_weights.push(w);
        }

        // The decoder starts as the transposed encoder, in reverse order.
        let mut decoder_weights: Vec<Array2<f64>> = encoder_weights
            .iter()
            .rev()
            .map(|w| w.t().to_owned())
            .collect();

        // Each node's feature vector is its adjacency row.
        let features = adjacency.clone();

        for _epoch in 0..self.n_epochs {
            // Forward pass through the encoder, caching layer outputs.
            let mut activations = vec![features.clone()];
            let mut z = features.clone();

            for (i, w) in encoder_weights.iter().enumerate() {
                z = z.dot(w);
                if i < encoder_weights.len() - 1 {
                    z = self.activate(&z);
                }
                activations.push(z.clone());
            }

            let _embeddings = z.clone();

            // Forward pass through the decoder.
            for (i, w) in decoder_weights.iter().enumerate() {
                z = z.dot(w);
                if i < decoder_weights.len() - 1 {
                    z = self.activate(&z);
                }
            }

            // Sigmoid output reconstructs edge probabilities.
            let reconstruction = z.mapv(|v| 1.0 / (1.0 + (-v).exp()));

            // Binary cross-entropy against the adjacency matrix.
            let loss = -adjacency * &reconstruction.mapv(|v| (v + 1e-8).ln())
                - (1.0 - adjacency) * &reconstruction.mapv(|v| (1.0 - v + 1e-8).ln());
            let _avg_loss = loss.mean();

            // Gradient of the BCE loss through the sigmoid output, pre-scaled
            // by the learning rate so each weight update carries one factor.
            let mut delta = &reconstruction - adjacency;
            delta *= self.learning_rate;

            // Backpropagate through the decoder, last layer first. The input
            // of decoder layer `decoder_len - 1 - i` mirrors encoder
            // activation `activations.len() - decoder_len + i`, which keeps
            // every gradient the same shape as its weight matrix.
            let decoder_len = decoder_weights.len();
            for (i, w) in decoder_weights.iter_mut().rev().enumerate() {
                let layer_idx = activations.len() - decoder_len + i;
                let grad = activations[layer_idx].t().dot(&delta);
                // Propagate with the pre-update weights before applying grad.
                delta = delta.dot(&w.t());
                if i < decoder_len - 1 {
                    delta *= &self.activate_derivative(&activations[layer_idx]);
                }
                *w -= &grad;
            }

            // Backpropagate through the encoder, last layer first.
            for (j, w) in encoder_weights.iter_mut().enumerate().rev() {
                let grad = activations[j].t().dot(&delta);
                if j > 0 {
                    delta = delta.dot(&w.t());
                    delta *= &self.activate_derivative(&activations[j]);
                }
                *w -= &grad;
            }
        }

        // Final encoder pass to produce the embeddings.
        let mut z = features;
        for (i, w) in encoder_weights.iter().enumerate() {
            z = z.dot(w);
            if i < encoder_weights.len() - 1 {
                z = self.activate(&z);
            }
        }

        Ok(z)
    }
}

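/// Build a symmetric (undirected, unweighted) adjacency matrix from an edge
/// list. If `nnodes` is `None`, the node count is inferred from the largest
/// node index; edges referencing out-of-range nodes are ignored.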
#[allow(dead_code)]
pub fn edge_list_to_adjacency(edges: &[(usize, usize)], nnodes: Option<usize>) -> Array2<f64> {
    let max_node = edges
        .iter()
        .flat_map(|(u, v)| vec![*u, *v])
        .max()
        .unwrap_or(0);

    let n = nnodes.unwrap_or(max_node + 1);
    let mut adjacency = Array2::zeros((n, n));

    for &(u, v) in edges {
        if u < n && v < n {
            adjacency[[u, v]] = 1.0;
            // Mirror the edge: the graph is undirected.
            adjacency[[v, u]] = 1.0;
        }
    }

    adjacency
}

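/// Extract the undirected edge list from an adjacency matrix, visiting only
/// the upper triangle so each edge is reported once as `(i, j)` with `i < j`.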
#[allow(dead_code)]
pub fn adjacency_to_edge_list(adjacency: &Array2<f64>) -> Vec<(usize, usize)> {
    let mut edges = Vec::new();
    let n = adjacency.shape()[0];

    for i in 0..n {
        for j in i + 1..n {
            if adjacency[[i, j]] > 0.0 {
                edges.push((i, j));
            }
        }
    }

    edges
}