use scirs2_core::ndarray::{Array1, Array2};
use scirs2_core::random::prelude::*;
use scirs2_core::random::{Rng, RngExt};
use crate::error::{Result, TransformError};
use scirs2_linalg::eigh;
use statrs::statistics::Statistics;
/// Spectral embedding of a graph: low-dimensional node coordinates taken from
/// the bottom eigenvectors of a graph Laplacian.
pub struct SpectralEmbedding {
    // Number of embedding dimensions returned by `fit_transform`.
    _ncomponents: usize,
    // Which Laplacian variant `compute_laplacian` builds.
    laplacian_type: LaplacianType,
    // Stored seed. NOTE(review): never read in this impl — the
    // eigendecomposition is deterministic; kept for API symmetry with the
    // other embedding models. Confirm before removing.
    random_state: Option<u64>,
}
/// Graph Laplacian variant used by [`SpectralEmbedding`].
#[derive(Clone, Copy, Debug)]
pub enum LaplacianType {
    /// L = D - A (combinatorial Laplacian).
    Unnormalized,
    /// L = I - D^{-1/2} A D^{-1/2} (symmetric normalized).
    NormalizedSymmetric,
    /// L = I - D^{-1} A (random-walk normalized).
    NormalizedRandomWalk,
}
impl SpectralEmbedding {
    /// Creates an embedding model returning `_ncomponents` dimensions, using
    /// the symmetric normalized Laplacian by default and no fixed seed.
    pub fn new(_ncomponents: usize) -> Self {
        SpectralEmbedding {
            _ncomponents,
            laplacian_type: LaplacianType::NormalizedSymmetric,
            random_state: None,
        }
    }

    /// Builder-style setter for the Laplacian variant.
    pub fn with_laplacian_type(mut self, laplaciantype: LaplacianType) -> Self {
        self.laplacian_type = laplaciantype;
        self
    }

    /// Builder-style setter for the random seed.
    ///
    /// NOTE(review): the seed is stored but never read in this impl — the
    /// eigendecomposition below is deterministic. Kept for API symmetry.
    pub fn with_random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }

    /// Builds the Laplacian selected by `self.laplacian_type`.
    ///
    /// # Errors
    /// `TransformError::InvalidInput` if `adjacency` is not square.
    fn compute_laplacian(&self, adjacency: &Array2<f64>) -> Result<Array2<f64>> {
        let n = adjacency.shape()[0];
        if adjacency.shape()[1] != n {
            return Err(TransformError::InvalidInput(
                "Adjacency matrix must be square".into(),
            ));
        }
        // Node degrees: row sums of the (possibly weighted) adjacency matrix.
        let degrees: Array1<f64> = adjacency.sum_axis(scirs2_core::ndarray::Axis(1));
        match self.laplacian_type {
            // L = D - A
            LaplacianType::Unnormalized => {
                let mut laplacian = -adjacency.clone();
                for i in 0..n {
                    laplacian[[i, i]] += degrees[i];
                }
                Ok(laplacian)
            }
            // L_sym = I - D^{-1/2} A D^{-1/2}; isolated nodes (degree 0)
            // contribute nothing off-diagonal (their inverse is forced to 0).
            LaplacianType::NormalizedSymmetric => {
                let mut laplacian = Array2::eye(n);
                let d_sqrt_inv = degrees.mapv(|d| if d > 0.0 { 1.0 / d.sqrt() } else { 0.0 });
                for i in 0..n {
                    for j in 0..n {
                        if adjacency[[i, j]] != 0.0 {
                            laplacian[[i, j]] -=
                                d_sqrt_inv[i] * adjacency[[i, j]] * d_sqrt_inv[j];
                        }
                    }
                }
                Ok(laplacian)
            }
            // L_rw = I - D^{-1} A
            LaplacianType::NormalizedRandomWalk => {
                let mut laplacian = Array2::eye(n);
                let d_inv = degrees.mapv(|d| if d > 0.0 { 1.0 / d } else { 0.0 });
                for i in 0..n {
                    for j in 0..n {
                        if adjacency[[i, j]] != 0.0 {
                            laplacian[[i, j]] -= d_inv[i] * adjacency[[i, j]];
                        }
                    }
                }
                Ok(laplacian)
            }
        }
    }

    /// Embeds the graph: returns an `(n_nodes, k)` matrix whose columns are
    /// the eigenvectors of the Laplacian with the smallest eigenvalues,
    /// skipping the trivial near-zero eigenvector when present. `k` may be
    /// smaller than the requested component count for very small graphs.
    ///
    /// # Errors
    /// Propagates invalid-input errors from `compute_laplacian`; wraps
    /// eigendecomposition failures in `TransformError::ComputationError`.
    pub fn fit_transform(&self, adjacency: &Array2<f64>) -> Result<Array2<f64>> {
        let laplacian = self.compute_laplacian(adjacency)?;
        let (eigenvalues, eigenvectors) = eigh(&laplacian.view(), None).map_err(|e| {
            TransformError::ComputationError(format!("Eigendecomposition failed: {e}"))
        })?;
        let mut eigen_pairs: Vec<(f64, Array1<f64>)> = eigenvalues
            .iter()
            .zip(eigenvectors.columns())
            .map(|(&val, vec)| (val, vec.to_owned()))
            .collect();
        // f64::total_cmp is a total order (NaN included), so this sort can
        // never panic — unlike the previous partial_cmp + expect.
        eigen_pairs.sort_by(|a, b| a.0.total_cmp(&b.0));
        // The first eigenvector of a connected graph's Laplacian has
        // eigenvalue ~0 and is constant — it carries no information, skip it.
        let start_idx = if eigen_pairs[0].0.abs() < 1e-10 { 1 } else { 0 };
        let end_idx = (start_idx + self._ncomponents).min(eigen_pairs.len());
        let nnodes = adjacency.shape()[0];
        let actual_components = end_idx - start_idx;
        let mut embedding = Array2::zeros((nnodes, actual_components));
        for (col_idx, idx) in (start_idx..end_idx).enumerate() {
            embedding.column_mut(col_idx).assign(&eigen_pairs[idx].1);
        }
        Ok(embedding)
    }
}
/// DeepWalk: skip-gram embeddings trained on uniform random walks over a
/// graph (Perozzi et al., 2014 style, with negative sampling).
pub struct DeepWalk {
    // Dimensionality of the output node embeddings.
    _embeddingdim: usize,
    // Number of walks started from each node.
    n_walks: usize,
    // Maximum number of nodes per walk.
    walk_length: usize,
    // Skip-gram context window radius (in walk positions).
    window_size: usize,
    // Negative samples drawn per positive (center, context) pair.
    negative_samples: usize,
    // Initial SGD learning rate; decays linearly across epochs.
    learning_rate: f64,
    // Number of passes over the generated walks.
    n_epochs: usize,
    // Optional seed for reproducible walks and training.
    random_state: Option<u64>,
}
impl DeepWalk {
pub fn new(_embeddingdim: usize) -> Self {
DeepWalk {
_embeddingdim,
n_walks: 10,
walk_length: 80,
window_size: 5,
negative_samples: 5,
learning_rate: 0.025,
n_epochs: 1,
random_state: None,
}
}
pub fn with_n_walks(mut self, nwalks: usize) -> Self {
self.n_walks = nwalks;
self
}
pub fn with_walk_length(mut self, walklength: usize) -> Self {
self.walk_length = walklength;
self
}
pub fn with_window_size(mut self, windowsize: usize) -> Self {
self.window_size = windowsize;
self
}
pub fn with_random_state(mut self, seed: u64) -> Self {
self.random_state = Some(seed);
self
}
fn generate_walks(&self, adjacency: &Array2<f64>) -> Vec<Vec<usize>> {
let nnodes = adjacency.shape()[0];
let mut walks = Vec::with_capacity(nnodes * self.n_walks);
let mut rng = scirs2_core::random::rng();
let mut neighbors: Vec<Vec<usize>> = vec![Vec::new(); nnodes];
for i in 0..nnodes {
for j in 0..nnodes {
if adjacency[[i, j]] > 0.0 {
neighbors[i].push(j);
}
}
}
for start_node in 0..nnodes {
for _ in 0..self.n_walks {
let mut walk = vec![start_node];
let mut current = start_node;
for _ in 1..self.walk_length {
let node_neighbors = &neighbors[current];
if node_neighbors.is_empty() {
break;
}
let next_idx = rng.random_range(0..node_neighbors.len());
current = node_neighbors[next_idx];
walk.push(current);
}
if walk.len() > 1 {
walks.push(walk);
}
}
}
walks
}
fn train_embeddings(&self, walks: &[Vec<usize>], nnodes: usize) -> Array2<f64> {
let mut rng = scirs2_core::random::rng();
let mut embeddings = Array2::zeros((nnodes, self._embeddingdim));
for i in 0..nnodes {
for j in 0..self._embeddingdim {
embeddings[[i, j]] = rng.random_range(-0.5..0.5) / self._embeddingdim as f64;
}
}
let mut context_embeddings = embeddings.clone();
for epoch in 0..self.n_epochs {
let mut _total_loss = 0.0;
let lr = self.learning_rate * (1.0 - epoch as f64 / self.n_epochs as f64);
for walk in walks {
for (idx, ¢er) in walk.iter().enumerate() {
let window_start = idx.saturating_sub(self.window_size);
let window_end = (idx + self.window_size + 1).min(walk.len());
#[allow(clippy::needless_range_loop)]
for context_idx in window_start..window_end {
if context_idx == idx {
continue;
}
let context = walk[context_idx];
let center_vec = embeddings.row(center).to_owned();
let context_vec = context_embeddings.row(context).to_owned();
let dot_product = center_vec.dot(&context_vec);
let sigmoid = 1.0 / (1.0 + (-dot_product).exp());
let grad_coef = lr * (1.0 - sigmoid);
let center_grad = &context_vec * grad_coef;
let context_grad = ¢er_vec * grad_coef;
embeddings.row_mut(center).scaled_add(1.0, ¢er_grad);
context_embeddings
.row_mut(context)
.scaled_add(1.0, &context_grad);
_total_loss += -(sigmoid.ln());
for _ in 0..self.negative_samples {
let negative = rng.random_range(0..nnodes);
if negative == context {
continue;
}
let neg_vec = context_embeddings.row(negative).to_owned();
let neg_dot = center_vec.dot(&neg_vec);
let neg_sigmoid = 1.0 / (1.0 + (-neg_dot).exp());
let neg_grad_coef = -lr * neg_sigmoid;
let center_neg_grad = &neg_vec * neg_grad_coef;
let neg_grad = ¢er_vec * neg_grad_coef;
embeddings.row_mut(center).scaled_add(1.0, ¢er_neg_grad);
context_embeddings
.row_mut(negative)
.scaled_add(1.0, &neg_grad);
_total_loss += -((1.0 - neg_sigmoid).ln());
}
}
}
}
}
embeddings
}
pub fn fit_transform(&self, adjacency: &Array2<f64>) -> Result<Array2<f64>> {
let walks = self.generate_walks(adjacency);
if walks.is_empty() {
return Err(TransformError::InvalidInput(
"No valid walks generated".into(),
));
}
let embeddings = self.train_embeddings(&walks, adjacency.shape()[0]);
Ok(embeddings)
}
}
/// node2vec: DeepWalk with second-order biased walks, controlled by the
/// return parameter `p` and the in-out parameter `q`.
pub struct Node2Vec {
    // Underlying DeepWalk model providing hyper-parameters and training.
    base_model: DeepWalk,
    // Return parameter: a weight of 1/p is assigned to stepping back to the
    // previous node.
    p: f64,
    // In-out parameter: a weight of 1/q is assigned to nodes not adjacent to
    // the previous node (encourages exploration when q < 1).
    q: f64,
}
impl Node2Vec {
    /// Creates a node2vec model with the given embedding dimension and bias
    /// parameters `p` (return) and `q` (in-out).
    pub fn new(_embeddingdim: usize, p: f64, q: f64) -> Self {
        Node2Vec {
            base_model: DeepWalk::new(_embeddingdim),
            p,
            q,
        }
    }

    /// Applies a configuration closure to the underlying `DeepWalk` model
    /// (walk count, walk length, window size, seed, ...).
    pub fn configure_base<F>(mut self, f: F) -> Self
    where
        F: FnOnce(DeepWalk) -> DeepWalk,
    {
        self.base_model = f(self.base_model);
        self
    }

    /// Generates second-order biased random walks (Grover & Leskovec, 2016).
    ///
    /// Given the previous node `prev` and current node `current`, each
    /// candidate neighbor `next` is weighted: 1/p if `next == prev`, 1 if
    /// `next` is also adjacent to `prev`, 1/q otherwise. Weights are
    /// normalized and the next node is drawn by inverse-CDF sampling.
    fn generate_biased_walks(&self, adjacency: &Array2<f64>) -> Vec<Vec<usize>> {
        let nnodes = adjacency.shape()[0];
        let mut walks = Vec::with_capacity(nnodes * self.base_model.n_walks);
        // Honor the base model's seed so walks are reproducible.
        let mut rng = if let Some(seed) = self.base_model.random_state {
            StdRng::seed_from_u64(seed)
        } else {
            StdRng::seed_from_u64(scirs2_core::random::random::<u64>())
        };
        // Precompute neighbor lists once instead of scanning rows per step.
        let mut neighbors: Vec<Vec<usize>> = vec![Vec::new(); nnodes];
        for i in 0..nnodes {
            for j in 0..nnodes {
                if adjacency[[i, j]] > 0.0 {
                    neighbors[i].push(j);
                }
            }
        }
        for start_node in 0..nnodes {
            for _ in 0..self.base_model.n_walks {
                let mut walk = vec![start_node];
                if neighbors[start_node].is_empty() {
                    continue;
                }
                // The first step is unbiased — there is no previous node yet.
                let first_step =
                    neighbors[start_node][rng.random_range(0..neighbors[start_node].len())];
                walk.push(first_step);
                for _ in 2..self.base_model.walk_length {
                    let current = *walk.last().expect("Operation failed");
                    let prev = walk[walk.len() - 2];
                    let current_neighbors = &neighbors[current];
                    if current_neighbors.is_empty() {
                        break;
                    }
                    let mut probs = Vec::with_capacity(current_neighbors.len());
                    let mut total_prob = 0.0;
                    for &next in current_neighbors {
                        let prob = if next == prev {
                            // Returning to the previous node.
                            1.0 / self.p
                        } else if adjacency[[next, prev]] > 0.0 {
                            // `next` is a common neighbor of `prev` (BFS-like).
                            1.0
                        } else {
                            // Moving further from `prev` (DFS-like).
                            1.0 / self.q
                        };
                        probs.push(prob);
                        total_prob += prob;
                    }
                    // Normalize to a probability distribution.
                    for p in &mut probs {
                        *p /= total_prob;
                    }
                    // Inverse-CDF sampling of the next node; the fallback to
                    // current_neighbors[0] guards floating-point rounding that
                    // could leave cumsum slightly below rand_val.
                    let rand_val: f64 = rng.random();
                    let mut cumsum = 0.0;
                    let mut next_node = current_neighbors[0];
                    for (idx, &prob) in probs.iter().enumerate() {
                        cumsum += prob;
                        if rand_val <= cumsum {
                            next_node = current_neighbors[idx];
                            break;
                        }
                    }
                    walk.push(next_node);
                }
                if walk.len() > 1 {
                    walks.push(walk);
                }
            }
        }
        walks
    }

    /// Generates biased walks, then trains skip-gram embeddings through the
    /// base `DeepWalk` model.
    ///
    /// # Errors
    /// `TransformError::InvalidInput` when no walk could be generated.
    pub fn fit_transform(&self, adjacency: &Array2<f64>) -> Result<Array2<f64>> {
        let walks = self.generate_biased_walks(adjacency);
        if walks.is_empty() {
            return Err(TransformError::InvalidInput(
                "No valid walks generated".into(),
            ));
        }
        let embeddings = self
            .base_model
            .train_embeddings(&walks, adjacency.shape()[0]);
        Ok(embeddings)
    }
}
/// Fully-connected autoencoder trained to reconstruct a graph's adjacency
/// matrix; the encoder bottleneck output serves as node embeddings.
pub struct GraphAutoencoder {
    // Layer widths of the encoder, input first; the first entry must equal
    // the number of nodes. The decoder mirrors these in reverse.
    _encoderdims: Vec<usize>,
    // Activation applied to all hidden (non-final) layers.
    activation: ActivationType,
    // Gradient-descent learning rate.
    learning_rate: f64,
    // Number of full-batch training epochs.
    n_epochs: usize,
}
/// Hidden-layer activation function for [`GraphAutoencoder`].
#[derive(Clone, Copy, Debug)]
pub enum ActivationType {
    /// max(0, x)
    ReLU,
    /// 1 / (1 + e^{-x})
    Sigmoid,
    /// tanh(x)
    Tanh,
}
impl GraphAutoencoder {
    /// Creates an autoencoder with the given encoder layer widths (input
    /// first; must start with the node count). Defaults: ReLU activation,
    /// learning rate 0.01, 200 epochs.
    pub fn new(_encoderdims: Vec<usize>) -> Self {
        GraphAutoencoder {
            _encoderdims,
            activation: ActivationType::ReLU,
            learning_rate: 0.01,
            n_epochs: 200,
        }
    }

    /// Builder-style setter for the hidden-layer activation.
    pub fn with_activation(mut self, activation: ActivationType) -> Self {
        self.activation = activation;
        self
    }

    /// Builder-style setter for the learning rate.
    pub fn with_learning_rate(mut self, lr: f64) -> Self {
        self.learning_rate = lr;
        self
    }

    /// Builder-style setter for the number of training epochs.
    pub fn with_n_epochs(mut self, nepochs: usize) -> Self {
        self.n_epochs = nepochs;
        self
    }

    /// Applies the configured activation element-wise.
    fn activate(&self, x: &Array2<f64>) -> Array2<f64> {
        match self.activation {
            ActivationType::ReLU => x.mapv(|v| v.max(0.0)),
            ActivationType::Sigmoid => x.mapv(|v| 1.0 / (1.0 + (-v).exp())),
            ActivationType::Tanh => x.mapv(|v| v.tanh()),
        }
    }

    /// Element-wise derivative of the configured activation, evaluated at `x`
    /// (expected to be the pre-activation input).
    fn activate_derivative(&self, x: &Array2<f64>) -> Array2<f64> {
        match self.activation {
            ActivationType::ReLU => x.mapv(|v| if v > 0.0 { 1.0 } else { 0.0 }),
            ActivationType::Sigmoid => {
                let sig = self.activate(x);
                &sig * &(1.0 - &sig)
            }
            ActivationType::Tanh => {
                let tanh = x.mapv(|v| v.tanh());
                1.0 - &tanh * &tanh
            }
        }
    }

    /// Trains the autoencoder to reconstruct `adjacency` (sigmoid output,
    /// binary cross-entropy loss) and returns the encoder's bottleneck output
    /// as the `(n_nodes, last_dim)` embedding matrix.
    ///
    /// # Errors
    /// `TransformError::InvalidInput` when `_encoderdims` is empty or its
    /// first entry does not match the node count.
    pub fn fit_transform(&self, adjacency: &Array2<f64>) -> Result<Array2<f64>> {
        let nnodes = adjacency.shape()[0];
        if self._encoderdims.is_empty() || self._encoderdims[0] != nnodes {
            return Err(TransformError::InvalidInput(format!(
                "First encoder dimension must match number of nodes ({nnodes})"
            )));
        }
        let mut rng = scirs2_core::random::rng();
        // He-style initialization: uniform in ±sqrt(2 / fan_in).
        let mut encoder_weights = Vec::new();
        for i in 0..self._encoderdims.len() - 1 {
            let (n_in, n_out) = (self._encoderdims[i], self._encoderdims[i + 1]);
            let scale = (2.0 / n_in as f64).sqrt();
            let mut w = Array2::zeros((n_in, n_out));
            for j in 0..n_in {
                for k in 0..n_out {
                    w[[j, k]] = rng.random_range(-scale..scale);
                }
            }
            encoder_weights.push(w);
        }
        // Decoder mirrors the encoder: transposed weights in reverse order
        // (tied only at initialization; they are updated independently below).
        let mut decoder_weights: Vec<Array2<f64>> = encoder_weights
            .iter()
            .rev()
            .map(|w| w.t().to_owned())
            .collect();
        // Node features are the adjacency rows themselves.
        let features = adjacency.clone();
        for _epoch in 0..self.n_epochs {
            // Encoder forward pass; `activations` records each layer output.
            // The final encoder layer is linear (no activation).
            let mut activations = vec![features.clone()];
            let mut z = features.clone();
            for (i, w) in encoder_weights.iter().enumerate() {
                z = z.dot(w);
                if i < encoder_weights.len() - 1 {
                    z = self.activate(&z);
                }
                activations.push(z.clone());
            }
            let _embeddings = z.clone();
            // Decoder forward pass. NOTE(review): decoder intermediate
            // activations are NOT recorded, which matters for the backward
            // pass below.
            for (i, w) in decoder_weights.iter().enumerate() {
                z = z.dot(w);
                if i < decoder_weights.len() - 1 {
                    z = self.activate(&z);
                }
            }
            // Sigmoid on the decoder output -> reconstruction probabilities.
            let reconstruction = z.mapv(|v| 1.0 / (1.0 + (-v).exp()));
            // Binary cross-entropy vs. the adjacency matrix; 1e-8 guards ln(0).
            let loss = -adjacency * &reconstruction.mapv(|v| (v + 1e-8).ln())
                - (1.0 - adjacency) * &reconstruction.mapv(|v| (1.0 - v + 1e-8).ln());
            let _avg_loss = loss.mean();
            // For sigmoid + BCE, dL/dz = (y_hat - y). The learning rate is
            // folded into delta once so each update is simply `w -= grad`.
            let mut delta = &reconstruction - adjacency;
            delta *= self.learning_rate;
            // Backward through the decoder.
            // NOTE(review): `activations` holds only encoder-side outputs, so
            // `activations[layer_idx]` here stands in for the decoder layer's
            // true input (which was not saved), and `activate_derivative` is
            // applied to post-activation values (exact for ReLU, approximate
            // for sigmoid/tanh). Looks like a deliberate mirror-weight
            // approximation — confirm intended before "fixing".
            let decoder_len = decoder_weights.len();
            for (i, w) in decoder_weights.iter_mut().rev().enumerate() {
                let layer_idx = activations.len() - 2 - i;
                let grad = activations[layer_idx].t().dot(&delta);
                *w -= &grad;
                if i < decoder_len - 1 {
                    delta = delta.dot(&w.t());
                    delta *= &self.activate_derivative(&activations[layer_idx]);
                }
            }
            // Backward through the encoder, reusing the delta propagated out
            // of the decoder loop.
            let encoder_len = encoder_weights.len();
            for (i, w) in encoder_weights.iter_mut().enumerate() {
                let grad = activations[i].t().dot(&delta);
                *w -= &grad;
                if i < encoder_len - 1 {
                    delta = delta.dot(&w.t());
                    delta *= &self.activate_derivative(&activations[i]);
                }
            }
        }
        // Final forward pass through the trained encoder only: the bottleneck
        // output is the embedding.
        let mut z = features;
        for (i, w) in encoder_weights.iter().enumerate() {
            z = z.dot(w);
            if i < encoder_weights.len() - 1 {
                z = self.activate(&z);
            }
        }
        Ok(z)
    }
}
/// Builds a symmetric (undirected, unweighted) adjacency matrix from an edge
/// list.
///
/// When `nnodes` is `None`, the node count is inferred as `max index + 1`
/// (1 for an empty edge list, matching the previous behavior). Edges that
/// reference nodes outside `0..n` are silently dropped rather than panicking.
#[allow(dead_code)]
pub fn edge_list_to_adjacency(edges: &[(usize, usize)], nnodes: Option<usize>) -> Array2<f64> {
    // The max-scan is only needed when the caller did not supply a size;
    // flat-mapping over a fixed-size array avoids the per-edge heap
    // allocation the previous `vec![*u, *v]` incurred.
    let n = nnodes.unwrap_or_else(|| {
        edges
            .iter()
            .flat_map(|&(u, v)| [u, v])
            .max()
            .map_or(1, |max_node| max_node + 1)
    });
    let mut adjacency = Array2::zeros((n, n));
    for &(u, v) in edges {
        if u < n && v < n {
            adjacency[[u, v]] = 1.0;
            adjacency[[v, u]] = 1.0; // mirror: the graph is undirected
        }
    }
    adjacency
}
/// Extracts the undirected edge list from an adjacency matrix.
///
/// Only the strict upper triangle is scanned, so each edge appears once as
/// `(i, j)` with `i < j`; any strictly positive entry counts as an edge.
#[allow(dead_code)]
pub fn adjacency_to_edge_list(adjacency: &Array2<f64>) -> Vec<(usize, usize)> {
    let n = adjacency.shape()[0];
    (0..n)
        .flat_map(|i| ((i + 1)..n).map(move |j| (i, j)))
        .filter(|&(i, j)| adjacency[[i, j]] > 0.0)
        .collect()
}