use crate::error::{DatasetsError, Result};
use crate::utils::Dataset;
use scirs2_core::ndarray::{Array1, Array2};
use scirs2_core::random::prelude::*;
use scirs2_core::random::rand_distributions::Distribution;
fn create_rng(randomseed: Option<u64>) -> StdRng {
match randomseed {
Some(seed) => StdRng::seed_from_u64(seed),
None => {
let mut r = thread_rng();
StdRng::seed_from_u64(r.next_u64())
}
}
}
pub fn make_sparse_spd(n: usize, density: f64, randomseed: Option<u64>) -> Result<Dataset> {
if n == 0 {
return Err(DatasetsError::InvalidFormat("n must be > 0".to_string()));
}
if density <= 0.0 || density > 1.0 {
return Err(DatasetsError::InvalidFormat(
"density must be in (0, 1]".to_string(),
));
}
let mut rng = create_rng(randomseed);
let uniform_01 = scirs2_core::random::Uniform::new(0.0, 1.0).map_err(|e| {
DatasetsError::ComputationError(format!("Failed to create uniform dist: {e}"))
})?;
let normal = scirs2_core::random::Normal::new(0.0, 1.0).map_err(|e| {
DatasetsError::ComputationError(format!("Failed to create normal dist: {e}"))
})?;
let mut lower = Array2::zeros((n, n));
for i in 0..n {
let sample_val: f64 = normal.sample(&mut rng);
lower[[i, i]] = 1.0 + sample_val.abs();
}
for i in 1..n {
for j in 0..i {
if uniform_01.sample(&mut rng) < density {
lower[[i, j]] = normal.sample(&mut rng);
}
}
}
let mut matrix = Array2::zeros((n, n));
for i in 0..n {
for j in 0..=i {
let mut sum = 0.0;
for k in 0..=j {
sum += lower[[i, k]] * lower[[j, k]];
}
matrix[[i, j]] = sum;
matrix[[j, i]] = sum;
}
}
let diag_shift = 0.1;
for i in 0..n {
matrix[[i, i]] += diag_shift;
}
let diag = Array1::from_vec((0..n).map(|i| matrix[[i, i]]).collect());
let eps = 1e-14;
let nnz: usize = matrix.iter().filter(|&&v: &&f64| v.abs() > eps).count();
let feature_names: Vec<String> = (0..n).map(|i| format!("col_{i}")).collect();
let dataset = Dataset::new(matrix, Some(diag))
.with_featurenames(feature_names)
.with_description(format!(
"Sparse SPD matrix ({n}x{n}, factor density={density})"
))
.with_metadata("generator", "make_sparse_spd")
.with_metadata("n", &n.to_string())
.with_metadata("density", &density.to_string())
.with_metadata("nnz", &nnz.to_string());
Ok(dataset)
}
pub fn make_sparse_banded(n: usize, bandwidth: usize, randomseed: Option<u64>) -> Result<Dataset> {
if n == 0 {
return Err(DatasetsError::InvalidFormat("n must be > 0".to_string()));
}
if bandwidth >= n {
return Err(DatasetsError::InvalidFormat(format!(
"bandwidth ({bandwidth}) must be < n ({n})"
)));
}
let mut rng = create_rng(randomseed);
let normal = scirs2_core::random::Normal::new(0.0, 1.0).map_err(|e| {
DatasetsError::ComputationError(format!("Failed to create normal dist: {e}"))
})?;
let mut matrix = Array2::zeros((n, n));
for i in 0..n {
for j in 0..n {
let diff = i.abs_diff(j);
if diff <= bandwidth && i != j {
matrix[[i, j]] = normal.sample(&mut rng);
}
}
}
let mut sym_matrix = Array2::zeros((n, n));
for i in 0..n {
for j in 0..n {
sym_matrix[[i, j]] = (matrix[[i, j]] + matrix[[j, i]]) / 2.0;
}
}
for i in 0..n {
let off_diag_sum: f64 = (0..n)
.filter(|&j| j != i)
.map(|j| {
let v: f64 = sym_matrix[[i, j]];
v.abs()
})
.sum();
sym_matrix[[i, i]] = 1.0 + off_diag_sum;
}
let diag = Array1::from_vec((0..n).map(|i| sym_matrix[[i, i]]).collect());
let eps = 1e-14;
let nnz: usize = sym_matrix.iter().filter(|&&v: &&f64| v.abs() > eps).count();
let feature_names: Vec<String> = (0..n).map(|i| format!("col_{i}")).collect();
let dataset = Dataset::new(sym_matrix, Some(diag))
.with_featurenames(feature_names)
.with_description(format!(
"Sparse banded matrix ({n}x{n}, bandwidth={bandwidth})"
))
.with_metadata("generator", "make_sparse_banded")
.with_metadata("n", &n.to_string())
.with_metadata("bandwidth", &bandwidth.to_string())
.with_metadata("nnz", &nnz.to_string());
Ok(dataset)
}
pub fn make_sparse_laplacian(n: usize, randomseed: Option<u64>) -> Result<Dataset> {
if n == 0 {
return Err(DatasetsError::InvalidFormat("n must be > 0".to_string()));
}
let mut rng = create_rng(randomseed);
let p_edge = if n == 1 {
0.0
} else {
let p = 3.0 * (n as f64).ln() / n as f64;
p.min(1.0)
};
let uniform = scirs2_core::random::Uniform::new(0.0, 1.0).map_err(|e| {
DatasetsError::ComputationError(format!("Failed to create uniform dist: {e}"))
})?;
let mut adj = Array2::zeros((n, n));
for i in 0..n {
for j in (i + 1)..n {
if uniform.sample(&mut rng) < p_edge {
adj[[i, j]] = 1.0;
adj[[j, i]] = 1.0;
}
}
}
let mut degrees = vec![0.0_f64; n];
for i in 0..n {
for j in 0..n {
degrees[i] += adj[[i, j]];
}
}
let mut laplacian = Array2::zeros((n, n));
for i in 0..n {
laplacian[[i, i]] = degrees[i];
for j in 0..n {
if i != j {
laplacian[[i, j]] = -adj[[i, j]];
}
}
}
let target = Array1::from_vec(degrees.clone());
let n_edges: usize = {
let total_degree: f64 = degrees.iter().sum();
(total_degree / 2.0).round() as usize
};
let feature_names: Vec<String> = (0..n).map(|i| format!("node_{i}")).collect();
let dataset = Dataset::new(laplacian, Some(target))
.with_featurenames(feature_names)
.with_description(format!("Graph Laplacian ({n}x{n}, p_edge={p_edge:.4})"))
.with_metadata("generator", "make_sparse_laplacian")
.with_metadata("n", &n.to_string())
.with_metadata("p_edge", &p_edge.to_string())
.with_metadata("n_edges", &n_edges.to_string());
Ok(dataset)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_sparse_spd_shape() {
let ds = make_sparse_spd(50, 0.1, Some(42)).expect("should succeed");
assert_eq!(ds.n_samples(), 50);
assert_eq!(ds.n_features(), 50);
assert!(ds.target.is_some());
let target = ds.target.as_ref().expect("target exists");
assert_eq!(target.len(), 50);
}
#[test]
fn test_sparse_spd_symmetric() {
let n = 30;
let ds = make_sparse_spd(n, 0.2, Some(42)).expect("should succeed");
for i in 0..n {
for j in 0..n {
assert!(
(ds.data[[i, j]] - ds.data[[j, i]]).abs() < 1e-10,
"Not symmetric at ({i},{j}): {} vs {}",
ds.data[[i, j]],
ds.data[[j, i]]
);
}
}
}
#[test]
fn test_sparse_spd_positive_diagonal() {
let n = 40;
let ds = make_sparse_spd(n, 0.15, Some(42)).expect("should succeed");
for i in 0..n {
assert!(
ds.data[[i, i]] > 0.0,
"Diagonal element at [{i},{i}] should be positive, got {}",
ds.data[[i, i]]
);
}
}
#[test]
fn test_sparse_spd_positive_definite() {
let n = 20;
let ds = make_sparse_spd(n, 0.3, Some(42)).expect("should succeed");
let mut rng = StdRng::seed_from_u64(999);
let normal = scirs2_core::random::Normal::new(0.0, 1.0).expect("normal dist");
for trial in 0..10 {
let x: Vec<f64> = (0..n).map(|_| normal.sample(&mut rng)).collect();
let mut xtax = 0.0;
for i in 0..n {
for j in 0..n {
xtax += x[i] * ds.data[[i, j]] * x[j];
}
}
assert!(
xtax > 0.0,
"x^T A x should be positive (trial {trial}), got {xtax}"
);
}
}
#[test]
fn test_sparse_spd_reproducibility() {
let ds1 = make_sparse_spd(25, 0.2, Some(77)).expect("should succeed");
let ds2 = make_sparse_spd(25, 0.2, Some(77)).expect("should succeed");
for i in 0..25 {
for j in 0..25 {
assert!(
(ds1.data[[i, j]] - ds2.data[[i, j]]).abs() < 1e-15,
"Matrix differs at ({i},{j})"
);
}
}
}
#[test]
fn test_sparse_spd_validation() {
assert!(make_sparse_spd(0, 0.1, None).is_err());
assert!(make_sparse_spd(10, 0.0, None).is_err());
assert!(make_sparse_spd(10, -0.1, None).is_err());
assert!(make_sparse_spd(10, 1.1, None).is_err());
}
#[test]
fn test_sparse_banded_shape() {
let ds = make_sparse_banded(100, 3, Some(42)).expect("should succeed");
assert_eq!(ds.n_samples(), 100);
assert_eq!(ds.n_features(), 100);
assert!(ds.target.is_some());
}
#[test]
fn test_sparse_banded_band_structure() {
let n = 50;
let bw = 2;
let ds = make_sparse_banded(n, bw, Some(42)).expect("should succeed");
let eps = 1e-14;
for i in 0..n {
for j in 0..n {
let diff = i.abs_diff(j);
if diff > bw {
assert!(
ds.data[[i, j]].abs() < eps,
"Entry ({i},{j}) outside band should be 0, got {}",
ds.data[[i, j]]
);
}
}
}
}
#[test]
fn test_sparse_banded_symmetric() {
let n = 40;
let ds = make_sparse_banded(n, 3, Some(42)).expect("should succeed");
for i in 0..n {
for j in 0..n {
assert!(
(ds.data[[i, j]] - ds.data[[j, i]]).abs() < 1e-10,
"Not symmetric at ({i},{j})"
);
}
}
}
#[test]
fn test_sparse_banded_diagonally_dominant() {
let n = 30;
let ds = make_sparse_banded(n, 4, Some(42)).expect("should succeed");
for i in 0..n {
let diag_val = ds.data[[i, i]];
let off_diag_sum: f64 = (0..n)
.filter(|&j| j != i)
.map(|j| ds.data[[i, j]].abs())
.sum();
assert!(
diag_val >= 1.0 + off_diag_sum - 1e-10,
"Row {i} not diagonally dominant: diag={diag_val}, off_sum={off_diag_sum}"
);
}
}
#[test]
fn test_sparse_banded_reproducibility() {
let ds1 = make_sparse_banded(30, 2, Some(55)).expect("should succeed");
let ds2 = make_sparse_banded(30, 2, Some(55)).expect("should succeed");
for i in 0..30 {
for j in 0..30 {
assert!(
(ds1.data[[i, j]] - ds2.data[[i, j]]).abs() < 1e-15,
"Matrix differs at ({i},{j})"
);
}
}
}
#[test]
fn test_sparse_banded_zero_bandwidth() {
let n = 10;
let ds = make_sparse_banded(n, 0, Some(42)).expect("should succeed");
let eps = 1e-14;
for i in 0..n {
for j in 0..n {
if i != j {
assert!(
ds.data[[i, j]].abs() < eps,
"Off-diagonal ({i},{j}) should be 0 for bw=0"
);
} else {
assert!(
ds.data[[i, i]] > 0.0,
"Diagonal ({i},{i}) should be positive"
);
}
}
}
}
#[test]
fn test_sparse_banded_validation() {
assert!(make_sparse_banded(0, 1, None).is_err());
assert!(make_sparse_banded(5, 5, None).is_err()); assert!(make_sparse_banded(5, 10, None).is_err()); }
#[test]
fn test_sparse_laplacian_shape() {
let ds = make_sparse_laplacian(50, Some(42)).expect("should succeed");
assert_eq!(ds.n_samples(), 50);
assert_eq!(ds.n_features(), 50);
assert!(ds.target.is_some());
}
#[test]
fn test_sparse_laplacian_symmetric() {
let n = 30;
let ds = make_sparse_laplacian(n, Some(42)).expect("should succeed");
for i in 0..n {
for j in 0..n {
assert!(
(ds.data[[i, j]] - ds.data[[j, i]]).abs() < 1e-10,
"Laplacian not symmetric at ({i},{j})"
);
}
}
}
#[test]
fn test_sparse_laplacian_row_sums_zero() {
let n = 40;
let ds = make_sparse_laplacian(n, Some(42)).expect("should succeed");
for i in 0..n {
let row_sum: f64 = (0..n).map(|j| ds.data[[i, j]]).sum();
assert!(
row_sum.abs() < 1e-10,
"Row {i} sum should be 0, got {row_sum}"
);
}
}
#[test]
fn test_sparse_laplacian_positive_semidefinite() {
let n = 20;
let ds = make_sparse_laplacian(n, Some(42)).expect("should succeed");
let mut rng = StdRng::seed_from_u64(999);
let normal = scirs2_core::random::Normal::new(0.0, 1.0).expect("normal dist");
for trial in 0..10 {
let x: Vec<f64> = (0..n).map(|_| normal.sample(&mut rng)).collect();
let mut xtlx = 0.0;
for i in 0..n {
for j in 0..n {
xtlx += x[i] * ds.data[[i, j]] * x[j];
}
}
assert!(
xtlx >= -1e-10,
"x^T L x should be >= 0 (trial {trial}), got {xtlx}"
);
}
}
#[test]
fn test_sparse_laplacian_off_diagonal_nonpositive() {
let n = 30;
let ds = make_sparse_laplacian(n, Some(42)).expect("should succeed");
for i in 0..n {
for j in 0..n {
if i != j {
assert!(
ds.data[[i, j]] <= 1e-10,
"Off-diagonal L[{i},{j}] should be <= 0, got {}",
ds.data[[i, j]]
);
}
}
}
}
#[test]
fn test_sparse_laplacian_diagonal_equals_degree() {
let n = 25;
let ds = make_sparse_laplacian(n, Some(42)).expect("should succeed");
let target = ds.target.as_ref().expect("target exists");
for i in 0..n {
assert!(
(ds.data[[i, i]] - target[i]).abs() < 1e-10,
"Diagonal L[{i},{i}]={} should equal degree {}",
ds.data[[i, i]],
target[i]
);
}
}
#[test]
fn test_sparse_laplacian_reproducibility() {
let ds1 = make_sparse_laplacian(30, Some(88)).expect("should succeed");
let ds2 = make_sparse_laplacian(30, Some(88)).expect("should succeed");
for i in 0..30 {
for j in 0..30 {
assert!(
(ds1.data[[i, j]] - ds2.data[[i, j]]).abs() < 1e-15,
"Laplacian differs at ({i},{j})"
);
}
}
}
#[test]
fn test_sparse_laplacian_single_node() {
let ds = make_sparse_laplacian(1, Some(42)).expect("should succeed");
assert_eq!(ds.n_samples(), 1);
assert!(ds.data[[0, 0]].abs() < 1e-15);
}
#[test]
fn test_sparse_laplacian_validation() {
assert!(make_sparse_laplacian(0, None).is_err());
}
}