use crate::error::{StatsError, StatsResult};
#[derive(Debug, Clone, Copy, PartialEq)]
#[non_exhaustive]
pub enum SelectionRule {
And,
Or,
}
#[derive(Debug, Clone)]
pub struct NeighborhoodSelectionConfig {
pub alpha: f64,
pub threshold: f64,
pub method: SelectionRule,
pub max_iter: usize,
pub tolerance: f64,
}
impl Default for NeighborhoodSelectionConfig {
fn default() -> Self {
NeighborhoodSelectionConfig {
alpha: 0.1,
threshold: 1e-6,
method: SelectionRule::Or,
max_iter: 1000,
tolerance: 1e-6,
}
}
}
impl NeighborhoodSelectionConfig {
pub fn new(alpha: f64) -> StatsResult<Self> {
if alpha <= 0.0 {
return Err(StatsError::InvalidArgument(
"alpha must be positive".to_string(),
));
}
Ok(NeighborhoodSelectionConfig {
alpha,
..Default::default()
})
}
pub fn with_threshold(mut self, threshold: f64) -> StatsResult<Self> {
if threshold < 0.0 {
return Err(StatsError::InvalidArgument(
"threshold must be non-negative".to_string(),
));
}
self.threshold = threshold;
Ok(self)
}
pub fn with_method(mut self, method: SelectionRule) -> Self {
self.method = method;
self
}
pub fn with_max_iter(mut self, max_iter: usize) -> Self {
self.max_iter = max_iter;
self
}
pub fn with_tolerance(mut self, tolerance: f64) -> Self {
self.tolerance = tolerance;
self
}
}
#[derive(Debug, Clone)]
pub struct NeighborhoodSelectionResult {
pub adjacency_matrix: Vec<Vec<bool>>,
pub coefficients: Vec<Vec<f64>>,
pub n_edges: usize,
pub n_variables: usize,
}
impl NeighborhoodSelectionResult {
pub fn density(&self) -> f64 {
let p = self.n_variables;
let max_edges = p * (p - 1) / 2;
if max_edges == 0 {
return 0.0;
}
self.n_edges as f64 / max_edges as f64
}
pub fn degrees(&self) -> Vec<usize> {
let p = self.n_variables;
let mut deg = vec![0usize; p];
for i in 0..p {
for j in (i + 1)..p {
if self.adjacency_matrix[i][j] {
deg[i] += 1;
deg[j] += 1;
}
}
}
deg
}
}
pub fn neighborhood_selection(
data: &[Vec<f64>],
config: &NeighborhoodSelectionConfig,
) -> StatsResult<NeighborhoodSelectionResult> {
let n = data.len();
if n < 2 {
return Err(StatsError::InsufficientData(
"Need at least 2 samples for neighborhood selection".to_string(),
));
}
let p = data[0].len();
if p == 0 {
return Err(StatsError::InvalidArgument(
"Data must have at least one variable".to_string(),
));
}
for (i, row) in data.iter().enumerate() {
if row.len() != p {
return Err(StatsError::DimensionMismatch(format!(
"Row {} has length {} but expected {}",
i,
row.len(),
p
)));
}
}
let (standardized, _means, _stds) = standardize_columns(data, n, p)?;
let mut raw_adj = vec![vec![false; p]; p]; let mut coefficients = vec![vec![0.0; p]; p];
for j in 0..p {
let y: Vec<f64> = standardized.iter().map(|row| row[j]).collect();
let x_cols: Vec<usize> = (0..p).filter(|&k| k != j).collect();
let beta = lasso_coordinate_descent(
&standardized,
&y,
&x_cols,
config.alpha,
config.max_iter,
config.tolerance,
)?;
for (idx, &col) in x_cols.iter().enumerate() {
coefficients[j][col] = beta[idx];
if beta[idx].abs() > config.threshold {
raw_adj[j][col] = true;
}
}
}
let mut adj = vec![vec![false; p]; p];
let mut n_edges = 0;
for i in 0..p {
for j in (i + 1)..p {
let edge = match config.method {
SelectionRule::And => raw_adj[i][j] && raw_adj[j][i],
SelectionRule::Or => raw_adj[i][j] || raw_adj[j][i],
_ => raw_adj[i][j] || raw_adj[j][i], };
if edge {
adj[i][j] = true;
adj[j][i] = true;
n_edges += 1;
}
}
}
Ok(NeighborhoodSelectionResult {
adjacency_matrix: adj,
coefficients,
n_edges,
n_variables: p,
})
}
fn standardize_columns(
data: &[Vec<f64>],
n: usize,
p: usize,
) -> StatsResult<(Vec<Vec<f64>>, Vec<f64>, Vec<f64>)> {
let mut means = vec![0.0; p];
let mut stds = vec![0.0; p];
for row in data {
for j in 0..p {
means[j] += row[j];
}
}
for j in 0..p {
means[j] /= n as f64;
}
for row in data {
for j in 0..p {
let diff = row[j] - means[j];
stds[j] += diff * diff;
}
}
for j in 0..p {
stds[j] = (stds[j] / (n - 1).max(1) as f64).sqrt();
if stds[j] < 1e-15 {
stds[j] = 1.0; }
}
let mut standardized = vec![vec![0.0; p]; n];
for i in 0..n {
for j in 0..p {
standardized[i][j] = (data[i][j] - means[j]) / stds[j];
}
}
Ok((standardized, means, stds))
}
fn lasso_coordinate_descent(
data: &[Vec<f64>],
y: &[f64],
x_cols: &[usize],
alpha: f64,
max_iter: usize,
tol: f64,
) -> StatsResult<Vec<f64>> {
let n = data.len();
let n_features = x_cols.len();
let n_f64 = n as f64;
let mut xty = vec![0.0; n_features];
let mut xtx_diag = vec![0.0; n_features];
for k in 0..n_features {
let col = x_cols[k];
for i in 0..n {
xty[k] += data[i][col] * y[i];
xtx_diag[k] += data[i][col] * data[i][col];
}
xty[k] /= n_f64;
xtx_diag[k] /= n_f64;
}
let mut beta = vec![0.0; n_features];
let mut residuals: Vec<f64> = y.to_vec();
for _iter in 0..max_iter {
let mut max_change: f64 = 0.0;
for k in 0..n_features {
let col = x_cols[k];
let old_beta = beta[k];
let mut partial_residual = 0.0;
for i in 0..n {
partial_residual += data[i][col] * (residuals[i] + data[i][col] * old_beta);
}
partial_residual /= n_f64;
let denominator = xtx_diag[k];
if denominator.abs() < 1e-15 {
beta[k] = 0.0;
} else {
beta[k] = soft_threshold(partial_residual, alpha) / denominator;
}
let delta = beta[k] - old_beta;
if delta.abs() > 1e-15 {
for i in 0..n {
residuals[i] -= data[i][col] * delta;
}
}
let change = delta.abs();
if change > max_change {
max_change = change;
}
}
if max_change < tol {
break;
}
}
Ok(beta)
}
#[inline]
fn soft_threshold(x: f64, lambda: f64) -> f64 {
if x > lambda {
x - lambda
} else if x < -lambda {
x + lambda
} else {
0.0
}
}
#[cfg(test)]
mod tests {
use super::*;
fn generate_ar1_data(n: usize, p: usize, rho: f64) -> Vec<Vec<f64>> {
let mut data = vec![vec![0.0; p]; n];
for i in 0..n {
data[i][0] = ((i as f64 * 1.7 + 0.3).sin() * 2.0).tanh();
for j in 1..p {
let noise = ((i as f64 * (j as f64 + 1.0) * 0.73 + 0.5).sin()).tanh();
data[i][j] = rho * data[i][j - 1] + (1.0 - rho * rho).sqrt() * noise;
}
}
data
}
#[test]
fn test_neighborhood_selection_basic() {
let data = generate_ar1_data(100, 4, 0.5);
let config = NeighborhoodSelectionConfig::new(0.1)
.expect("config creation failed");
let result = neighborhood_selection(&data, &config)
.expect("neighborhood selection failed");
assert_eq!(result.n_variables, 4);
assert!(result.n_edges <= 4 * 3 / 2, "Cannot exceed max edges");
}
#[test]
fn test_neighborhood_selection_and_vs_or() {
let data = generate_ar1_data(80, 3, 0.6);
let config_or = NeighborhoodSelectionConfig::new(0.15)
.expect("config creation failed")
.with_method(SelectionRule::Or);
let result_or = neighborhood_selection(&data, &config_or)
.expect("OR selection failed");
let config_and = NeighborhoodSelectionConfig::new(0.15)
.expect("config creation failed")
.with_method(SelectionRule::And);
let result_and = neighborhood_selection(&data, &config_and)
.expect("AND selection failed");
assert!(
result_and.n_edges <= result_or.n_edges,
"AND ({}) should have <= edges than OR ({})",
result_and.n_edges,
result_or.n_edges
);
}
#[test]
fn test_neighborhood_selection_high_regularization_sparse() {
let data = generate_ar1_data(50, 5, 0.3);
let config = NeighborhoodSelectionConfig::new(1.0)
.expect("config creation failed");
let result = neighborhood_selection(&data, &config)
.expect("neighborhood selection failed");
assert!(
result.n_edges <= 3,
"High regularization should give sparse graph, got {} edges",
result.n_edges
);
}
#[test]
fn test_neighborhood_selection_identity_data() {
let n = 100;
let p = 3;
let mut data = vec![vec![0.0; p]; n];
for i in 0..n {
for j in 0..p {
data[i][j] = ((i as f64 * (j as f64 * 3.7 + 1.1) + 0.9).sin()).tanh();
}
}
let config = NeighborhoodSelectionConfig::new(0.3)
.expect("config creation failed");
let result = neighborhood_selection(&data, &config)
.expect("neighborhood selection failed");
assert!(
result.n_edges <= 2,
"Independent data should yield sparse graph, got {} edges",
result.n_edges
);
}
#[test]
fn test_neighborhood_selection_symmetry() {
let data = generate_ar1_data(60, 4, 0.4);
let config = NeighborhoodSelectionConfig::new(0.1)
.expect("config creation failed");
let result = neighborhood_selection(&data, &config)
.expect("neighborhood selection failed");
let p = result.n_variables;
for i in 0..p {
for j in 0..p {
assert_eq!(
result.adjacency_matrix[i][j],
result.adjacency_matrix[j][i],
"Adjacency matrix should be symmetric at ({}, {})",
i,
j
);
}
}
}
#[test]
fn test_neighborhood_selection_error_insufficient_data() {
let data = vec![vec![1.0, 2.0]]; let config = NeighborhoodSelectionConfig::new(0.1)
.expect("config creation failed");
let result = neighborhood_selection(&data, &config);
assert!(result.is_err());
}
#[test]
fn test_neighborhood_selection_error_empty_variables() {
let data: Vec<Vec<f64>> = vec![vec![], vec![]];
let config = NeighborhoodSelectionConfig::new(0.1)
.expect("config creation failed");
let result = neighborhood_selection(&data, &config);
assert!(result.is_err());
}
#[test]
fn test_neighborhood_selection_error_invalid_alpha() {
let result = NeighborhoodSelectionConfig::new(-0.1);
assert!(result.is_err());
let result = NeighborhoodSelectionConfig::new(0.0);
assert!(result.is_err());
}
#[test]
fn test_neighborhood_selection_density() {
let data = generate_ar1_data(50, 4, 0.3);
let config = NeighborhoodSelectionConfig::new(0.1)
.expect("config creation failed");
let result = neighborhood_selection(&data, &config)
.expect("neighborhood selection failed");
let density = result.density();
assert!(density >= 0.0 && density <= 1.0, "Density should be in [0, 1]");
}
#[test]
fn test_neighborhood_selection_degrees() {
let data = generate_ar1_data(80, 3, 0.5);
let config = NeighborhoodSelectionConfig::new(0.05)
.expect("config creation failed");
let result = neighborhood_selection(&data, &config)
.expect("neighborhood selection failed");
let degrees = result.degrees();
assert_eq!(degrees.len(), 3);
let total_degree: usize = degrees.iter().sum();
assert_eq!(total_degree, 2 * result.n_edges);
}
#[test]
fn test_neighborhood_selection_dimension_mismatch() {
let data = vec![vec![1.0, 2.0], vec![3.0]]; let config = NeighborhoodSelectionConfig::new(0.1)
.expect("config creation failed");
let result = neighborhood_selection(&data, &config);
assert!(result.is_err());
}
}