use crate::error::{OptimizeError, OptimizeResult};
use scirs2_core::ndarray::{Array1, Array2};
/// Strategy used to generate the random projection matrix.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EmbeddingType {
/// Dense matrix with i.i.d. Gaussian entries scaled by 1/sqrt(k).
GaussianRandom,
/// Sparse sign matrix (Achlioptas-style {+s, 0, -s} entries; see `sparse_projection`).
SparseRandom,
/// Fast Johnson-Lindenstrauss transform. NOTE: currently generated the same
/// way as `GaussianRandom` (see `SubspaceEmbedding::new`); no Hadamard-based
/// fast path is implemented yet.
FastJohnsonLindenstrauss,
}
/// Parameters describing a random subspace embedding.
#[derive(Debug, Clone)]
pub struct SubspaceEmbeddingConfig {
/// Ambient (input) dimension `n`; must be non-zero.
pub original_dim: usize,
/// Target sketch dimension `k` (rows of the projection matrix); must be non-zero.
pub embedding_dim: usize,
/// Which random matrix construction to use.
pub embedding_type: EmbeddingType,
/// Seed for the deterministic internal pseudo-random generator.
pub seed: u64,
}
/// A seeded random projection from `original_dim` down to `embedding_dim`.
pub struct SubspaceEmbedding {
/// Configuration this embedding was built from.
config: SubspaceEmbeddingConfig,
/// The `embedding_dim x original_dim` projection matrix `S`.
matrix: Array2<f64>,
}
impl SubspaceEmbedding {
    /// Constructs the `k x n` random projection matrix `S` described by
    /// `config`.
    ///
    /// # Errors
    /// Returns `OptimizeError::InvalidInput` when either dimension is zero.
    pub fn new(config: SubspaceEmbeddingConfig) -> OptimizeResult<Self> {
        if config.original_dim == 0 || config.embedding_dim == 0 {
            return Err(OptimizeError::InvalidInput(
                "SubspaceEmbedding: dimensions must be non-zero".to_string(),
            ));
        }
        let matrix = match config.embedding_type {
            // NOTE: a true fast JL transform (Hadamard-based) is not
            // implemented; `FastJohnsonLindenstrauss` falls back to the dense
            // Gaussian construction, which satisfies the same JL guarantee at
            // O(k * n) apply cost. The arms had identical bodies, so they are
            // merged to make the fallback explicit.
            EmbeddingType::GaussianRandom | EmbeddingType::FastJohnsonLindenstrauss => {
                gaussian_projection(config.embedding_dim, config.original_dim, config.seed)
            }
            EmbeddingType::SparseRandom => {
                sparse_projection(config.embedding_dim, config.original_dim, config.seed)
            }
        };
        Ok(Self { config, matrix })
    }

    /// Projects `x` from the original space into the sketch space: `S x`.
    ///
    /// # Errors
    /// Returns `OptimizeError::ValueError` if `x.len() != original_dim`.
    pub fn project(&self, x: &Array1<f64>) -> OptimizeResult<Array1<f64>> {
        if x.len() != self.config.original_dim {
            return Err(OptimizeError::ValueError(format!(
                "project: expected dim {}, got {}",
                self.config.original_dim,
                x.len()
            )));
        }
        Ok(self.matrix.dot(x))
    }

    /// Maps an embedded vector back via the adjoint: `S^T y`.
    ///
    /// This is only an approximate reconstruction (the transpose, not an
    /// inverse); it recovers the original dimension, not the original vector.
    ///
    /// # Errors
    /// Returns `OptimizeError::ValueError` if `y.len() != embedding_dim`.
    pub fn reconstruct(&self, y: &Array1<f64>) -> OptimizeResult<Array1<f64>> {
        if y.len() != self.config.embedding_dim {
            return Err(OptimizeError::ValueError(format!(
                "reconstruct: expected dim {}, got {}",
                self.config.embedding_dim,
                y.len()
            )));
        }
        Ok(self.matrix.t().dot(y))
    }

    /// Sketches every column of `a` at once: returns `S A`
    /// (shape `embedding_dim x a.ncols()`).
    ///
    /// # Errors
    /// Returns `OptimizeError::ValueError` if `a.nrows() != original_dim`.
    pub fn project_matrix(&self, a: &Array2<f64>) -> OptimizeResult<Array2<f64>> {
        if a.nrows() != self.config.original_dim {
            return Err(OptimizeError::ValueError(format!(
                "project_matrix: expected {} rows, got {}",
                self.config.original_dim,
                a.nrows()
            )));
        }
        Ok(self.matrix.dot(a))
    }

    /// Target (sketch) dimension `k`.
    pub fn embedding_dim(&self) -> usize {
        self.config.embedding_dim
    }

    /// Ambient (input) dimension `n`.
    pub fn original_dim(&self) -> usize {
        self.config.original_dim
    }

    /// Johnson-Lindenstrauss distortion estimate
    /// `eps = sqrt(2 * ln(n_points / delta) / k)` for embedding `n_points`
    /// points with failure probability `delta`.
    ///
    /// NOTE(review): degenerate parameters (`delta <= 0` or `n_points == 0`)
    /// produce NaN or infinity rather than an error; callers must pass valid
    /// values.
    pub fn jl_epsilon(&self, n_points: usize, delta: f64) -> f64 {
        let k = self.config.embedding_dim as f64;
        (2.0 * (n_points as f64 / delta).ln() / k).sqrt()
    }
}
/// Builds a dense `k x n` Gaussian JL projection with i.i.d. N(0, 1/k) entries.
///
/// Uses a 64-bit LCG (Knuth MMIX constants) seeded from `seed` plus the
/// Box-Muller transform, so the matrix is fully deterministic without an
/// external RNG dependency.
fn gaussian_projection(k: usize, n: usize, seed: u64) -> Array2<f64> {
    // Scale 1/sqrt(k) so the sketch preserves squared norms in expectation.
    let scale = (k as f64).recip().sqrt();
    let mut state = seed.wrapping_add(1);
    let data: Vec<f64> = (0..k * n)
        .map(|_| {
            // Two LCG draws feed one Box-Muller sample.
            state = state
                .wrapping_mul(6_364_136_223_846_793_005)
                .wrapping_add(1_442_695_040_888_963_407);
            // Top 53 bits -> uniform in [0, 1); +1e-300 keeps ln(u1) finite.
            let u1 = ((state >> 11) as f64) / ((1u64 << 53) as f64) + 1e-300;
            state = state
                .wrapping_mul(6_364_136_223_846_793_005)
                .wrapping_add(1_442_695_040_888_963_407);
            let u2 = ((state >> 11) as f64) / ((1u64 << 53) as f64);
            (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos() * scale
        })
        .collect();
    // `data` has exactly k * n elements by construction, so the reshape
    // cannot fail. The previous `unwrap_or_else(zeros)` would have silently
    // returned an all-zero matrix on an impossible error, masking any bug.
    Array2::from_shape_vec((k, n), data)
        .expect("gaussian_projection: k * n elements always reshape to (k, n)")
}
/// Builds a sparse `k x n` sign projection (Achlioptas-style): each entry is
/// `+sqrt(3/k)` with probability 1/6, `-sqrt(3/k)` with probability 1/6, and
/// `0` with probability 2/3, drawn deterministically from `seed`.
fn sparse_projection(k: usize, n: usize, seed: u64) -> Array2<f64> {
    // sqrt(3/k) compensates for the 2/3 zero mass so norms are preserved in
    // expectation.
    let scale = (3.0_f64 / k as f64).sqrt();
    let mut state = seed.wrapping_add(1);
    let data: Vec<f64> = (0..k * n)
        .map(|_| {
            state = state
                .wrapping_mul(6_364_136_223_846_793_005)
                .wrapping_add(1_442_695_040_888_963_407);
            // Use the high 32 bits of the LCG state; low bits have short
            // periods.
            let r = (state >> 32) % 6;
            if r == 0 {
                scale
            } else if r == 1 {
                -scale
            } else {
                0.0
            }
        })
        .collect();
    // `data` has exactly k * n elements by construction, so the reshape
    // cannot fail. The previous `unwrap_or_else(zeros)` would have silently
    // returned an all-zero matrix on an impossible error, masking any bug.
    Array2::from_shape_vec((k, n), data)
        .expect("sparse_projection: k * n elements always reshape to (k, n)")
}
/// Solves `min_x ||A x - b||` approximately via sketch-and-solve: projects
/// both `A` (m x n) and `b` with a `k x m` Gaussian embedding `S`, then
/// solves the sketched normal equations `(SA)^T (SA) x = (SA)^T (S b)`.
///
/// # Errors
/// - `InvalidInput` if `A` is empty, `k == 0`, or `k < n` (the sketched
///   normal equations would be rank-deficient).
/// - `ValueError` if `b.len() != m`.
/// - `ComputationError` if the sketched system is numerically singular.
pub fn sketched_least_squares(
    a: &Array2<f64>,
    b: &Array1<f64>,
    k: usize,
    seed: u64,
) -> OptimizeResult<Array1<f64>> {
    let (m, n) = (a.nrows(), a.ncols());
    if m == 0 || n == 0 {
        return Err(OptimizeError::InvalidInput(
            "sketched_least_squares: matrix must be non-empty".to_string(),
        ));
    }
    if b.len() != m {
        return Err(OptimizeError::ValueError(format!(
            "sketched_least_squares: b length {} does not match matrix rows {m}",
            b.len()
        )));
    }
    if k == 0 {
        return Err(OptimizeError::InvalidInput(
            "sketched_least_squares: sketch dimension k must be > 0".to_string(),
        ));
    }
    // With k < n, SA has rank at most k < n and the normal equations are
    // rank-deficient; rounding noise can push pivots above the singularity
    // threshold and yield garbage, so reject explicitly instead.
    if k < n {
        return Err(OptimizeError::InvalidInput(format!(
            "sketched_least_squares: sketch dimension k={k} must be >= n={n}"
        )));
    }
    let config = SubspaceEmbeddingConfig {
        original_dim: m,
        embedding_dim: k,
        embedding_type: EmbeddingType::GaussianRandom,
        seed,
    };
    let emb = SubspaceEmbedding::new(config)?;
    let sa = emb.project_matrix(a)?;
    // Use the public API (with its dimension check) rather than reaching into
    // the private `matrix` field.
    let sb = emb.project(b)?;
    let sat = sa.t().to_owned();
    let satsa = sat.dot(&sa);
    let satsb = sat.dot(&sb);
    solve_dense_linear(&satsa, &satsb)
}
/// Solves the dense square linear system `A x = b` by Gaussian elimination
/// with partial (row) pivoting, in O(n^3).
///
/// # Errors
/// - `ValueError` if `a` is not square or `b.len() != a.nrows()` (previously
///   these caused out-of-bounds indexing or silently wrong row indexing).
/// - `ComputationError` if a pivot falls below the singularity threshold.
fn solve_dense_linear(a: &Array2<f64>, b: &Array1<f64>) -> OptimizeResult<Array1<f64>> {
    let n = a.nrows();
    if a.ncols() != n {
        return Err(OptimizeError::ValueError(format!(
            "solve_dense_linear: matrix must be square, got {}x{}",
            n,
            a.ncols()
        )));
    }
    if b.len() != n {
        return Err(OptimizeError::ValueError(format!(
            "solve_dense_linear: rhs length {} does not match matrix size {n}",
            b.len()
        )));
    }
    if n == 0 {
        return Ok(Array1::zeros(0));
    }
    // Work on flat row-major copies; ndarray's logical iteration order is
    // row-major, so row i, column j lives at index i * n + j.
    let mut mat: Vec<f64> = a.iter().copied().collect();
    let mut rhs: Vec<f64> = b.iter().copied().collect();
    // Forward elimination with partial pivoting for numerical stability.
    for col in 0..n {
        // Find the row with the largest magnitude in this column.
        let mut max_row = col;
        let mut max_val = mat[col * n + col].abs();
        for row in (col + 1)..n {
            let v = mat[row * n + col].abs();
            if v > max_val {
                max_val = v;
                max_row = row;
            }
        }
        if max_val < 1e-300 {
            return Err(OptimizeError::ComputationError(
                "solve_dense_linear: singular or near-singular matrix".to_string(),
            ));
        }
        if max_row != col {
            for k in 0..n {
                mat.swap(col * n + k, max_row * n + k);
            }
            rhs.swap(col, max_row);
        }
        let pivot = mat[col * n + col];
        for row in (col + 1)..n {
            let factor = mat[row * n + col] / pivot;
            for k in col..n {
                let v = mat[col * n + k];
                mat[row * n + k] -= factor * v;
            }
            rhs[row] -= factor * rhs[col];
        }
    }
    // Back substitution on the upper-triangular system.
    let mut x = vec![0.0_f64; n];
    for ii in 0..n {
        let i = n - 1 - ii;
        let mut sum = rhs[i];
        for j in (i + 1)..n {
            sum -= mat[i * n + j] * x[j];
        }
        x[i] = sum / mat[i * n + i];
    }
    Ok(Array1::from(x))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a test configuration.
    fn make_config(
        n: usize,
        k: usize,
        kind: EmbeddingType,
        seed: u64,
    ) -> SubspaceEmbeddingConfig {
        SubspaceEmbeddingConfig {
            original_dim: n,
            embedding_dim: k,
            embedding_type: kind,
            seed,
        }
    }

    #[test]
    fn test_gaussian_projection_shape() {
        let emb = SubspaceEmbedding::new(make_config(100, 20, EmbeddingType::GaussianRandom, 42))
            .expect("construction must succeed");
        let input = Array1::zeros(100);
        let projected = emb.project(&input).expect("project must succeed");
        assert_eq!(projected.len(), 20, "projected dim should be embedding_dim");
    }

    #[test]
    fn test_sparse_projection_shape() {
        let emb = SubspaceEmbedding::new(make_config(50, 10, EmbeddingType::SparseRandom, 7))
            .expect("construction must succeed");
        assert_eq!(emb.embedding_dim(), 10);
        assert_eq!(emb.original_dim(), 50);
    }

    #[test]
    fn test_projection_dimension_check() {
        let emb = SubspaceEmbedding::new(make_config(20, 5, EmbeddingType::GaussianRandom, 1))
            .expect("construction must succeed");
        // Deliberately mismatched input length.
        let mismatched = Array1::zeros(15);
        assert!(
            emb.project(&mismatched).is_err(),
            "project with wrong dimension must fail"
        );
    }

    #[test]
    fn test_jl_epsilon_reasonable() {
        let emb = SubspaceEmbedding::new(make_config(1000, 200, EmbeddingType::GaussianRandom, 0))
            .expect("construction must succeed");
        let eps = emb.jl_epsilon(1000, 0.01);
        assert!(eps > 0.0, "epsilon must be positive");
        assert!(
            eps < 1.0,
            "epsilon={eps} should be < 1.0 for reasonable parameters"
        );
    }

    #[test]
    fn test_reconstruct_approx() {
        let emb = SubspaceEmbedding::new(make_config(10, 10, EmbeddingType::GaussianRandom, 123))
            .expect("construction must succeed");
        let original: Array1<f64> = Array1::from_iter((0..10).map(|i| i as f64));
        let sketched = emb.project(&original).expect("project must succeed");
        assert_eq!(sketched.len(), 10, "projected length");
        let recovered = emb.reconstruct(&sketched).expect("reconstruct must succeed");
        assert_eq!(
            recovered.len(),
            10,
            "reconstructed length must equal original_dim"
        );
    }

    #[test]
    fn test_sketched_least_squares() {
        let (rows, cols) = (100_usize, 10_usize);
        // Deterministic design matrix: entry (i, j) = cos(i * (j + 1) * 0.1).
        let entries: Vec<f64> = (0..rows * cols)
            .map(|idx| {
                let (i, j) = (idx / cols, idx % cols);
                ((i as f64) * (j as f64 + 1.0) * 0.1).cos()
            })
            .collect();
        let a = Array2::from_shape_vec((rows, cols), entries).expect("shape");
        let x_true: Array1<f64> = Array1::from_iter((1..=cols).map(|i| i as f64));
        let b = a.dot(&x_true);
        let x_sol = sketched_least_squares(&a, &b, 4 * cols, 999)
            .expect("sketched_least_squares must succeed");
        assert_eq!(x_sol.len(), cols);
        let residual = a.dot(&x_sol) - &b;
        let rel_err = residual.dot(&residual).sqrt() / (b.dot(&b).sqrt() + 1e-30);
        assert!(
            rel_err < 0.1,
            "relative residual {rel_err} too large; sketched LS should be accurate"
        );
    }

    #[test]
    fn test_project_matrix_shape() {
        let emb = SubspaceEmbedding::new(make_config(30, 8, EmbeddingType::SparseRandom, 55))
            .expect("construction must succeed");
        let block = Array2::ones((30, 5));
        let sketched = emb.project_matrix(&block).expect("project_matrix must succeed");
        assert_eq!(sketched.nrows(), 8, "SA should have embedding_dim rows");
        assert_eq!(sketched.ncols(), 5, "SA should preserve column count");
    }
}