1use crate::error::{OptimizeError, OptimizeResult};
22use scirs2_core::ndarray::{Array1, Array2};
23
/// Strategy used to generate the random projection matrix `S` (shape `k × n`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EmbeddingType {
    /// Dense i.i.d. Gaussian entries scaled by `1/sqrt(k)` (classic JL sketch).
    GaussianRandom,
    /// Sparse sign matrix: entries `±sqrt(3/k)` with probability 1/6 each,
    /// zero otherwise (Achlioptas-style).
    SparseRandom,
    /// Fast Johnson-Lindenstrauss transform.
    /// NOTE(review): `SubspaceEmbedding::new` currently falls back to the
    /// dense Gaussian projection for this variant — no FJLT implementation
    /// exists yet.
    FastJohnsonLindenstrauss,
}
41
/// Configuration for constructing a `SubspaceEmbedding`.
#[derive(Debug, Clone)]
pub struct SubspaceEmbeddingConfig {
    /// Ambient dimension `n` of the vectors being projected.
    pub original_dim: usize,
    /// Target sketch dimension `k` (typically `k << n`, though not enforced).
    pub embedding_dim: usize,
    /// Which random projection family to materialize.
    pub embedding_type: EmbeddingType,
    /// Seed for the deterministic internal PRNG; equal seeds yield equal matrices.
    pub seed: u64,
}
54
/// A randomized linear map from `R^n` to `R^k`, materialized as a dense
/// `k × n` matrix at construction time.
pub struct SubspaceEmbedding {
    // Configuration this embedding was built from (dimensions, type, seed).
    config: SubspaceEmbeddingConfig,
    // The realized `k × n` projection matrix `S`.
    matrix: Array2<f64>,
}
65
66impl SubspaceEmbedding {
67 pub fn new(config: SubspaceEmbeddingConfig) -> OptimizeResult<Self> {
73 if config.original_dim == 0 || config.embedding_dim == 0 {
74 return Err(OptimizeError::InvalidInput(
75 "SubspaceEmbedding: dimensions must be non-zero".to_string(),
76 ));
77 }
78 let matrix = match config.embedding_type {
79 EmbeddingType::GaussianRandom => {
80 gaussian_projection(config.embedding_dim, config.original_dim, config.seed)
81 }
82 EmbeddingType::SparseRandom => {
83 sparse_projection(config.embedding_dim, config.original_dim, config.seed)
84 }
85 EmbeddingType::FastJohnsonLindenstrauss => {
86 gaussian_projection(config.embedding_dim, config.original_dim, config.seed)
89 }
90 };
91 Ok(Self { config, matrix })
92 }
93
94 pub fn project(&self, x: &Array1<f64>) -> OptimizeResult<Array1<f64>> {
100 if x.len() != self.config.original_dim {
101 return Err(OptimizeError::ValueError(format!(
102 "project: expected dim {}, got {}",
103 self.config.original_dim,
104 x.len()
105 )));
106 }
107 Ok(self.matrix.dot(x))
108 }
109
110 pub fn reconstruct(&self, y: &Array1<f64>) -> OptimizeResult<Array1<f64>> {
116 if y.len() != self.config.embedding_dim {
117 return Err(OptimizeError::ValueError(format!(
118 "reconstruct: expected dim {}, got {}",
119 self.config.embedding_dim,
120 y.len()
121 )));
122 }
123 Ok(self.matrix.t().dot(y))
124 }
125
126 pub fn project_matrix(&self, a: &Array2<f64>) -> OptimizeResult<Array2<f64>> {
135 if a.nrows() != self.config.original_dim {
136 return Err(OptimizeError::ValueError(format!(
137 "project_matrix: expected {} rows, got {}",
138 self.config.original_dim,
139 a.nrows()
140 )));
141 }
142 Ok(self.matrix.dot(a))
143 }
144
145 pub fn embedding_dim(&self) -> usize {
147 self.config.embedding_dim
148 }
149
150 pub fn original_dim(&self) -> usize {
152 self.config.original_dim
153 }
154
155 pub fn jl_epsilon(&self, n_points: usize, delta: f64) -> f64 {
163 let k = self.config.embedding_dim as f64;
164 (2.0 * (n_points as f64 / delta).ln() / k).sqrt()
165 }
166}
167
168fn gaussian_projection(k: usize, n: usize, seed: u64) -> Array2<f64> {
176 let scale = (k as f64).recip().sqrt();
177 let mut state = seed.wrapping_add(1); let data: Vec<f64> = (0..k * n)
179 .map(|_| {
180 state = state
182 .wrapping_mul(6_364_136_223_846_793_005)
183 .wrapping_add(1_442_695_040_888_963_407);
184 let u1 = ((state >> 11) as f64) / ((1u64 << 53) as f64) + 1e-300;
185 state = state
187 .wrapping_mul(6_364_136_223_846_793_005)
188 .wrapping_add(1_442_695_040_888_963_407);
189 let u2 = ((state >> 11) as f64) / ((1u64 << 53) as f64);
190 (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos() * scale
192 })
193 .collect();
194
195 Array2::from_shape_vec((k, n), data).unwrap_or_else(|_| Array2::zeros((k, n)))
196}
197
198fn sparse_projection(k: usize, n: usize, seed: u64) -> Array2<f64> {
201 let scale = (3.0_f64 / k as f64).sqrt();
202 let mut state = seed.wrapping_add(1);
203 let data: Vec<f64> = (0..k * n)
204 .map(|_| {
205 state = state
206 .wrapping_mul(6_364_136_223_846_793_005)
207 .wrapping_add(1_442_695_040_888_963_407);
208 let r = (state >> 32) % 6;
210 if r == 0 {
211 scale
212 } else if r == 1 {
213 -scale
214 } else {
215 0.0
216 }
217 })
218 .collect();
219
220 Array2::from_shape_vec((k, n), data).unwrap_or_else(|_| Array2::zeros((k, n)))
221}
222
223pub fn sketched_least_squares(
245 a: &Array2<f64>,
246 b: &Array1<f64>,
247 k: usize,
248 seed: u64,
249) -> OptimizeResult<Array1<f64>> {
250 let (m, n) = (a.nrows(), a.ncols());
251 if m == 0 || n == 0 {
252 return Err(OptimizeError::InvalidInput(
253 "sketched_least_squares: matrix must be non-empty".to_string(),
254 ));
255 }
256 if b.len() != m {
257 return Err(OptimizeError::ValueError(format!(
258 "sketched_least_squares: b length {} does not match matrix rows {m}",
259 b.len()
260 )));
261 }
262 if k == 0 {
263 return Err(OptimizeError::InvalidInput(
264 "sketched_least_squares: sketch dimension k must be > 0".to_string(),
265 ));
266 }
267
268 let config = SubspaceEmbeddingConfig {
269 original_dim: m,
270 embedding_dim: k,
271 embedding_type: EmbeddingType::GaussianRandom,
272 seed,
273 };
274 let emb = SubspaceEmbedding::new(config)?;
275
276 let sa = emb.project_matrix(a)?;
278 let sb = emb.matrix.dot(b);
279
280 let sat = sa.t().to_owned(); let satsa = sat.dot(&sa); let satsb = sat.dot(&sb); solve_dense_linear(&satsa, &satsb)
286}
287
288fn solve_dense_linear(a: &Array2<f64>, b: &Array1<f64>) -> OptimizeResult<Array1<f64>> {
296 let n = a.nrows();
297 if n == 0 {
298 return Ok(Array1::zeros(0));
299 }
300
301 let mut mat: Vec<f64> = a.iter().cloned().collect();
303 let mut rhs: Vec<f64> = b.iter().cloned().collect();
304
305 for col in 0..n {
306 let mut max_row = col;
308 let mut max_val = mat[col * n + col].abs();
309 for row in (col + 1)..n {
310 let v = mat[row * n + col].abs();
311 if v > max_val {
312 max_val = v;
313 max_row = row;
314 }
315 }
316 if max_val < 1e-300 {
317 return Err(OptimizeError::ComputationError(
318 "solve_dense_linear: singular or near-singular matrix".to_string(),
319 ));
320 }
321 if max_row != col {
323 for k in 0..n {
324 mat.swap(col * n + k, max_row * n + k);
325 }
326 rhs.swap(col, max_row);
327 }
328 let pivot = mat[col * n + col];
330 for row in (col + 1)..n {
331 let factor = mat[row * n + col] / pivot;
332 for k in col..n {
333 let v = mat[col * n + k];
334 mat[row * n + k] -= factor * v;
335 }
336 rhs[row] -= factor * rhs[col];
337 }
338 }
339
340 let mut x = vec![0.0_f64; n];
342 for ii in 0..n {
343 let i = n - 1 - ii;
344 let mut sum = rhs[i];
345 for j in (i + 1)..n {
346 sum -= mat[i * n + j] * x[j];
347 }
348 x[i] = sum / mat[i * n + i];
349 }
350
351 Ok(Array1::from(x))
352}
353
#[cfg(test)]
mod tests {
    use super::*;

    // Gaussian embedding: projecting an n-vector yields a k-vector.
    #[test]
    fn test_gaussian_projection_shape() {
        let cfg = SubspaceEmbeddingConfig {
            original_dim: 100,
            embedding_dim: 20,
            embedding_type: EmbeddingType::GaussianRandom,
            seed: 42,
        };
        let emb = SubspaceEmbedding::new(cfg).expect("construction must succeed");
        let x = Array1::zeros(100);
        let y = emb.project(&x).expect("project must succeed");
        assert_eq!(y.len(), 20, "projected dim should be embedding_dim");
    }

    // Sparse embedding: accessors report the configured dimensions.
    #[test]
    fn test_sparse_projection_shape() {
        let cfg = SubspaceEmbeddingConfig {
            original_dim: 50,
            embedding_dim: 10,
            embedding_type: EmbeddingType::SparseRandom,
            seed: 7,
        };
        let emb = SubspaceEmbedding::new(cfg).expect("construction must succeed");
        assert_eq!(emb.embedding_dim(), 10);
        assert_eq!(emb.original_dim(), 50);
    }

    // Projecting a vector of the wrong length must be rejected, not panic.
    #[test]
    fn test_projection_dimension_check() {
        let cfg = SubspaceEmbeddingConfig {
            original_dim: 20,
            embedding_dim: 5,
            embedding_type: EmbeddingType::GaussianRandom,
            seed: 1,
        };
        let emb = SubspaceEmbedding::new(cfg).expect("construction must succeed");
        // 15 != original_dim (20), so this should fail the dimension check.
        let x_wrong = Array1::zeros(15);
        let res = emb.project(&x_wrong);
        assert!(res.is_err(), "project with wrong dimension must fail");
    }

    // Sanity-check the JL distortion estimate for moderate parameters.
    #[test]
    fn test_jl_epsilon_reasonable() {
        let cfg = SubspaceEmbeddingConfig {
            original_dim: 1000,
            embedding_dim: 200,
            embedding_type: EmbeddingType::GaussianRandom,
            seed: 0,
        };
        let emb = SubspaceEmbedding::new(cfg).expect("construction must succeed");
        let eps = emb.jl_epsilon(1000, 0.01);
        assert!(eps > 0.0, "epsilon must be positive");
        assert!(
            eps < 1.0,
            "epsilon={eps} should be < 1.0 for reasonable parameters"
        );
    }

    // Round trip project -> reconstruct preserves the vector length
    // (values are only approximately recovered, so only shapes are checked).
    #[test]
    fn test_reconstruct_approx() {
        let cfg = SubspaceEmbeddingConfig {
            original_dim: 10,
            embedding_dim: 10,
            embedding_type: EmbeddingType::GaussianRandom,
            seed: 123,
        };
        let emb = SubspaceEmbedding::new(cfg).expect("construction must succeed");
        let x: Array1<f64> = Array1::from_iter((0..10).map(|i| i as f64));
        let y = emb.project(&x).expect("project must succeed");
        assert_eq!(y.len(), 10, "projected length");
        let x_rec = emb.reconstruct(&y).expect("reconstruct must succeed");
        assert_eq!(
            x_rec.len(),
            10,
            "reconstructed length must equal original_dim"
        );
    }

    // End-to-end: a consistent system (b = A x_true) should be solved to a
    // small relative residual with a sketch of size 4n.
    #[test]
    fn test_sketched_least_squares() {
        let m = 100_usize;
        let n = 10_usize;

        // Deterministic, well-conditioned test matrix built from cosines.
        let mut a_data = vec![0.0_f64; m * n];
        for i in 0..m {
            for j in 0..n {
                a_data[i * n + j] = ((i as f64) * (j as f64 + 1.0) * 0.1).cos();
            }
        }
        let a = Array2::from_shape_vec((m, n), a_data).expect("shape");
        let x_true: Array1<f64> = Array1::from_iter((1..=n).map(|i| i as f64));
        let b = a.dot(&x_true);

        let x_sol = sketched_least_squares(&a, &b, 4 * n, 999)
            .expect("sketched_least_squares must succeed");

        assert_eq!(x_sol.len(), n);
        // Relative residual ||Ax - b|| / ||b|| (epsilon guards divide-by-zero).
        let residual = a.dot(&x_sol) - &b;
        let rel_err = residual.dot(&residual).sqrt() / (b.dot(&b).sqrt() + 1e-30);
        assert!(
            rel_err < 0.1,
            "relative residual {rel_err} too large; sketched LS should be accurate"
        );
    }

    // Matrix sketching: SA has embedding_dim rows and the input's columns.
    #[test]
    fn test_project_matrix_shape() {
        let cfg = SubspaceEmbeddingConfig {
            original_dim: 30,
            embedding_dim: 8,
            embedding_type: EmbeddingType::SparseRandom,
            seed: 55,
        };
        let emb = SubspaceEmbedding::new(cfg).expect("construction must succeed");
        let a = Array2::ones((30, 5));
        let sa = emb.project_matrix(&a).expect("project_matrix must succeed");
        assert_eq!(sa.nrows(), 8, "SA should have embedding_dim rows");
        assert_eq!(sa.ncols(), 5, "SA should preserve column count");
    }
}