// scirs2_optimize/subspace_embedding.rs
//! Subspace embedding methods for dimensionality-reduced optimization.
//!
//! Uses random projections (Johnson-Lindenstrauss lemma) to reduce the dimension
//! of large optimization problems.  Three projection families are supported:
//!
//! - **Gaussian** random projection (`i.i.d. N(0, 1/k)` entries).
//! - **Sparse** random projection (Achlioptas 2003: entries ±√(3/k) with probability
//!   1/6 each, 0 with probability 2/3).
//! - **Fast JL** (placeholder backed by the Gaussian construction; a full SRHT
//!   implementation can be substituted without changing the public API).
//!
//! The module also exposes [`sketched_least_squares`], a one-shot sketching solver for
//! overdetermined systems `min ||Ax - b||²`.
//!
//! # References
//!
//! - Johnson, W.B. & Lindenstrauss, J. (1984). Extensions of Lipschitz mappings.
//! - Achlioptas, D. (2003). Database-friendly random projections. JCSS 66(4).
//! - Mahoney, M.W. (2011). Randomized algorithms for matrices and data. Found. Trends.

use crate::error::{OptimizeError, OptimizeResult};
use scirs2_core::ndarray::{Array1, Array2};

// ---------------------------------------------------------------------------
// Public types
// ---------------------------------------------------------------------------

/// Random projection embedding methods.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EmbeddingType {
    /// Gaussian random projection: entries drawn i.i.d. from N(0, 1/k).
    GaussianRandom,
    /// Sparse random projection (Achlioptas 2003): each entry is ±√(3/k)
    /// with probability 1/6 apiece, and 0 with probability 2/3.
    SparseRandom,
    /// Fast Johnson-Lindenstrauss transform (SRHT approximation).
    /// Currently backed by the Gaussian construction; a structured Hadamard
    /// variant can be swapped in later without breaking the API.
    FastJohnsonLindenstrauss,
}

42/// Configuration for a subspace embedding.
43#[derive(Debug, Clone)]
44pub struct SubspaceEmbeddingConfig {
45    /// Dimension of the original space (number of input coordinates).
46    pub original_dim: usize,
47    /// Dimension of the embedding / sketch space (`k`).
48    pub embedding_dim: usize,
49    /// Which projection family to use.
50    pub embedding_type: EmbeddingType,
51    /// RNG seed for reproducibility.
52    pub seed: u64,
53}

// ---------------------------------------------------------------------------
// Core embedding struct
// ---------------------------------------------------------------------------

59/// A random subspace embedding `S: R^n → R^k` (`k × n` projection matrix).
60pub struct SubspaceEmbedding {
61    config: SubspaceEmbeddingConfig,
62    /// The `k × n` projection matrix.
63    matrix: Array2<f64>,
64}
65
66impl SubspaceEmbedding {
67    /// Construct a new embedding according to `config`.
68    ///
69    /// # Errors
70    ///
71    /// Returns [`OptimizeError::InvalidInput`] if either dimension is zero.
72    pub fn new(config: SubspaceEmbeddingConfig) -> OptimizeResult<Self> {
73        if config.original_dim == 0 || config.embedding_dim == 0 {
74            return Err(OptimizeError::InvalidInput(
75                "SubspaceEmbedding: dimensions must be non-zero".to_string(),
76            ));
77        }
78        let matrix = match config.embedding_type {
79            EmbeddingType::GaussianRandom => {
80                gaussian_projection(config.embedding_dim, config.original_dim, config.seed)
81            }
82            EmbeddingType::SparseRandom => {
83                sparse_projection(config.embedding_dim, config.original_dim, config.seed)
84            }
85            EmbeddingType::FastJohnsonLindenstrauss => {
86                // Full SRHT requires a fast Walsh-Hadamard transform; we fall back to
87                // the Gaussian family which gives the same asymptotic JL guarantees.
88                gaussian_projection(config.embedding_dim, config.original_dim, config.seed)
89            }
90        };
91        Ok(Self { config, matrix })
92    }
93
94    /// Project a vector `x ∈ R^n → y = Sx ∈ R^k`.
95    ///
96    /// # Errors
97    ///
98    /// Returns [`OptimizeError::ValueError`] if `x.len() != original_dim`.
99    pub fn project(&self, x: &Array1<f64>) -> OptimizeResult<Array1<f64>> {
100        if x.len() != self.config.original_dim {
101            return Err(OptimizeError::ValueError(format!(
102                "project: expected dim {}, got {}",
103                self.config.original_dim,
104                x.len()
105            )));
106        }
107        Ok(self.matrix.dot(x))
108    }
109
110    /// Approximate reconstruction `x ≈ S^T y` (pseudo-inverse via transpose).
111    ///
112    /// # Errors
113    ///
114    /// Returns [`OptimizeError::ValueError`] if `y.len() != embedding_dim`.
115    pub fn reconstruct(&self, y: &Array1<f64>) -> OptimizeResult<Array1<f64>> {
116        if y.len() != self.config.embedding_dim {
117            return Err(OptimizeError::ValueError(format!(
118                "reconstruct: expected dim {}, got {}",
119                self.config.embedding_dim,
120                y.len()
121            )));
122        }
123        Ok(self.matrix.t().dot(y))
124    }
125
126    /// Project a matrix `A ∈ R^{n×p} → SA ∈ R^{k×p}`.
127    ///
128    /// The embedding matrix `S` is `k × n`, so `A` must have `n` rows
129    /// (i.e., `a.nrows() == original_dim`).
130    ///
131    /// # Errors
132    ///
133    /// Returns [`OptimizeError::ValueError`] if `a.nrows() != original_dim`.
134    pub fn project_matrix(&self, a: &Array2<f64>) -> OptimizeResult<Array2<f64>> {
135        if a.nrows() != self.config.original_dim {
136            return Err(OptimizeError::ValueError(format!(
137                "project_matrix: expected {} rows, got {}",
138                self.config.original_dim,
139                a.nrows()
140            )));
141        }
142        Ok(self.matrix.dot(a))
143    }
144
145    /// Embedding (sketch) dimension `k`.
146    pub fn embedding_dim(&self) -> usize {
147        self.config.embedding_dim
148    }
149
150    /// Original dimension `n`.
151    pub fn original_dim(&self) -> usize {
152        self.config.original_dim
153    }
154
155    /// Johnson-Lindenstrauss ε-distortion bound.
156    ///
157    /// For `n_points` points and failure probability `delta`,
158    /// the embedding preserves all pairwise distances within factor `(1 ± ε)`
159    /// with probability at least `1 − delta`.
160    ///
161    /// Formula: `ε = sqrt(2 * ln(n / δ) / k)`.
162    pub fn jl_epsilon(&self, n_points: usize, delta: f64) -> f64 {
163        let k = self.config.embedding_dim as f64;
164        (2.0 * (n_points as f64 / delta).ln() / k).sqrt()
165    }
166}

// ---------------------------------------------------------------------------
// Internal projection factories
// ---------------------------------------------------------------------------

172/// Build a `k × n` Gaussian projection matrix.
173/// Entries are i.i.d. `N(0, 1/k)` generated via Box-Muller transform
174/// with a minimal LCG for zero external dependencies.
175fn gaussian_projection(k: usize, n: usize, seed: u64) -> Array2<f64> {
176    let scale = (k as f64).recip().sqrt();
177    let mut state = seed.wrapping_add(1); // ensure non-zero seed
178    let data: Vec<f64> = (0..k * n)
179        .map(|_| {
180            // LCG step 1
181            state = state
182                .wrapping_mul(6_364_136_223_846_793_005)
183                .wrapping_add(1_442_695_040_888_963_407);
184            let u1 = ((state >> 11) as f64) / ((1u64 << 53) as f64) + 1e-300;
185            // LCG step 2
186            state = state
187                .wrapping_mul(6_364_136_223_846_793_005)
188                .wrapping_add(1_442_695_040_888_963_407);
189            let u2 = ((state >> 11) as f64) / ((1u64 << 53) as f64);
190            // Box-Muller
191            (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos() * scale
192        })
193        .collect();
194
195    Array2::from_shape_vec((k, n), data).unwrap_or_else(|_| Array2::zeros((k, n)))
196}
197
198/// Build a `k × n` sparse Achlioptas projection matrix.
199/// Each entry is `+√(3/k)` with prob 1/6, `−√(3/k)` with prob 1/6, 0 with prob 2/3.
200fn sparse_projection(k: usize, n: usize, seed: u64) -> Array2<f64> {
201    let scale = (3.0_f64 / k as f64).sqrt();
202    let mut state = seed.wrapping_add(1);
203    let data: Vec<f64> = (0..k * n)
204        .map(|_| {
205            state = state
206                .wrapping_mul(6_364_136_223_846_793_005)
207                .wrapping_add(1_442_695_040_888_963_407);
208            // Use top 32 bits for the decision.
209            let r = (state >> 32) % 6;
210            if r == 0 {
211                scale
212            } else if r == 1 {
213                -scale
214            } else {
215                0.0
216            }
217        })
218        .collect();
219
220    Array2::from_shape_vec((k, n), data).unwrap_or_else(|_| Array2::zeros((k, n)))
221}

// ---------------------------------------------------------------------------
// Sketched least squares
// ---------------------------------------------------------------------------

227/// Sketched least-squares solver for overdetermined systems `min ||Ax − b||²`.
228///
229/// Constructs a Gaussian sketch `S` of dimension `k × m`, then solves the
230/// reduced normal equations `(SA)ᵀ(SA) x = (SA)ᵀ(Sb)` via Gaussian elimination
231/// with partial pivoting.
232///
233/// # Arguments
234///
235/// * `a` — `m × n` system matrix (m ≥ n for an overdetermined system).
236/// * `b` — right-hand side of length `m`.
237/// * `k` — sketch dimension (typically `k ≈ 4n` for good accuracy).
238/// * `seed` — RNG seed for reproducibility.
239///
240/// # Errors
241///
242/// Returns [`OptimizeError::InvalidInput`] for empty matrices or dimension mismatches,
243/// and [`OptimizeError::ComputationError`] for a singular sketched normal matrix.
244pub fn sketched_least_squares(
245    a: &Array2<f64>,
246    b: &Array1<f64>,
247    k: usize,
248    seed: u64,
249) -> OptimizeResult<Array1<f64>> {
250    let (m, n) = (a.nrows(), a.ncols());
251    if m == 0 || n == 0 {
252        return Err(OptimizeError::InvalidInput(
253            "sketched_least_squares: matrix must be non-empty".to_string(),
254        ));
255    }
256    if b.len() != m {
257        return Err(OptimizeError::ValueError(format!(
258            "sketched_least_squares: b length {} does not match matrix rows {m}",
259            b.len()
260        )));
261    }
262    if k == 0 {
263        return Err(OptimizeError::InvalidInput(
264            "sketched_least_squares: sketch dimension k must be > 0".to_string(),
265        ));
266    }
267
268    let config = SubspaceEmbeddingConfig {
269        original_dim: m,
270        embedding_dim: k,
271        embedding_type: EmbeddingType::GaussianRandom,
272        seed,
273    };
274    let emb = SubspaceEmbedding::new(config)?;
275
276    // SA  (k × n)  and  Sb  (k)
277    let sa = emb.project_matrix(a)?;
278    let sb = emb.matrix.dot(b);
279
280    // Normal equations: (SA)^T (SA) x = (SA)^T Sb
281    let sat = sa.t().to_owned(); // n × k
282    let satsa = sat.dot(&sa); // n × n
283    let satsb = sat.dot(&sb); // n
284
285    solve_dense_linear(&satsa, &satsb)
286}
287
288/// Solve `A x = b` with Gaussian elimination and partial pivoting.
289///
290/// `A` is `n × n` (row-major), `b` is length `n`.
291///
292/// # Errors
293///
294/// Returns [`OptimizeError::ComputationError`] if the matrix is singular.
295fn solve_dense_linear(a: &Array2<f64>, b: &Array1<f64>) -> OptimizeResult<Array1<f64>> {
296    let n = a.nrows();
297    if n == 0 {
298        return Ok(Array1::zeros(0));
299    }
300
301    // Build augmented system [A | b] stored as flat row-major vecs.
302    let mut mat: Vec<f64> = a.iter().cloned().collect();
303    let mut rhs: Vec<f64> = b.iter().cloned().collect();
304
305    for col in 0..n {
306        // Partial pivoting: find the row with the largest absolute value in column `col`.
307        let mut max_row = col;
308        let mut max_val = mat[col * n + col].abs();
309        for row in (col + 1)..n {
310            let v = mat[row * n + col].abs();
311            if v > max_val {
312                max_val = v;
313                max_row = row;
314            }
315        }
316        if max_val < 1e-300 {
317            return Err(OptimizeError::ComputationError(
318                "solve_dense_linear: singular or near-singular matrix".to_string(),
319            ));
320        }
321        // Swap rows.
322        if max_row != col {
323            for k in 0..n {
324                mat.swap(col * n + k, max_row * n + k);
325            }
326            rhs.swap(col, max_row);
327        }
328        // Eliminate below.
329        let pivot = mat[col * n + col];
330        for row in (col + 1)..n {
331            let factor = mat[row * n + col] / pivot;
332            for k in col..n {
333                let v = mat[col * n + k];
334                mat[row * n + k] -= factor * v;
335            }
336            rhs[row] -= factor * rhs[col];
337        }
338    }
339
340    // Back-substitution.
341    let mut x = vec![0.0_f64; n];
342    for ii in 0..n {
343        let i = n - 1 - ii;
344        let mut sum = rhs[i];
345        for j in (i + 1)..n {
346            sum -= mat[i * n + j] * x[j];
347        }
348        x[i] = sum / mat[i * n + i];
349    }
350
351    Ok(Array1::from(x))
352}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building an embedding config in the tests below.
    fn make_config(
        n: usize,
        k: usize,
        ty: EmbeddingType,
        seed: u64,
    ) -> SubspaceEmbeddingConfig {
        SubspaceEmbeddingConfig {
            original_dim: n,
            embedding_dim: k,
            embedding_type: ty,
            seed,
        }
    }

    #[test]
    fn test_gaussian_projection_shape() {
        let emb = SubspaceEmbedding::new(make_config(100, 20, EmbeddingType::GaussianRandom, 42))
            .expect("construction must succeed");
        let input = Array1::zeros(100);
        let projected = emb.project(&input).expect("project must succeed");
        assert_eq!(projected.len(), 20, "projected dim should be embedding_dim");
    }

    #[test]
    fn test_sparse_projection_shape() {
        let emb = SubspaceEmbedding::new(make_config(50, 10, EmbeddingType::SparseRandom, 7))
            .expect("construction must succeed");
        assert_eq!(emb.embedding_dim(), 10);
        assert_eq!(emb.original_dim(), 50);
    }

    #[test]
    fn test_projection_dimension_check() {
        let emb = SubspaceEmbedding::new(make_config(20, 5, EmbeddingType::GaussianRandom, 1))
            .expect("construction must succeed");
        // A 15-vector into a 20-dimensional embedding must be rejected.
        let too_short = Array1::zeros(15);
        assert!(
            emb.project(&too_short).is_err(),
            "project with wrong dimension must fail"
        );
    }

    #[test]
    fn test_jl_epsilon_reasonable() {
        let emb = SubspaceEmbedding::new(make_config(1000, 200, EmbeddingType::GaussianRandom, 0))
            .expect("construction must succeed");
        // k = 200 with 1000 points at delta = 0.01 keeps the JL bound below 1.
        let eps = emb.jl_epsilon(1000, 0.01);
        assert!(eps > 0.0, "epsilon must be positive");
        assert!(
            eps < 1.0,
            "epsilon={eps} should be < 1.0 for reasonable parameters"
        );
    }

    #[test]
    fn test_reconstruct_approx() {
        // Project then reconstruct: S^T (S x) approximates x in expectation,
        // so here we only verify the round-trip shapes.
        let emb = SubspaceEmbedding::new(make_config(10, 10, EmbeddingType::GaussianRandom, 123))
            .expect("construction must succeed");
        let original: Array1<f64> = Array1::from_iter((0..10).map(|i| i as f64));
        let sketched = emb.project(&original).expect("project must succeed");
        assert_eq!(sketched.len(), 10, "projected length");
        let recovered = emb.reconstruct(&sketched).expect("reconstruct must succeed");
        assert_eq!(
            recovered.len(),
            10,
            "reconstructed length must equal original_dim"
        );
    }

    #[test]
    fn test_sketched_least_squares() {
        // Overdetermined 100 × 10 system with known solution x_true = [1..10].
        let (rows, cols) = (100_usize, 10_usize);

        // Well-conditioned A: A[i, j] = cos(i * (j + 1) * 0.1).
        let entries: Vec<f64> = (0..rows * cols)
            .map(|idx| {
                let (i, j) = (idx / cols, idx % cols);
                ((i as f64) * (j as f64 + 1.0) * 0.1).cos()
            })
            .collect();
        let a = Array2::from_shape_vec((rows, cols), entries).expect("shape");
        let x_true: Array1<f64> = Array1::from_iter((1..=cols).map(|i| i as f64));
        let b = a.dot(&x_true);

        // Sketch with k = 4n to get good accuracy.
        let x_sol = sketched_least_squares(&a, &b, 4 * cols, 999)
            .expect("sketched_least_squares must succeed");
        assert_eq!(x_sol.len(), cols);

        // Check the relative residual ||Ax - b|| / ||b||; with k=40, n=10 it
        // should be small even though the sketch is random.
        let residual = a.dot(&x_sol) - &b;
        let rel_err = residual.dot(&residual).sqrt() / (b.dot(&b).sqrt() + 1e-30);
        assert!(
            rel_err < 0.1,
            "relative residual {rel_err} too large; sketched LS should be accurate"
        );
    }

    #[test]
    fn test_project_matrix_shape() {
        let emb = SubspaceEmbedding::new(make_config(30, 8, EmbeddingType::SparseRandom, 55))
            .expect("construction must succeed");
        let a = Array2::ones((30, 5));
        let sa = emb.project_matrix(&a).expect("project_matrix must succeed");
        assert_eq!(sa.nrows(), 8, "SA should have embedding_dim rows");
        assert_eq!(sa.ncols(), 5, "SA should preserve column count");
    }
}