// scirs2_optimize/subspace_embed/mod.rs
1//! Subspace Embedding Optimization
2//!
3//! Reduces the dimensionality of a large-scale optimization problem by projecting
//! the search space into a random low-dimensional subspace and optimizing there.
5//!
6//! ## Algorithm sketch
7//!
8//! For each restart:
9//! 1. Sample a random projection matrix P of shape `(subspace_dim × full_dim)`.
10//! 2. Optimize the surrogate g(y) = f(P^T y) in the subspace via coordinate descent.
11//! 3. Recover the full-dimensional point x = P^T y.
12//! 4. Clip to bounds if provided.
13//!
14//! The method is particularly effective when the objective has a low intrinsic
15//! dimensionality (e.g., the effective subspace spanned by the gradient is small).
16//!
17//! ## References
18//!
19//! - Wang, Z. et al. (2016). "Bayesian Optimization in a Billion Dimensions via
20//!   Random Embeddings". JAIR 55.
21//! - Choromanski, K. et al. (2022). "Geometry-Aware Structured Transformations".
22
23use crate::error::{OptimizeError, OptimizeResult};
24
25// ──────────────────────────────────────────────────────────────── SketchType ──
26
/// Type of random projection matrix to use for subspace embedding.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub enum SketchType {
    /// Dense Gaussian sketch: `P[i][j]` ~ N(0, 1/sub_dim).
    ///
    /// This is the default sketch type.
    #[default]
    Gaussian,
    /// Rademacher sketch: `P[i][j]` = +/-1/sqrt(sub_dim) with equal probability.
    Rademacher,
    /// Count sketch: each full-dim coordinate maps to one sub-dim bucket with ±1 sign.
    CountSketch,
    /// Subsampled Randomized Hadamard Transform (approximate).
    SRHT,
}
46
47// ───────────────────────────────────────────────────────────── SubspaceConfig ──
48
49/// Configuration for the subspace embedding optimizer.
50#[derive(Debug, Clone)]
51pub struct SubspaceConfig {
52    /// Dimensionality of the random subspace.
53    pub subspace_dim: usize,
54    /// Number of independent restarts.
55    pub n_restarts: usize,
56    /// Number of coordinate-descent iterations per restart.
57    pub n_local_iter: usize,
58    /// RNG seed for reproducibility.
59    pub seed: u64,
60    /// Type of random sketch to use.
61    pub sketch_type: SketchType,
62    /// Finite-difference step size for gradient estimation.
63    pub fd_step: f64,
64    /// Step size (learning rate) for coordinate descent updates.
65    pub step_size: f64,
66}
67
68impl Default for SubspaceConfig {
69    fn default() -> Self {
70        Self {
71            subspace_dim: 100,
72            n_restarts: 5,
73            n_local_iter: 50,
74            seed: 42,
75            sketch_type: SketchType::Gaussian,
76            fd_step: 1e-5,
77            step_size: 0.1,
78        }
79    }
80}
81
82// ──────────────────────────────────────────────────────────── LCG utilities ──
83
/// Advance a Knuth-style LCG and return the next state.
///
/// Uses Knuth's MMIX multiplier/increment pair; wrapping arithmetic gives the
/// intended mod-2^64 behavior.
#[inline]
fn lcg_next(state: u64) -> u64 {
    const MULTIPLIER: u64 = 6_364_136_223_846_793_005;
    const INCREMENT: u64 = 1_442_695_040_888_963_407;
    state.wrapping_mul(MULTIPLIER).wrapping_add(INCREMENT)
}
91
/// Map an LCG state to a uniform f64 in `[0, 1)`.
///
/// Keeps the top 53 bits (the f64 mantissa width) and scales by 2^-53, so the
/// result is always strictly below 1.0.
#[inline]
fn lcg_to_f64(state: u64) -> f64 {
    const SCALE: f64 = (1u64 << 53) as f64;
    let mantissa = state >> 11;
    mantissa as f64 / SCALE
}
97
98/// Draw a standard-normal sample using the Box-Muller transform.
99///
100/// Returns (z0, z1) and the next RNG state after consuming two uniform draws.
101fn box_muller(state: u64) -> (f64, f64, u64) {
102    use std::f64::consts::PI;
103    let s1 = lcg_next(state);
104    let s2 = lcg_next(s1);
105    let u1 = lcg_to_f64(s1).max(1e-300); // avoid log(0)
106    let u2 = lcg_to_f64(s2);
107    let r = (-2.0 * u1.ln()).sqrt();
108    let theta = 2.0 * PI * u2;
109    (r * theta.cos(), r * theta.sin(), s2)
110}
111
112// ────────────────────────────────────────────────────── Sketch generators ──
113
114/// Generate a Rademacher sketch matrix of shape `(sub_dim × full_dim)`.
115///
116/// `P[i][j]` = +/-1/sqrt(sub_dim) with equal probability (seeded LCG).
117pub fn rademacher_sketch(full_dim: usize, sub_dim: usize, seed: u64) -> Vec<Vec<f64>> {
118    let scale = if sub_dim > 0 {
119        1.0 / (sub_dim as f64).sqrt()
120    } else {
121        1.0
122    };
123    let mut state = seed;
124    let mut p = vec![vec![0.0_f64; full_dim]; sub_dim];
125    for row in p.iter_mut() {
126        for val in row.iter_mut() {
127            state = lcg_next(state);
128            *val = if state & 1 == 0 { scale } else { -scale };
129        }
130    }
131    p
132}
133
134/// Generate a count-sketch matrix of shape `(sub_dim × full_dim)`.
135///
136/// Each full-dim coordinate maps to exactly one sub-dim bucket with a random ±1 sign.
137pub fn count_sketch(full_dim: usize, sub_dim: usize, seed: u64) -> Vec<Vec<f64>> {
138    let mut p = vec![vec![0.0_f64; full_dim]; sub_dim.max(1)];
139    let mut state = seed;
140    let s = sub_dim.max(1);
141    for j in 0..full_dim {
142        state = lcg_next(state);
143        let bucket = (state as usize) % s;
144        state = lcg_next(state);
145        let sign: f64 = if state & 1 == 0 { 1.0 } else { -1.0 };
146        p[bucket][j] = sign;
147    }
148    p
149}
150
151/// Generate a Gaussian sketch matrix of shape `(sub_dim × full_dim)`.
152///
153/// `P[i][j]` ~ N(0, 1/sub_dim) (Box-Muller, seeded LCG).
154pub fn gaussian_sketch(full_dim: usize, sub_dim: usize, seed: u64) -> Vec<Vec<f64>> {
155    let scale = if sub_dim > 0 {
156        1.0 / (sub_dim as f64)
157    } else {
158        1.0
159    };
160    let mut state = seed;
161    let mut p = vec![vec![0.0_f64; full_dim]; sub_dim];
162    let mut i = 0;
163    let mut j = 0;
164    let total = sub_dim * full_dim;
165    let mut count = 0;
166    while count < total {
167        let (z0, z1, new_state) = box_muller(state);
168        state = new_state;
169        if count < total {
170            p[i][j] = z0 * scale.sqrt();
171            j += 1;
172            if j == full_dim {
173                j = 0;
174                i += 1;
175            }
176            count += 1;
177        }
178        if count < total {
179            p[i][j] = z1 * scale.sqrt();
180            j += 1;
181            if j == full_dim {
182                j = 0;
183                i += 1;
184            }
185            count += 1;
186        }
187    }
188    p
189}
190
/// Generate a SRHT-style sketch (approximation).
///
/// Exact SRHT requires a fast Hadamard transform; this implementation simply
/// delegates to [`rademacher_sketch`] at the requested `(sub_dim × full_dim)`
/// shape — no separate row-sampling step is performed. The Rademacher sketch
/// is used as a practical substitute with the same parameter budget.
fn srht_sketch(full_dim: usize, sub_dim: usize, seed: u64) -> Vec<Vec<f64>> {
    // Use Rademacher as a practical SRHT substitute.
    rademacher_sketch(full_dim, sub_dim, seed)
}
200
201// ───────────────────────────────────────────── SubspaceEmbeddingOptimizer ──
202
203/// Optimizer that works in a random low-dimensional subspace.
204#[derive(Debug, Clone)]
205pub struct SubspaceEmbeddingOptimizer {
206    config: SubspaceConfig,
207    /// LCG state (advanced at each use).
208    rng_state: u64,
209}
210
211impl SubspaceEmbeddingOptimizer {
212    /// Create a new optimizer with the given config.
213    pub fn new(config: SubspaceConfig) -> Self {
214        let rng_state = config.seed;
215        Self { config, rng_state }
216    }
217
218    /// Generate the projection matrix according to `sketch_type`.
219    pub fn embed(sketch: &SketchType, full_dim: usize, sub_dim: usize, seed: u64) -> Vec<Vec<f64>> {
220        match sketch {
221            SketchType::Gaussian => gaussian_sketch(full_dim, sub_dim, seed),
222            SketchType::Rademacher => rademacher_sketch(full_dim, sub_dim, seed),
223            SketchType::CountSketch => count_sketch(full_dim, sub_dim, seed),
224            SketchType::SRHT => srht_sketch(full_dim, sub_dim, seed),
225        }
226    }
227
228    /// Multiply P^T (shape full_dim × sub_dim) by y (length sub_dim) to get x in full_dim.
229    ///
230    /// x[j] = Σ_i P[i][j] * y[i]
231    fn backproject(p: &[Vec<f64>], y: &[f64]) -> Vec<f64> {
232        let full_dim = if p.is_empty() { 0 } else { p[0].len() };
233        let sub_dim = p.len();
234        let mut x = vec![0.0_f64; full_dim];
235        for i in 0..sub_dim.min(y.len()) {
236            for j in 0..full_dim {
237                x[j] += p[i][j] * y[i];
238            }
239        }
240        x
241    }
242
243    /// Clip a point to the provided box bounds.
244    fn clip_to_bounds(x: &mut [f64], bounds: Option<&[(f64, f64)]>) {
245        if let Some(b) = bounds {
246            for (xi, &(lo, hi)) in x.iter_mut().zip(b.iter()) {
247                if xi.is_nan() {
248                    *xi = (lo + hi) / 2.0;
249                } else {
250                    *xi = xi.clamp(lo, hi);
251                }
252            }
253        }
254    }
255
256    /// Minimise `f` over `full_dim` dimensions by optimising in a random subspace.
257    ///
258    /// # Arguments
259    ///
260    /// - `f`: Objective function. Receives a slice of length `full_dim`.
261    /// - `full_dim`: Dimensionality of the original search space.
262    /// - `bounds`: Optional box constraints `[(lo, hi); full_dim]`.
263    ///
264    /// # Returns
265    ///
266    /// `(x_best, f_best)` — the best point found across all restarts.
267    pub fn minimize(
268        &mut self,
269        f: impl Fn(&[f64]) -> f64 + Clone,
270        full_dim: usize,
271        bounds: Option<&[(f64, f64)]>,
272    ) -> OptimizeResult<(Vec<f64>, f64)> {
273        if full_dim == 0 {
274            return Err(OptimizeError::InvalidInput(
275                "full_dim must be > 0".to_string(),
276            ));
277        }
278        let sub_dim = self.config.subspace_dim.min(full_dim);
279        let n_restarts = self.config.n_restarts;
280        let n_local_iter = self.config.n_local_iter;
281        let fd_step = self.config.fd_step;
282        let step_size = self.config.step_size;
283        let sketch_type = self.config.sketch_type.clone();
284
285        let mut best_x: Option<Vec<f64>> = None;
286        let mut best_val = f64::INFINITY;
287
288        for restart in 0..n_restarts {
289            // Unique seed per restart.
290            self.rng_state = lcg_next(self.config.seed.wrapping_add(restart as u64 * 1_000_003));
291            let seed_r = self.rng_state;
292
293            let p = Self::embed(&sketch_type, full_dim, sub_dim, seed_r);
294
295            // Initialise y (subspace point) randomly in [-1, 1]^sub_dim.
296            let mut y: Vec<f64> = (0..sub_dim)
297                .map(|_| {
298                    self.rng_state = lcg_next(self.rng_state);
299                    lcg_to_f64(self.rng_state) * 2.0 - 1.0
300                })
301                .collect();
302
303            // Wrap f so it operates via back-projection.
304            let p_ref = &p;
305            let f_sub = |y: &[f64]| -> f64 {
306                let mut x = Self::backproject(p_ref, y);
307                Self::clip_to_bounds(&mut x, bounds);
308                f(&x)
309            };
310
311            // Local coordinate-descent in subspace.
312            let (y_opt, _) =
313                coord_descent_subspace(f_sub, y.clone(), n_local_iter, step_size, fd_step);
314            y = y_opt;
315
316            // Recover full-space point.
317            let mut x = Self::backproject(&p, &y);
318            Self::clip_to_bounds(&mut x, bounds);
319            let val = f(&x);
320
321            if val < best_val {
322                best_val = val;
323                best_x = Some(x);
324            }
325        }
326
327        match best_x {
328            Some(x) => Ok((x, best_val)),
329            None => Err(OptimizeError::ConvergenceError(
330                "Subspace optimizer: no valid restart completed".to_string(),
331            )),
332        }
333    }
334}
335
336// ──────────────────────────────────────────── coord_descent_subspace ──
337
/// Coordinate descent in the subspace.
///
/// Cycles through coordinates, estimating each partial derivative with a
/// central finite difference and stepping against it with learning rate
/// `step`. The best point seen (not necessarily the last iterate) is returned.
///
/// # Arguments
///
/// - `f`: Objective function on subspace points.
/// - `x0`: Initial point in the subspace.
/// - `n_iter`: Total number of coordinate updates.
/// - `step`: Gradient-descent step size.
/// - `fd_step`: Finite-difference step for partial derivative estimation.
///
/// # Returns
///
/// `(x_best, f_best)` — best point found during descent; `(x0, 0.0)` for an
/// empty starting point.
pub fn coord_descent_subspace(
    f: impl Fn(&[f64]) -> f64,
    x0: Vec<f64>,
    n_iter: usize,
    step: f64,
    fd_step: f64,
) -> (Vec<f64>, f64) {
    let dim = x0.len();
    if dim == 0 {
        return (x0, 0.0);
    }

    let mut current = x0;
    let mut best_x = current.clone();
    let mut best_val = f(&current);

    for it in 0..n_iter {
        let c = it % dim;

        // Central difference estimate of ∂f/∂x_c at `current`.
        let mut forward = current.clone();
        let mut backward = current.clone();
        forward[c] += fd_step;
        backward[c] -= fd_step;
        let partial = (f(&forward) - f(&backward)) / (2.0 * fd_step);

        // Steepest-descent step along coordinate `c`.
        current[c] -= step * partial;
        let val = f(&current);

        if val < best_val {
            best_val = val;
            best_x = current.clone();
        }
    }

    (best_x, best_val)
}
391
392// ═══════════════════════════════════════════════════════════════════ tests ═══
393
#[cfg(test)]
mod tests {
    use super::*;

    /// Quadratic f(x) = Σ (x_i - t_i)^2 with known minimum at target.
    fn quadratic(x: &[f64], target: &[f64]) -> f64 {
        x.iter()
            .zip(target.iter())
            .map(|(xi, ti)| (xi - ti).powi(2))
            .sum()
    }

    // Shape contract: rows = sub_dim, cols = full_dim.
    #[test]
    fn gaussian_sketch_correct_shape() {
        let p = gaussian_sketch(20, 5, 42);
        assert_eq!(p.len(), 5, "sub_dim rows");
        assert_eq!(p[0].len(), 20, "full_dim cols");
    }

    // Every Rademacher entry has magnitude exactly 1/sqrt(sub_dim).
    #[test]
    fn rademacher_sketch_values_are_unit_scaled() {
        let sub = 4;
        let p = rademacher_sketch(10, sub, 7);
        let expected = 1.0 / (sub as f64).sqrt();
        for row in &p {
            for &v in row {
                assert!((v.abs() - expected).abs() < 1e-12, "v={v}");
            }
        }
    }

    // Count sketch invariant: each input coordinate maps to exactly one bucket.
    #[test]
    fn count_sketch_one_nonzero_per_column() {
        let full = 20;
        let sub = 4;
        let p = count_sketch(full, sub, 99);
        for j in 0..full {
            let nonzero: usize = (0..sub).filter(|&i| p[i][j] != 0.0).count();
            assert_eq!(
                nonzero, 1,
                "column {j} should have exactly 1 non-zero entry"
            );
        }
    }

    // Coordinate descent should make clear progress on a separable quadratic.
    #[test]
    fn coord_descent_subspace_descends_on_quadratic() {
        let target = vec![3.0_f64; 5];
        let f = |x: &[f64]| {
            x.iter()
                .zip(target.iter())
                .map(|(xi, ti)| (xi - ti).powi(2))
                .sum::<f64>()
        };
        let x0 = vec![0.0_f64; 5];
        let (x_opt, val) = coord_descent_subspace(f, x0, 200, 0.5, 1e-5);
        assert!(val < 1.0, "Should converge toward target; val={val}");
        for (xi, ti) in x_opt.iter().zip(target.iter()) {
            assert!((xi - ti).abs() < 0.5, "xi={xi}, ti={ti}");
        }
    }

    // End-to-end: the optimizer should at least improve over the origin.
    #[test]
    fn subspace_optimizer_minimizes_quadratic() {
        let target: Vec<f64> = (0..10).map(|i| i as f64 * 0.1).collect();
        let config = SubspaceConfig {
            subspace_dim: 5,
            n_restarts: 3,
            n_local_iter: 100,
            seed: 42,
            sketch_type: SketchType::Rademacher,
            fd_step: 1e-5,
            step_size: 0.3,
        };
        let mut opt = SubspaceEmbeddingOptimizer::new(config);
        let t = target.clone();
        let result = opt.minimize(move |x| quadratic(x, &t), 10, None);
        assert!(result.is_ok(), "Optimizer should succeed");
        let (x_opt, f_opt) = result.unwrap();

        // Compute the true minimum (all zeros since we project and the min in
        // subspace approximates the full-space min up to projection error).
        // We only require that the found value is less than the initial f(0) = Σ t_i^2.
        let f_zero: f64 = target.iter().map(|t| t * t).sum();
        assert!(
            f_opt < f_zero,
            "Optimizer should improve over f(0)={f_zero}, got f_opt={f_opt}"
        );
        assert_eq!(x_opt.len(), 10);
    }

    // Bound handling: with the unconstrained optimum far outside the box,
    // every returned coordinate must still be clipped into [-1, 1].
    #[test]
    fn subspace_optimizer_respects_bounds() {
        let target = vec![10.0_f64; 4]; // far outside bounds
        let bounds: Vec<(f64, f64)> = vec![(-1.0, 1.0); 4];
        let config = SubspaceConfig {
            subspace_dim: 2,
            n_restarts: 2,
            n_local_iter: 20,
            ..Default::default()
        };
        let mut opt = SubspaceEmbeddingOptimizer::new(config);
        let t = target.clone();
        let (x_opt, _) = opt
            .minimize(move |x| quadratic(x, &t), 4, Some(&bounds))
            .unwrap();
        for &xi in &x_opt {
            assert!(
                xi >= -1.0 - 1e-10 && xi <= 1.0 + 1e-10,
                "xi={xi} out of bounds"
            );
        }
    }

    // Input validation: a zero-dimensional problem is rejected, not solved.
    #[test]
    fn subspace_optimizer_zero_dim_errors() {
        let mut opt = SubspaceEmbeddingOptimizer::new(SubspaceConfig::default());
        let result = opt.minimize(|_x| 0.0_f64, 0, None);
        assert!(result.is_err());
    }
}
514}