Skip to main content

oxilean_std/convex_optimization/
types.rs

1//! Auto-generated module
2//!
3//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)
4
5use super::functions::*;
6use super::functions::{ConvexFunction, ProxableFunction};
7
/// Gradient descent optimizer.
///
/// First-order minimizer for differentiable convex functions; `minimize`
/// chooses each step with a backtracking (Armijo) line search.
#[derive(Debug, Clone)]
pub struct GradientDescent {
    /// Fixed learning rate.
    /// NOTE(review): not consulted by `minimize` in this file, which derives
    /// its step from backtracking line search — confirm whether callers rely on it.
    pub learning_rate: f64,
    /// Maximum number of iterations.
    pub max_iter: usize,
    /// Convergence tolerance (on gradient norm).
    pub tol: f64,
}
18impl GradientDescent {
19    /// Create a new gradient descent optimizer.
20    pub fn new(lr: f64, max_iter: usize, tol: f64) -> Self {
21        Self {
22            learning_rate: lr,
23            max_iter,
24            tol,
25        }
26    }
27    /// Backtracking line search satisfying the Armijo condition.
28    ///
29    /// Returns a step size α such that f(x - α ∇f(x)) ≤ f(x) - c₁ α ‖∇f(x)‖².
30    pub fn backtracking_line_search<F: ConvexFunction>(f: &F, x: &[f64], grad: &[f64]) -> f64 {
31        let c1 = 1e-4_f64;
32        let rho = 0.5_f64;
33        let mut alpha = 1.0_f64;
34        let fx = f.eval(x);
35        let grad_norm_sq: f64 = grad.iter().map(|g| g * g).sum();
36        for _ in 0..50 {
37            let x_new: Vec<f64> = x.iter().zip(grad).map(|(xi, gi)| xi - alpha * gi).collect();
38            if f.eval(&x_new) <= fx - c1 * alpha * grad_norm_sq {
39                break;
40            }
41            alpha *= rho;
42        }
43        alpha
44    }
45    /// Minimize `f` starting from `x0`.
46    ///
47    /// Returns `(x_star, f(x_star), iterations)`.
48    pub fn minimize<F: ConvexFunction>(&self, f: &F, x0: &[f64]) -> (Vec<f64>, f64, usize) {
49        let mut x = x0.to_vec();
50        let mut iters = 0_usize;
51        for k in 0..self.max_iter {
52            let grad = f.gradient(&x);
53            let grad_norm: f64 = grad.iter().map(|g| g * g).sum::<f64>().sqrt();
54            if grad_norm < self.tol {
55                iters = k;
56                break;
57            }
58            let alpha = Self::backtracking_line_search(f, &x, &grad);
59            for (xi, gi) in x.iter_mut().zip(&grad) {
60                *xi -= alpha * gi;
61            }
62            iters = k + 1;
63        }
64        let fval = f.eval(&x);
65        (x, fval, iters)
66    }
67}
/// Mirror descent optimizer using a Bregman divergence generating function.
///
/// Minimizes f(x) over a convex domain using the update:
///   x^{t+1} = argmin_{x in X} { eta * <grad f(x^t), x> + D_h(x, x^t) }
///
/// With negative entropy as h, this gives the Multiplicative Weights / Hedge algorithm.
/// With squared Euclidean norm as h, this reduces to standard gradient descent.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct MirrorDescentSolver {
    /// Learning rate eta used in every mirror step.
    pub eta: f64,
    /// Maximum number of iterations.
    pub max_iter: usize,
    /// Convergence tolerance on the Euclidean norm of the gradient.
    pub tol: f64,
    /// Whether to use negative entropy (simplex domain) or Euclidean norm (R^n).
    pub use_entropy: bool,
}
87impl MirrorDescentSolver {
88    /// Create a new mirror descent solver.
89    pub fn new(eta: f64, max_iter: usize, tol: f64, use_entropy: bool) -> Self {
90        Self {
91            eta,
92            max_iter,
93            tol,
94            use_entropy,
95        }
96    }
97    /// Project onto the probability simplex: x_i >= 0, sum x_i = 1.
98    pub fn project_simplex(v: &[f64]) -> Vec<f64> {
99        let _n = v.len();
100        let mut u: Vec<f64> = v.to_vec();
101        u.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
102        let mut cssv = 0.0_f64;
103        let mut rho = 0_usize;
104        for (j, &uj) in u.iter().enumerate() {
105            cssv += uj;
106            if uj - (cssv - 1.0) / (j as f64 + 1.0) > 0.0 {
107                rho = j;
108            }
109        }
110        let cssv_rho: f64 = u[..=rho].iter().sum();
111        let theta = (cssv_rho - 1.0) / (rho as f64 + 1.0);
112        v.iter().map(|vi| (vi - theta).max(0.0)).collect()
113    }
114    /// Mirror descent step with negative entropy (Multiplicative Weights):
115    ///   x^{t+1}_i = x^t_i * exp(-eta * grad_i) / Z
116    fn entropy_step(x: &[f64], grad: &[f64], eta: f64) -> Vec<f64> {
117        let log_x_new: Vec<f64> = x
118            .iter()
119            .zip(grad)
120            .map(|(xi, gi)| xi.ln() - eta * gi)
121            .collect();
122        let max_log = log_x_new.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
123        let exp_vals: Vec<f64> = log_x_new.iter().map(|v| (v - max_log).exp()).collect();
124        let z: f64 = exp_vals.iter().sum();
125        exp_vals.iter().map(|v| v / z).collect()
126    }
127    /// Run mirror descent to minimize `f`. Returns `(x_star, f(x_star), iterations)`.
128    pub fn minimize<F: ConvexFunction>(&self, f: &F, x0: &[f64]) -> (Vec<f64>, f64, usize) {
129        let mut x = if self.use_entropy {
130            Self::project_simplex(x0)
131        } else {
132            x0.to_vec()
133        };
134        let mut best_x = x.clone();
135        let mut best_f = f.eval(&x);
136        let mut iters = self.max_iter;
137        for k in 0..self.max_iter {
138            let grad = f.gradient(&x);
139            let grad_norm: f64 = grad.iter().map(|g| g * g).sum::<f64>().sqrt();
140            if grad_norm < self.tol {
141                iters = k;
142                break;
143            }
144            let x_new = if self.use_entropy {
145                Self::entropy_step(&x, &grad, self.eta)
146            } else {
147                x.iter()
148                    .zip(&grad)
149                    .map(|(xi, gi)| xi - self.eta * gi)
150                    .collect()
151            };
152            let fx_new = f.eval(&x_new);
153            if fx_new < best_f {
154                best_f = fx_new;
155                best_x = x_new.clone();
156            }
157            x = x_new;
158        }
159        (best_x, best_f, iters)
160    }
161    /// Compute the Bregman divergence D_h(x, y) for h = negative entropy:
162    ///   D_h(x,y) = sum_i [ x_i * log(x_i / y_i) - x_i + y_i ]  (KL divergence).
163    pub fn bregman_kl(x: &[f64], y: &[f64]) -> f64 {
164        x.iter()
165            .zip(y)
166            .map(|(xi, yi)| {
167                if *xi <= 0.0 {
168                    return 0.0;
169                }
170                xi * (xi / yi).ln() - xi + yi
171            })
172            .sum()
173    }
174}
/// Verifier for the Restricted Isometry Property (RIP) of a matrix.
///
/// Checks the RIP-s condition: for all s-sparse vectors x,
///   (1 - delta) ||x||^2 <= ||Ax||^2 <= (1 + delta) ||x||^2.
///
/// Approximates delta_s by probing a family of deterministic s-sparse
/// alternating-sign test vectors, one per trial.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct RipVerifier {
    /// Sparsity level s to check.
    pub sparsity: usize,
    /// Number of probe vectors to test.
    pub num_trials: usize,
}
impl RipVerifier {
    /// Create a new RIP verifier.
    pub fn new(sparsity: usize, num_trials: usize) -> Self {
        RipVerifier {
            sparsity,
            num_trials,
        }
    }
    /// Matrix-vector product Ax for A (m x n) and x (n).
    fn mat_vec(a: &[Vec<f64>], x: &[f64]) -> Vec<f64> {
        let mut product = Vec::with_capacity(a.len());
        for row in a {
            let mut acc = 0.0_f64;
            for (aij, xi) in row.iter().zip(x) {
                acc += aij * xi;
            }
            product.push(acc);
        }
        product
    }
    /// Estimate the RIP constant delta_s of `a` by testing s-sparse vectors.
    ///
    /// Returns `(delta_lower, delta_upper)`: the worst deviations of
    /// ||Ax||²/||x||² below and around 1 observed over all trials. A small
    /// delta_upper < 1 indicates A likely satisfies RIP-s.
    pub fn estimate_rip_constant(&self, a: &[Vec<f64>]) -> (f64, f64) {
        if a.is_empty() {
            return (0.0, 0.0);
        }
        let n = a[0].len();
        let s = self.sparsity.min(n);
        let mut lower = 0.0_f64;
        let mut upper = 0.0_f64;
        for trial in 0..self.num_trials {
            // Deterministic s-sparse probe with alternating +/-1 entries.
            let mut probe = vec![0.0_f64; n];
            for k in 0..s {
                let idx = (trial * s + k) % n;
                probe[idx] = if k % 2 == 0 { 1.0 } else { -1.0 };
            }
            let probe_sq: f64 = probe.iter().map(|p| p * p).sum();
            // Cancellation can zero the probe (indices wrapping); skip it.
            if probe_sq < 1e-12 {
                continue;
            }
            let image = Self::mat_vec(a, &probe);
            let image_sq: f64 = image.iter().map(|v| v * v).sum();
            let ratio = image_sq / probe_sq;
            let dev = (ratio - 1.0).abs();
            if dev > upper {
                upper = dev;
            }
            let below = 1.0 - ratio;
            if below > lower {
                lower = below;
            }
        }
        (lower, upper)
    }
    /// Check whether matrix A satisfies RIP-s with parameter delta.
    ///
    /// Returns true if the estimated upper RIP constant is less than delta.
    pub fn satisfies_rip(&self, a: &[Vec<f64>], delta: f64) -> bool {
        let estimate = self.estimate_rip_constant(a);
        estimate.1 < delta
    }
    /// Soft thresholding operator for basis pursuit denoising / LASSO.
    ///
    /// Element-wise soft threshold: sign(x_i) * max(|x_i| - lambda, 0).
    pub fn soft_threshold(x: &[f64], lambda: f64) -> Vec<f64> {
        let mut shrunk = Vec::with_capacity(x.len());
        for &xi in x {
            shrunk.push(xi.signum() * (xi.abs() - lambda).max(0.0));
        }
        shrunk
    }
}
/// Cutting-plane solver (Kelley's method) for convex nonsmooth minimization.
///
/// Maintains a piecewise-linear lower model built from subgradient cuts and
/// solves QP subproblems approximately using gradient descent on the model.
#[derive(Debug, Clone)]
pub struct CuttingPlaneSolver {
    /// Maximum number of cutting-plane iterations.
    pub max_iter: usize,
    /// Convergence tolerance (on the optimality gap between the incumbent
    /// value and the model minimum).
    pub tol: f64,
    /// Trust-region radius: the piecewise-linear model is minimized over a
    /// ball of this radius around the current iterate.
    pub trust_radius: f64,
}
impl CuttingPlaneSolver {
    /// Create a new cutting-plane solver.
    ///
    /// `max_iter`: outer cutting-plane iterations; `tol`: gap tolerance for
    /// termination; `trust_radius`: ball radius for the model subproblem.
    pub fn new(max_iter: usize, tol: f64, trust_radius: f64) -> Self {
        Self {
            max_iter,
            tol,
            trust_radius,
        }
    }
    /// Minimize `f` starting from `x0` using Kelley's cutting-plane method.
    ///
    /// Each outer iteration adds the cut (x_k, f(x_k), ∇f(x_k)) to the bundle,
    /// approximately minimizes the max-of-affine model
    ///   m(z) = max_j [ f(x_j) + ∇f(x_j)·(z − x_j) ]
    /// over a trust-region ball around x_k (inner 200-step subgradient loop),
    /// and stops when best_f − min m falls below `tol`.
    ///
    /// Returns `(x_star, f(x_star), iterations)`.
    pub fn minimize<F: ConvexFunction>(&self, f: &F, x0: &[f64]) -> (Vec<f64>, f64, usize) {
        let n = x0.len();
        let mut x = x0.to_vec();
        // Bundle of cuts: (point x_j, value f(x_j), subgradient g_j).
        let mut cuts: Vec<(Vec<f64>, f64, Vec<f64>)> = Vec::new();
        let mut best_x = x.clone();
        let mut best_f = f.eval(&x);
        let mut iters = self.max_iter;
        for k in 0..self.max_iter {
            let fk = f.eval(&x);
            let gk = f.gradient(&x);
            cuts.push((x.clone(), fk, gk.clone()));
            // Keep the incumbent (best point seen so far).
            if fk < best_f {
                best_f = fk;
                best_x = x.clone();
            }
            // Inner loop: subgradient descent on the model, starting at x,
            // with a step proportional to the trust radius.
            let mut z = x.clone();
            let step_model = 0.01_f64 * self.trust_radius;
            for _ in 0..200 {
                // Evaluate every affine cut at z; the model value is their max.
                let model_vals: Vec<f64> = cuts
                    .iter()
                    .map(|(xj, fj, gj)| {
                        fj + gj
                            .iter()
                            .zip(&z)
                            .zip(xj.iter())
                            .map(|((gi, zi), xji)| gi * (zi - xji))
                            .sum::<f64>()
                    })
                    .collect();
                // A subgradient of the max is the gradient of any active cut.
                let active = model_vals
                    .iter()
                    .enumerate()
                    .max_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal))
                    .map(|(i, _)| i)
                    .unwrap_or(0);
                let grad_model = &cuts[active].2;
                let mut z_new: Vec<f64> = z
                    .iter()
                    .zip(grad_model)
                    .map(|(zi, gi)| zi - step_model * gi)
                    .collect();
                // Project z back onto the trust-region ball centered at x.
                let dist_sq: f64 = z_new.iter().zip(&x).map(|(zi, xi)| (zi - xi).powi(2)).sum();
                if dist_sq > self.trust_radius * self.trust_radius {
                    let scale = self.trust_radius / dist_sq.sqrt();
                    for i in 0..n {
                        z_new[i] = x[i] + scale * (z_new[i] - x[i]);
                    }
                }
                z = z_new;
            }
            // Model value at the inner minimizer: a lower bound on f near x.
            let model_at_z: f64 = cuts
                .iter()
                .map(|(xj, fj, gj)| {
                    fj + gj
                        .iter()
                        .zip(&z)
                        .zip(xj.iter())
                        .map(|((gi, zi), xji)| gi * (zi - xji))
                        .sum::<f64>()
                })
                .fold(f64::NEG_INFINITY, f64::max);
            // Optimality gap between the incumbent and the model lower bound.
            let gap = best_f - model_at_z;
            if gap < self.tol {
                iters = k + 1;
                break;
            }
            x = z;
        }
        (best_x, best_f, iters)
    }
}
/// Proximal gradient method (ISTA/FISTA) for composite optimization.
///
/// Minimizes f(x) + g(x) where f is L-smooth (known Lipschitz constant)
/// and g has a cheap prox operator. Supports both ISTA (beta=0) and
/// FISTA (beta>0, Nesterov momentum).
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct ProximalGradientSolver {
    /// Lipschitz constant L of the gradient of f; the step size is 1/L.
    pub lipschitz: f64,
    /// Maximum number of iterations.
    pub max_iter: usize,
    /// Convergence tolerance on the Euclidean norm of the change between
    /// successive iterates.
    pub tol: f64,
    /// Use FISTA acceleration (true) or plain ISTA (false).
    pub accelerated: bool,
}
368impl ProximalGradientSolver {
369    /// Create a proximal gradient solver.
370    pub fn new(lipschitz: f64, max_iter: usize, tol: f64, accelerated: bool) -> Self {
371        Self {
372            lipschitz,
373            max_iter,
374            tol,
375            accelerated,
376        }
377    }
378    /// Minimize `smooth + regularizer`. Returns `(x_star, iters)`.
379    pub fn minimize<F, G>(&self, smooth: &F, regularizer: &G, x0: &[f64]) -> (Vec<f64>, usize)
380    where
381        F: ConvexFunction,
382        G: ProxableFunction,
383    {
384        let n = x0.len();
385        let step = 1.0 / self.lipschitz;
386        let mut x = x0.to_vec();
387        let mut y = x.clone();
388        let mut t = 1.0_f64;
389        let mut iters = self.max_iter;
390        for k in 1..=self.max_iter {
391            let grad = smooth.gradient(&y);
392            let v: Vec<f64> = y.iter().zip(&grad).map(|(yi, gi)| yi - step * gi).collect();
393            let x_new = regularizer.prox(&v, step);
394            let diff_norm: f64 = x_new
395                .iter()
396                .zip(&x)
397                .map(|(a, b)| (a - b).powi(2))
398                .sum::<f64>()
399                .sqrt();
400            if self.accelerated {
401                let t_new = (1.0 + (1.0 + 4.0 * t * t).sqrt()) / 2.0;
402                let beta = (t - 1.0) / t_new;
403                let mut y_new = vec![0.0_f64; n];
404                for i in 0..n {
405                    y_new[i] = x_new[i] + beta * (x_new[i] - x[i]);
406                }
407                t = t_new;
408                y = y_new;
409            } else {
410                y = x_new.clone();
411            }
412            x = x_new;
413            if diff_norm < self.tol {
414                iters = k;
415                break;
416            }
417        }
418        (x, iters)
419    }
420    /// Estimate the Lipschitz constant via power iteration on the Hessian approximation.
421    ///
422    /// Returns an upper bound on L by computing `max ||grad f(x + eps * v) - grad f(x)||/eps`.
423    pub fn estimate_lipschitz<F: ConvexFunction>(f: &F, x: &[f64], num_trials: usize) -> f64 {
424        let eps = 1e-5_f64;
425        let n = x.len();
426        let grad0 = f.gradient(x);
427        let mut max_l = 0.0_f64;
428        for i in 0..num_trials.min(n) {
429            let mut x_pert = x.to_vec();
430            x_pert[i] += eps;
431            let grad_pert = f.gradient(&x_pert);
432            let diff_norm: f64 = grad_pert
433                .iter()
434                .zip(&grad0)
435                .map(|(a, b)| (a - b).powi(2))
436                .sum::<f64>()
437                .sqrt();
438            max_l = max_l.max(diff_norm / eps);
439        }
440        max_l.max(1e-8)
441    }
442}
/// FISTA: Fast Iterative Shrinkage-Thresholding Algorithm.
///
/// Minimizes f(x) + g(x) where f is smooth (L-Lipschitz gradient) and
/// g has a cheap proximal operator. Achieves O(1/k²) convergence.
#[derive(Debug, Clone)]
pub struct FISTASolver {
    /// Lipschitz constant L of ∇f; the proximal step size is 1/L.
    pub lipschitz: f64,
    /// Maximum number of iterations.
    pub max_iter: usize,
    /// Convergence tolerance on the Euclidean distance between successive
    /// iterates.
    pub tol: f64,
}
456impl FISTASolver {
457    /// Create a new FISTA solver.
458    pub fn new(lipschitz: f64, max_iter: usize, tol: f64) -> Self {
459        Self {
460            lipschitz,
461            max_iter,
462            tol,
463        }
464    }
465    /// Run FISTA to minimize `smooth` + `regularizer`.
466    ///
467    /// Returns `(x_star, iterations)`.
468    pub fn minimize<F, G>(&self, smooth: &F, regularizer: &G, x0: &[f64]) -> (Vec<f64>, usize)
469    where
470        F: ConvexFunction,
471        G: ProxableFunction,
472    {
473        let n = x0.len();
474        let step = 1.0 / self.lipschitz;
475        let mut x = x0.to_vec();
476        let mut y = x.clone();
477        let mut t = 1.0_f64;
478        let mut iters = self.max_iter;
479        for k in 1..=self.max_iter {
480            let grad = smooth.gradient(&y);
481            let v: Vec<f64> = y.iter().zip(&grad).map(|(yi, gi)| yi - step * gi).collect();
482            let x_new = regularizer.prox(&v, step);
483            let t_new = (1.0 + (1.0 + 4.0 * t * t).sqrt()) / 2.0;
484            let beta = (t - 1.0) / t_new;
485            let mut diff_norm = 0.0_f64;
486            let mut y_new = vec![0.0_f64; n];
487            for i in 0..n {
488                y_new[i] = x_new[i] + beta * (x_new[i] - x[i]);
489                diff_norm += (x_new[i] - x[i]).powi(2);
490            }
491            x = x_new;
492            y = y_new;
493            t = t_new;
494            if diff_norm.sqrt() < self.tol {
495                iters = k;
496                break;
497            }
498        }
499        (x, iters)
500    }
501}
/// L1 norm penalty: f(x) = λ · ‖x‖₁.
#[derive(Debug, Clone)]
pub struct L1NormFunction {
    /// Regularization weight λ ≥ 0.
    pub lambda: f64,
}
impl L1NormFunction {
    /// Construct an L1 penalty with the given weight `lambda`.
    pub fn new(lambda: f64) -> Self {
        L1NormFunction { lambda }
    }
}
/// Sinkhorn algorithm for entropic-regularized optimal transport.
///
/// Approximates the optimal transport plan between discrete measures mu
/// (source) and nu (target) under cost matrix C with entropy regularization:
///   min_{gamma >= 0, gamma 1 = mu, gamma^T 1 = nu} <C, gamma> - eps * H(gamma)
///
/// Each Sinkhorn iteration alternates a row scaling and a column scaling.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct SinkhornSolver {
    /// Entropy regularization parameter epsilon > 0.
    pub epsilon: f64,
    /// Maximum number of Sinkhorn iterations.
    pub max_iter: usize,
    /// Convergence tolerance on the change of the scaling vectors.
    pub tol: f64,
}
impl SinkhornSolver {
    /// Create a new Sinkhorn solver.
    pub fn new(epsilon: f64, max_iter: usize, tol: f64) -> Self {
        SinkhornSolver {
            epsilon,
            max_iter,
            tol,
        }
    }
    /// Gibbs kernel K_{ij} = exp(-C_{ij} / epsilon).
    fn gibbs_kernel(cost: &[Vec<f64>], epsilon: f64) -> Vec<Vec<f64>> {
        let mut kernel = Vec::with_capacity(cost.len());
        for row in cost {
            kernel.push(row.iter().map(|&c| (-c / epsilon).exp()).collect());
        }
        kernel
    }
    /// Solve the entropic OT problem.
    ///
    /// `mu`: source distribution (length m, sums to 1).
    /// `nu`: target distribution (length n, sums to 1).
    /// `cost`: m x n cost matrix.
    ///
    /// Returns `(transport_plan, wasserstein_cost)`.
    pub fn solve(&self, mu: &[f64], nu: &[f64], cost: &[Vec<f64>]) -> (Vec<Vec<f64>>, f64) {
        let m = mu.len();
        let n = nu.len();
        let kernel = Self::gibbs_kernel(cost, self.epsilon);
        let mut u = vec![1.0_f64; m];
        let mut v = vec![1.0_f64; n];
        for _ in 0..self.max_iter {
            // Row scaling: u_i = mu_i / (K v)_i, clamped to avoid divide-by-zero.
            let mut u_next = vec![0.0_f64; m];
            for i in 0..m {
                let kv: f64 = kernel[i].iter().zip(&v).map(|(kij, vj)| kij * vj).sum();
                u_next[i] = mu[i] / kv.max(1e-300);
            }
            // Column scaling: v_j = nu_j / (K^T u)_j.
            let mut v_next = vec![0.0_f64; n];
            for j in 0..n {
                let ktu: f64 = kernel.iter().zip(&u_next).map(|(ki, ui)| ki[j] * ui).sum();
                v_next[j] = nu[j] / ktu.max(1e-300);
            }
            // Convergence: L1 movement of both scaling vectors this sweep.
            let err_u: f64 = u_next.iter().zip(&u).map(|(a, b)| (a - b).abs()).sum();
            let err_v: f64 = v_next.iter().zip(&v).map(|(a, b)| (a - b).abs()).sum();
            let err = err_u + err_v;
            u = u_next;
            v = v_next;
            if err < self.tol {
                break;
            }
        }
        // Assemble gamma_{ij} = u_i K_{ij} v_j and the transport cost <C, gamma>.
        let mut plan = vec![vec![0.0_f64; n]; m];
        let mut total = 0.0_f64;
        for i in 0..m {
            for j in 0..n {
                plan[i][j] = u[i] * kernel[i][j] * v[j];
                total += plan[i][j] * cost[i][j];
            }
        }
        (plan, total)
    }
    /// Approximate squared 2-Wasserstein distance between two 1D measures.
    ///
    /// `points_mu`/`weights_mu`: support points and weights of mu (weights sum to 1).
    /// `points_nu`/`weights_nu`: support points and weights of nu (weights sum to 1).
    pub fn wasserstein2_1d(
        points_mu: &[f64],
        weights_mu: &[f64],
        points_nu: &[f64],
        weights_nu: &[f64],
    ) -> f64 {
        // Quadratic ground cost c_{ij} = (x_i - y_j)^2.
        let cost: Vec<Vec<f64>> = points_mu
            .iter()
            .map(|&pi| points_nu.iter().map(|&pj| (pi - pj).powi(2)).collect())
            .collect();
        let solver = Self::new(0.01, 200, 1e-8);
        solver.solve(weights_mu, weights_nu, &cost).1
    }
}
/// Geometric program solver via convex transformation.
///
/// A GP in standard form: minimize p_0(x) subject to p_i(x) ≤ 1, i=1..m,
/// where each p_i is a posynomial. Under the change of variables x = exp(y),
/// the GP becomes convex (log-sum-exp minimization).
///
/// This solver runs penalized gradient descent on the log-domain objective.
#[derive(Debug, Clone)]
pub struct GeometricProgramSolver {
    /// Maximum iterations for the inner gradient descent.
    pub max_iter: usize,
    /// Step size for gradient descent in log-domain.
    pub step_size: f64,
    /// Convergence tolerance on the gradient norm.
    pub tol: f64,
}
impl GeometricProgramSolver {
    /// Create a new GP solver.
    pub fn new(max_iter: usize, step_size: f64, tol: f64) -> Self {
        GeometricProgramSolver {
            max_iter,
            step_size,
            tol,
        }
    }
    /// Evaluate a monomial c · ∏ x_i^{a_i} at the log-domain point y (x = exp(y)):
    /// returns log(c) + a^T y.
    pub fn eval_log_monomial(log_c: f64, exponents: &[f64], y: &[f64]) -> f64 {
        let dot = exponents
            .iter()
            .zip(y)
            .fold(0.0_f64, |acc, (ai, yi)| acc + ai * yi);
        log_c + dot
    }
    /// Log of a posynomial value: log(∑_k exp(log_c_k + a_k^T y)), computed
    /// with the max-subtraction trick for numerical stability.
    pub fn log_sum_exp_posynomial(monomials: &[(f64, Vec<f64>)], y: &[f64]) -> f64 {
        let terms: Vec<f64> = monomials
            .iter()
            .map(|(lc, a)| Self::eval_log_monomial(*lc, a, y))
            .collect();
        let peak = terms.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
        // An infinite peak (empty posynomial or overflowing term) is returned as-is.
        if peak.is_infinite() {
            return peak;
        }
        peak + terms.iter().map(|t| (t - peak).exp()).sum::<f64>().ln()
    }
    /// Softmax weights exp(v_k - max v) / Z for log-domain values `vals`.
    fn softmax(vals: &[f64]) -> Vec<f64> {
        let peak = vals.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
        let unnorm: Vec<f64> = vals.iter().map(|v| (v - peak).exp()).collect();
        let z: f64 = unnorm.iter().sum();
        unnorm.iter().map(|w| w / z).collect()
    }
    /// Solve the GP: minimize the objective posynomial subject to each
    /// constraint posynomial being ≤ 1 (≤ 0 in the log-domain).
    ///
    /// `objective`: (log_coefficient, exponent_vector) pairs of the objective.
    /// `constraints`: one posynomial per constraint, same pair encoding.
    /// Violated constraints contribute a penalty gradient with weight rho = 10.
    ///
    /// Returns the optimal y = log(x) and the optimal objective value.
    pub fn solve(
        &self,
        objective: &[(f64, Vec<f64>)],
        constraints: &[Vec<(f64, Vec<f64>)>],
        y0: &[f64],
    ) -> (Vec<f64>, f64) {
        let dim = y0.len();
        let mut y = y0.to_vec();
        for _ in 0..self.max_iter {
            // Gradient of the log-sum-exp objective: softmax-weighted exponents.
            let obj_logs: Vec<f64> = objective
                .iter()
                .map(|(lc, a)| Self::eval_log_monomial(*lc, a, &y))
                .collect();
            let obj_w = Self::softmax(&obj_logs);
            let mut grad = vec![0.0_f64; dim];
            for ((_, a), wk) in objective.iter().zip(&obj_w) {
                for i in 0..dim {
                    grad[i] += wk * a[i];
                }
            }
            // Add a penalty gradient for each violated constraint.
            for constraint in constraints {
                let c_lse = Self::log_sum_exp_posynomial(constraint, &y);
                if c_lse > 0.0 {
                    let rho = 10.0_f64;
                    let c_logs: Vec<f64> = constraint
                        .iter()
                        .map(|(lc, a)| Self::eval_log_monomial(*lc, a, &y))
                        .collect();
                    let c_w = Self::softmax(&c_logs);
                    for ((_, a), wk) in constraint.iter().zip(&c_w) {
                        for i in 0..dim {
                            grad[i] += rho * wk * a[i];
                        }
                    }
                }
            }
            let gnorm = grad.iter().map(|g| g * g).sum::<f64>().sqrt();
            if gnorm < self.tol {
                break;
            }
            for (yi, gi) in y.iter_mut().zip(&grad) {
                *yi -= self.step_size * gi;
            }
        }
        let optimum = Self::log_sum_exp_posynomial(objective, &y).exp();
        (y, optimum)
    }
}
/// Follow-The-Regularized-Leader (FTRL) online convex optimizer.
///
/// At each round t, plays x_t = argmin_{x in X} { sum_{s<t} f_s(x) + R(x) / eta }
/// where R(x) is a strongly convex regularizer. With R(x) = (1/2)||x||^2 this
/// recovers Online Gradient Descent. Achieves O(sqrt(T)) regret.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct OnlineLearner {
    /// Learning rate eta > 0.
    pub eta: f64,
    /// Dimension of the decision space.
    pub dim: usize,
    /// Accumulated gradient sum (cumulative subgradients).
    pub cumulative_grad: Vec<f64>,
    /// Round counter.
    pub round: usize,
    /// Total regret accumulated (sum of f_t(x_t) - f_t(x*) approximation).
    pub cumulative_loss: f64,
}
impl OnlineLearner {
    /// Create a new FTRL online learner with rate `eta` in dimension `dim`.
    pub fn new(eta: f64, dim: usize) -> Self {
        OnlineLearner {
            eta,
            dim,
            cumulative_grad: vec![0.0_f64; dim],
            round: 0,
            cumulative_loss: 0.0,
        }
    }
    /// Current decision x_t under FTRL with an L2 regularizer:
    /// x_t = -eta * sum_{s<t} g_s.
    pub fn current_decision(&self) -> Vec<f64> {
        let mut decision = Vec::with_capacity(self.cumulative_grad.len());
        for g in &self.cumulative_grad {
            decision.push(-self.eta * g);
        }
        decision
    }
    /// Feed the subgradient `grad` of this round's loss.
    ///
    /// Accumulates the gradient, advances the round counter, and returns the
    /// linearized loss g_t^T x_t suffered at the current decision.
    pub fn update(&mut self, grad: &[f64]) -> f64 {
        let decision = self.current_decision();
        let mut incurred = 0.0_f64;
        for (gi, xi) in grad.iter().zip(&decision) {
            incurred += gi * xi;
        }
        for (acc, gi) in self.cumulative_grad.iter_mut().zip(grad) {
            *acc += gi;
        }
        self.cumulative_loss += incurred;
        self.round += 1;
        incurred
    }
    /// Standard FTRL/OGD regret bound O(||x*|| G sqrt(T)):
    /// ||x*|| G sqrt(T) / eta + eta G² T / 2.
    pub fn regret_bound(&self, optimal_norm: f64, grad_bound: f64) -> f64 {
        optimal_norm * grad_bound * (self.round as f64).sqrt() / self.eta
            + 0.5 * self.eta * grad_bound * grad_bound * self.round as f64
    }
    /// Reset all accumulated state back to the round-zero configuration.
    pub fn reset(&mut self) {
        self.cumulative_grad = vec![0.0_f64; self.dim];
        self.round = 0;
        self.cumulative_loss = 0.0;
    }
}
/// Bundle method for nonsmooth convex optimization.
///
/// Maintains a bundle of subgradients and uses a stability center with
/// a proximal term to regularize the QP subproblem. Supports serious steps
/// (descent) and null steps (bundle update only).
#[derive(Debug, Clone)]
pub struct BundleMethodSolver {
    /// Proximal parameter μ > 0 (controls step aggressiveness; larger μ
    /// keeps trial points closer to the stability center).
    pub mu: f64,
    /// Descent tolerance m_L ∈ (0, 1) for serious step acceptance.
    pub m_l: f64,
    /// Maximum bundle size (older cuts are dropped when exceeded).
    pub max_bundle_size: usize,
    /// Maximum iterations.
    pub max_iter: usize,
    /// Convergence tolerance.
    pub tol: f64,
}
811impl BundleMethodSolver {
    /// Create a new bundle method solver.
    ///
    /// `mu`: proximal weight (> 0) on the QP subproblem; `m_l`: serious-step
    /// acceptance fraction in (0, 1); `max_bundle_size`: cap on retained cuts;
    /// `max_iter`: iteration cap; `tol`: convergence tolerance.
    pub fn new(mu: f64, m_l: f64, max_bundle_size: usize, max_iter: usize, tol: f64) -> Self {
        Self {
            mu,
            m_l,
            max_bundle_size,
            max_iter,
            tol,
        }
    }
822    /// Solve min f(x) starting from x0. Returns (x_star, f(x_star), iterations).
823    pub fn minimize<F: ConvexFunction>(&self, f: &F, x0: &[f64]) -> (Vec<f64>, f64, usize) {
824        let n = x0.len();
825        let mut xhat = x0.to_vec();
826        let mut fhat = f.eval(&xhat);
827        let mut bundle: Vec<(Vec<f64>, f64)> = Vec::new();
828        let g0 = f.gradient(&xhat);
829        bundle.push((g0, 0.0));
830        let mut iters = self.max_iter;
831        for k in 0..self.max_iter {
832            let mut d = vec![0.0_f64; n];
833            let step_d = 1.0 / (self.mu + 1.0);
834            for _ in 0..500 {
835                let active_val = bundle
836                    .iter()
837                    .map(|(gj, alphaj)| {
838                        let dot: f64 = gj.iter().zip(&d).map(|(gi, di)| gi * di).sum();
839                        -alphaj + dot
840                    })
841                    .enumerate()
842                    .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
843                    .map(|(i, _)| i)
844                    .unwrap_or(0);
845                let (gj, _alphaj) = &bundle[active_val];
846                let grad_d: Vec<f64> = gj
847                    .iter()
848                    .zip(&d)
849                    .map(|(gi, di)| gi + self.mu * di)
850                    .collect();
851                let grad_norm: f64 = grad_d.iter().map(|g| g * g).sum::<f64>().sqrt();
852                if grad_norm < 1e-10 {
853                    break;
854                }
855                for i in 0..n {
856                    d[i] -= step_d * grad_d[i];
857                }
858            }
859            let x_cand: Vec<f64> = xhat.iter().zip(&d).map(|(xi, di)| xi + di).collect();
860            let f_cand = f.eval(&x_cand);
861            let g_cand = f.gradient(&x_cand);
862            let delta = fhat - f_cand;
863            let model_pred: f64 = bundle
864                .iter()
865                .map(|(gj, alphaj)| {
866                    let dot: f64 = gj.iter().zip(&d).map(|(gi, di)| gi * di).sum();
867                    -alphaj + dot
868                })
869                .fold(f64::NEG_INFINITY, f64::max);
870            let model_decrease = -model_pred;
871            if model_decrease.abs() < self.tol {
872                iters = k + 1;
873                break;
874            }
875            let alpha_new: f64 = fhat
876                - f_cand
877                - g_cand
878                    .iter()
879                    .zip(&d)
880                    .map(|(gi, di)| gi * (-di))
881                    .sum::<f64>();
882            if model_decrease > 1e-12 && delta >= self.m_l * model_decrease {
883                xhat = x_cand.clone();
884                fhat = f_cand;
885                bundle.clear();
886                bundle.push((g_cand, 0.0_f64.max(alpha_new)));
887            } else {
888                bundle.push((g_cand, 0.0_f64.max(alpha_new)));
889                if bundle.len() > self.max_bundle_size {
890                    bundle.remove(0);
891                }
892            }
893        }
894        (xhat, fhat, iters)
895    }
896}
/// Projected gradient descent with box constraints lb ≤ x ≤ ub.
#[derive(Debug, Clone)]
pub struct ProjectedGradient {
    /// Fixed learning rate.
    pub learning_rate: f64,
    /// Maximum number of iterations.
    pub max_iter: usize,
    /// Convergence tolerance (`minimize` compares it to the raw gradient norm).
    pub tol: f64,
    /// Lower bound per coordinate. Must cover every coordinate of the
    /// iterate: `project` indexes `lb[i]` and panics if it is too short.
    pub lb: Vec<f64>,
    /// Upper bound per coordinate (same length requirement as `lb`;
    /// each `ub[i]` must be ≥ `lb[i]` or `clamp` panics).
    pub ub: Vec<f64>,
}
911impl ProjectedGradient {
912    /// Create a new projected gradient optimizer.
913    pub fn new(lr: f64, max_iter: usize, tol: f64, lb: Vec<f64>, ub: Vec<f64>) -> Self {
914        Self {
915            learning_rate: lr,
916            max_iter,
917            tol,
918            lb,
919            ub,
920        }
921    }
922    /// Project `x` onto the box [lb, ub].
923    pub fn project(&self, x: &[f64]) -> Vec<f64> {
924        x.iter()
925            .enumerate()
926            .map(|(i, &xi)| xi.clamp(self.lb[i], self.ub[i]))
927            .collect()
928    }
929    /// Minimize `f` starting from `x0` with box-constraint projection.
930    ///
931    /// Returns `(x_star, f(x_star))`.
932    pub fn minimize<F: ConvexFunction>(&self, f: &F, x0: &[f64]) -> (Vec<f64>, f64) {
933        let mut x = self.project(x0);
934        for _ in 0..self.max_iter {
935            let grad = f.gradient(&x);
936            let grad_norm: f64 = grad.iter().map(|g| g * g).sum::<f64>().sqrt();
937            if grad_norm < self.tol {
938                break;
939            }
940            let x_new: Vec<f64> = x
941                .iter()
942                .zip(&grad)
943                .map(|(xi, gi)| xi - self.learning_rate * gi)
944                .collect();
945            x = self.project(&x_new);
946        }
947        let fval = f.eval(&x);
948        (x, fval)
949    }
950}
/// Quadratic objective: f(x) = 0.5 x^T Q x + c^T x + d.
#[derive(Debug, Clone)]
pub struct QuadraticFunction {
    /// Positive semidefinite coefficient matrix Q (n × n).
    pub coeffs: Vec<Vec<f64>>,
    /// Linear coefficient vector c (length n).
    pub linear: Vec<f64>,
    /// Constant term d.
    pub constant: f64,
}
impl QuadraticFunction {
    /// Build the quadratic f(x) = 0.5 xᵀ Q x + cᵀ x + d from its three parts.
    pub fn new(q: Vec<Vec<f64>>, c: Vec<f64>, d: f64) -> Self {
        QuadraticFunction {
            coeffs: q,
            linear: c,
            constant: d,
        }
    }
}
/// Alternating Direction Method of Multipliers (consensus form).
#[derive(Debug, Clone)]
pub struct ADMM {
    /// Penalty parameter ρ > 0.
    pub rho: f64,
    /// Maximum number of iterations.
    pub max_iter: usize,
    /// Convergence tolerance.
    pub tol: f64,
}
impl ADMM {
    /// Create an ADMM solver with penalty `rho` (1000 iterations, tol 1e-6).
    pub fn new(rho: f64) -> Self {
        Self {
            rho,
            max_iter: 1000,
            tol: 1e-6,
        }
    }
    /// Solve the LASSO problem: minimize 0.5‖Ax − b‖² + λ‖z‖₁ s.t. x = z.
    ///
    /// Standard consensus ADMM (Boyd et al., §6.4):
    ///   x ← (AᵀA + ρI)⁻¹ (Aᵀb + ρ(z − u))
    ///   z ← S_{λ/ρ}(x + u)            (elementwise soft-thresholding)
    ///   u ← u + x − z                 (scaled dual update)
    ///
    /// Returns the sparse iterate `z`. Falls back to the zero vector for
    /// degenerate input (empty/ragged `a`, mismatched `b`, or a system
    /// matrix that fails to factorize, e.g. when ρ ≤ 0).
    pub fn solve_lasso(&self, a: &[Vec<f64>], b: &[f64], lambda: f64) -> Vec<f64> {
        let m = a.len();
        let n = a.first().map(|row| row.len()).unwrap_or(0);
        if n == 0 || b.len() != m || a.iter().any(|row| row.len() != n) {
            return vec![0.0; n];
        }
        // System matrix AᵀA + ρI (symmetric positive definite for ρ > 0).
        let mut sys = vec![vec![0.0_f64; n]; n];
        for row in a {
            for i in 0..n {
                for j in 0..n {
                    sys[i][j] += row[i] * row[j];
                }
            }
        }
        for i in 0..n {
            sys[i][i] += self.rho;
        }
        // Factor once; every x-update reuses the triangular factor.
        let chol = match Self::cholesky(&sys) {
            Some(l) => l,
            None => return vec![0.0; n],
        };
        // Aᵀb is also constant across iterations.
        let mut atb = vec![0.0_f64; n];
        for (row, &bi) in a.iter().zip(b) {
            for (qj, &aj) in atb.iter_mut().zip(row) {
                *qj += aj * bi;
            }
        }
        let kappa = lambda / self.rho;
        let mut z = vec![0.0_f64; n];
        let mut u = vec![0.0_f64; n];
        for _ in 0..self.max_iter {
            // x-update: solve (AᵀA + ρI) x = Aᵀb + ρ(z − u).
            let rhs: Vec<f64> = (0..n).map(|i| atb[i] + self.rho * (z[i] - u[i])).collect();
            let x = Self::cholesky_solve(&chol, &rhs);
            // z-update: soft-thresholding with κ = λ/ρ.
            let z_prev = z.clone();
            for i in 0..n {
                let v = x[i] + u[i];
                z[i] = v.signum() * (v.abs() - kappa).max(0.0);
            }
            // u-update (scaled dual ascent).
            for i in 0..n {
                u[i] += x[i] - z[i];
            }
            // Primal residual ‖x − z‖ and dual residual ρ‖z − z_prev‖.
            let r_norm: f64 = (0..n).map(|i| (x[i] - z[i]).powi(2)).sum::<f64>().sqrt();
            let s_norm: f64 = (0..n)
                .map(|i| (self.rho * (z[i] - z_prev[i])).powi(2))
                .sum::<f64>()
                .sqrt();
            if r_norm < self.tol && s_norm < self.tol {
                break;
            }
        }
        z
    }
    /// Cholesky factorization A = L·Lᵀ; `None` if A is not positive definite.
    fn cholesky(a: &[Vec<f64>]) -> Option<Vec<Vec<f64>>> {
        let n = a.len();
        let mut l = vec![vec![0.0_f64; n]; n];
        for i in 0..n {
            for j in 0..=i {
                let partial: f64 = (0..j).map(|k| l[i][k] * l[j][k]).sum();
                if i == j {
                    let d = a[i][i] - partial;
                    if d <= 0.0 {
                        return None;
                    }
                    l[i][j] = d.sqrt();
                } else {
                    l[i][j] = (a[i][j] - partial) / l[j][j];
                }
            }
        }
        Some(l)
    }
    /// Solve L·Lᵀ x = rhs by forward then backward substitution.
    fn cholesky_solve(l: &[Vec<f64>], rhs: &[f64]) -> Vec<f64> {
        let n = l.len();
        let mut y = vec![0.0_f64; n];
        for i in 0..n {
            let partial: f64 = (0..i).map(|k| l[i][k] * y[k]).sum();
            y[i] = (rhs[i] - partial) / l[i][i];
        }
        let mut x = vec![0.0_f64; n];
        for i in (0..n).rev() {
            let partial: f64 = ((i + 1)..n).map(|k| l[k][i] * x[k]).sum();
            x[i] = (y[i] - partial) / l[i][i];
        }
        x
    }
}
/// SDP relaxation of a quadratic program.
///
/// Lifts QP: min x^T Q x + c^T x, s.t. Ax ≤ b, x ∈ {0,1}^n
/// to an SDP over the matrix variable X = x x^T, X ⪰ 0, diag(X) = x.
/// This is a structural stub; `solve` returns a placeholder bound.
#[derive(Debug, Clone)]
pub struct SDPRelaxation {
    /// Objective matrix Q (n × n, PSD).
    pub q: Vec<Vec<f64>>,
    /// Linear objective c (length n).
    pub c: Vec<f64>,
    /// Constraint matrix A (m × n).
    pub a_mat: Vec<Vec<f64>>,
    /// RHS vector b (length m).
    pub b_vec: Vec<f64>,
}
impl SDPRelaxation {
    /// Create an SDP relaxation instance.
    pub fn new(q: Vec<Vec<f64>>, c: Vec<f64>, a_mat: Vec<Vec<f64>>, b_vec: Vec<f64>) -> Self {
        Self { q, c, a_mat, b_vec }
    }
    /// Return the dimension n.
    pub fn dim(&self) -> usize {
        self.c.len()
    }
    /// Compute the SDP lower bound via the trace relaxation:
    /// bound ≤ min_{X ⪰ 0} tr(Q X) + c^T x.
    ///
    /// This stub evaluates the objective at x = 0 (trivial feasible point).
    pub fn solve(&self) -> f64 {
        let _ = (&self.q, &self.c, &self.a_mat, &self.b_vec);
        0.0
    }
    /// Check whether a square matrix (given as rows) is positive
    /// semidefinite, within a small numerical tolerance.
    ///
    /// The matrix is symmetrized as (M + Mᵀ)/2, shifted by a tiny δ·I scaled
    /// to the matrix magnitude, and an in-place Cholesky factorization is
    /// attempted: it succeeds iff the shifted matrix is positive definite,
    /// which certifies PSD-ness of M up to the tolerance.
    ///
    /// NOTE: the previous implementation applied Sylvester's criterion to
    /// the *leading* principal minors only. That characterizes positive
    /// *definite* matrices; for semidefiniteness it is unsound — e.g.
    /// [[0,0],[0,-1]] has all leading minors 0 yet is not PSD — and its
    /// "no pivot found" branch also rejected singular-but-PSD matrices
    /// (such as the zero matrix) outright.
    pub fn is_psd(mat: &[Vec<f64>]) -> bool {
        let n = mat.len();
        // A ragged (non-square) input cannot be PSD.
        if mat.iter().any(|row| row.len() != n) {
            return false;
        }
        // Tolerance shift relative to the largest entry, so the test is
        // scale-invariant and accepts singular PSD matrices.
        let max_abs = mat
            .iter()
            .flat_map(|row| row.iter())
            .fold(0.0_f64, |acc, &v| acc.max(v.abs()));
        let shift = 1e-9 * max_abs.max(1.0);
        // Work on the symmetric part (M + Mᵀ)/2 plus the diagonal shift.
        let mut a: Vec<Vec<f64>> = (0..n)
            .map(|i| (0..n).map(|j| 0.5 * (mat[i][j] + mat[j][i])).collect())
            .collect();
        for i in 0..n {
            a[i][i] += shift;
        }
        // In-place Cholesky: overwrite the lower triangle with L (A = L·Lᵀ).
        // A non-positive pivot means the shifted matrix is not positive
        // definite, hence the original is not PSD within tolerance.
        for i in 0..n {
            for j in 0..=i {
                let mut s = a[i][j];
                for k in 0..j {
                    s -= a[i][k] * a[j][k];
                }
                if i == j {
                    if s <= 0.0 {
                        return false;
                    }
                    a[i][i] = s.sqrt();
                } else {
                    a[i][j] = s / a[j][j];
                }
            }
        }
        true
    }
}