Skip to main content

scirs2_optimize/decomposition/
benders.rs

1//! Decomposition methods for large-scale optimization
2//!
3//! Implements:
4//! - [`BendersDecomposition`]: Master + subproblem decomposition
5//! - [`DantzigWolfe`]: Dantzig-Wolfe decomposition for structured LP
6//! - `ADMM`: Alternating Direction Method of Multipliers
7//! - [`ProximalBundle`]: Proximal bundle method for nonsmooth optimization
8
9use crate::error::{OptimizeError, OptimizeResult};
10
11// ---------------------------------------------------------------------------
12// Benders Decomposition
13// ---------------------------------------------------------------------------
14
15/// Options for Benders decomposition
16#[derive(Debug, Clone)]
17pub struct BendersOptions {
18    /// Maximum number of outer iterations (adding cuts)
19    pub max_iter: usize,
20    /// Convergence tolerance (upper - lower bound gap)
21    pub tol: f64,
22    /// Feasibility tolerance for subproblem
23    pub feas_tol: f64,
24    /// Maximum subproblem iterations
25    pub max_sub_iter: usize,
26    /// Subproblem convergence tolerance
27    pub sub_tol: f64,
28    /// Whether to use multi-cut variant (separate cut per constraint group)
29    pub multi_cut: bool,
30}
31
32impl Default for BendersOptions {
33    fn default() -> Self {
34        BendersOptions {
35            max_iter: 100,
36            tol: 1e-7,
37            feas_tol: 1e-7,
38            max_sub_iter: 500,
39            sub_tol: 1e-9,
40            multi_cut: false,
41        }
42    }
43}
44
45/// Result of Benders decomposition
46#[derive(Debug, Clone)]
47pub struct BendersResult {
48    /// Optimal first-stage (master) variables
49    pub x: Vec<f64>,
50    /// Optimal second-stage (subproblem) variables
51    pub y: Vec<f64>,
52    /// Objective value
53    pub fun: f64,
54    /// Lower bound at termination
55    pub lower_bound: f64,
56    /// Upper bound at termination
57    pub upper_bound: f64,
58    /// Number of Benders cuts added
59    pub n_cuts: usize,
60    /// Number of iterations
61    pub nit: usize,
62    /// Whether the algorithm converged
63    pub success: bool,
64    /// Termination message
65    pub message: String,
66}
67
68/// Benders decomposition solver.
69///
70/// Decomposes the problem:
71///
72/// ```text
73/// min_{x,y}  c^T x + d^T y
74/// s.t.        A x + B y >= b    (coupling constraints)
75///             x in X,  y in Y
76/// ```
77///
78/// into a master problem (in x) and subproblems (in y for fixed x).
79/// Benders cuts are added to the master from subproblem dual solutions.
80///
81/// This implementation uses a projected gradient method for both the master
82/// and subproblem, as a general-purpose (non-LP-specific) variant.
83pub struct BendersDecomposition {
84    /// Algorithm options
85    pub options: BendersOptions,
86}
87
88impl Default for BendersDecomposition {
89    fn default() -> Self {
90        BendersDecomposition {
91            options: BendersOptions::default(),
92        }
93    }
94}
95
96impl BendersDecomposition {
97    /// Create a Benders solver with custom options
98    pub fn new(options: BendersOptions) -> Self {
99        BendersDecomposition { options }
100    }
101
102    /// Solve a separable optimization problem via Benders decomposition.
103    ///
104    /// # Arguments
105    ///
106    /// * `master_obj` - Master problem objective (depends on x only)
107    /// * `sub_obj` - Subproblem objective (depends on x and y)
108    /// * `sub_constraints` - Subproblem constraints for fixed x: returns (violation, dual)
109    ///   Each entry: `Fn(&[f64], &[f64]) -> (f64, f64)` (feasibility, dual multiplier)
110    /// * `x0` - Initial first-stage variables
111    /// * `y0` - Initial second-stage variables
112    pub fn solve<FM, FS>(
113        &self,
114        master_obj: FM,
115        sub_obj: FS,
116        x0: &[f64],
117        y0: &[f64],
118    ) -> OptimizeResult<BendersResult>
119    where
120        FM: Fn(&[f64]) -> f64,
121        FS: Fn(&[f64], &[f64]) -> f64,
122    {
123        let nx = x0.len();
124        let ny = y0.len();
125
126        if nx == 0 {
127            return Err(OptimizeError::InvalidInput(
128                "First-stage (master) variables must be non-empty".to_string(),
129            ));
130        }
131
132        let h = 1e-7f64;
133        let mut x = x0.to_vec();
134        let mut y = y0.to_vec();
135        let mut lower_bound = f64::NEG_INFINITY;
136        let mut upper_bound = f64::INFINITY;
137        let mut n_cuts = 0usize;
138        let mut nit = 0usize;
139
140        // Benders cuts: each cut is (g, beta) where the cut is g^T * (x - x_k) + beta <= eta
141        // i.e., the recourse function Q(x) >= g^T * x + (beta - g^T * x_k)
142        let mut cuts: Vec<(Vec<f64>, f64)> = Vec::new();
143
144        for outer in 0..self.options.max_iter {
145            nit = outer + 1;
146
147            // Step 1: Solve subproblem for fixed x (minimize over y)
148            let mut y_opt = y.clone();
149            let mut sub_f = sub_obj(&x, &y_opt);
150
151            for _sub in 0..self.options.max_sub_iter {
152                let mut grad_y = vec![0.0f64; ny];
153                for i in 0..ny {
154                    let mut yf = y_opt.clone();
155                    yf[i] += h;
156                    grad_y[i] = (sub_obj(&x, &yf) - sub_f) / h;
157                }
158                let gnorm: f64 = grad_y.iter().map(|g| g * g).sum::<f64>().sqrt();
159                if gnorm < self.options.sub_tol {
160                    break;
161                }
162                let step = 0.1f64 / (1.0 + gnorm);
163                let mut y_new = vec![0.0f64; ny];
164                for i in 0..ny {
165                    y_new[i] = y_opt[i] - step * grad_y[i];
166                }
167                let f_new = sub_obj(&x, &y_new);
168                if f_new < sub_f {
169                    y_opt = y_new;
170                    sub_f = f_new;
171                } else {
172                    break;
173                }
174            }
175
176            // Compute Benders cut: subgradient of Q(x) w.r.t. x
177            // Q(x) ≈ Q(x_k) + g^T (x - x_k) where g = ∂_x sub_obj(x, y*(x))
178            let mut cut_grad = vec![0.0f64; nx];
179            for i in 0..nx {
180                let mut xf = x.clone();
181                xf[i] += h;
182                cut_grad[i] = (sub_obj(&xf, &y_opt) - sub_obj(&x, &y_opt)) / h;
183            }
184            let cut_offset = sub_f;
185            cuts.push((cut_grad, cut_offset));
186            n_cuts += 1;
187
188            // Compute upper bound: master + subproblem
189            let master_f = master_obj(&x);
190            let current_ub = master_f + sub_f;
191            if current_ub < upper_bound {
192                upper_bound = current_ub;
193                y = y_opt.clone();
194            }
195
196            // Step 2: Solve master problem with current cuts
197            // Master: min master_obj(x) + eta  s.t. eta >= cut_k^T x + const_k for all k
198            // We represent this as: min master_obj(x) + max_k(cut_k^T x + const_k - cut_k^T x_k)
199
200            let master_with_cuts = |x: &[f64]| -> f64 {
201                let base = master_obj(x);
202                let recourse = cuts
203                    .iter()
204                    .map(|(g, offset)| {
205                        let inner: f64 = g.iter().zip(x.iter()).map(|(gi, xi)| gi * xi).sum();
206                        inner - g.iter().zip(x0.iter()).map(|(gi, xi)| gi * xi).sum::<f64>()
207                            + offset
208                    })
209                    .fold(f64::NEG_INFINITY, f64::max);
210                let recourse = if recourse.is_finite() { recourse } else { 0.0 };
211                base + recourse
212            };
213
214            // Gradient descent on master with cuts
215            let mut x_new = x.clone();
216            let mut master_val = master_with_cuts(&x_new);
217
218            for _master_iter in 0..200 {
219                let mut grad = vec![0.0f64; nx];
220                for i in 0..nx {
221                    let mut xf = x_new.clone();
222                    xf[i] += h;
223                    grad[i] = (master_with_cuts(&xf) - master_val) / h;
224                }
225                let gnorm: f64 = grad.iter().map(|g| g * g).sum::<f64>().sqrt();
226                if gnorm < self.options.tol {
227                    break;
228                }
229                let step = 0.1f64 / (1.0 + gnorm);
230                let mut xt = vec![0.0f64; nx];
231                for i in 0..nx {
232                    xt[i] = x_new[i] - step * grad[i];
233                }
234                let ft = master_with_cuts(&xt);
235                if ft < master_val {
236                    x_new = xt;
237                    master_val = ft;
238                } else {
239                    break;
240                }
241            }
242
243            lower_bound = master_val;
244            x = x_new;
245
246            // Convergence check
247            let gap = upper_bound - lower_bound;
248            if gap.abs() < self.options.tol * (1.0 + upper_bound.abs()) {
249                break;
250            }
251        }
252
253        Ok(BendersResult {
254            x,
255            y,
256            fun: upper_bound,
257            lower_bound,
258            upper_bound,
259            n_cuts,
260            nit,
261            success: (upper_bound - lower_bound).abs()
262                < self.options.tol * (1.0 + upper_bound.abs()),
263            message: "Benders decomposition completed".to_string(),
264        })
265    }
266}
267
268// ---------------------------------------------------------------------------
269// Dantzig-Wolfe Decomposition
270// ---------------------------------------------------------------------------
271
272/// Options for Dantzig-Wolfe decomposition
273#[derive(Debug, Clone)]
274pub struct DantzigWolfeOptions {
275    /// Maximum number of column generation iterations
276    pub max_iter: usize,
277    /// Optimality tolerance (reduced cost threshold)
278    pub opt_tol: f64,
279    /// Maximum subproblem iterations
280    pub max_sub_iter: usize,
281    /// Subproblem convergence tolerance
282    pub sub_tol: f64,
283    /// Step size for restricted master problem
284    pub master_step: f64,
285}
286
287impl Default for DantzigWolfeOptions {
288    fn default() -> Self {
289        DantzigWolfeOptions {
290            max_iter: 200,
291            opt_tol: 1e-7,
292            max_sub_iter: 200,
293            sub_tol: 1e-9,
294            master_step: 0.1,
295        }
296    }
297}
298
299/// Result of Dantzig-Wolfe decomposition
300#[derive(Debug, Clone)]
301pub struct DantzigWolfeResult {
302    /// Optimal solution (primal)
303    pub x: Vec<f64>,
304    /// Optimal objective value
305    pub fun: f64,
306    /// Number of columns (proposals) generated
307    pub n_columns: usize,
308    /// Number of iterations
309    pub nit: usize,
310    /// Dual variables (prices) at optimum
311    pub duals: Vec<f64>,
312    /// Whether the algorithm converged
313    pub success: bool,
314    /// Termination message
315    pub message: String,
316}
317
318/// Dantzig-Wolfe decomposition for structured optimization problems.
319///
320/// Decomposes problems with a "linking" constraint structure:
321///
322/// ```text
323/// min  c^T x
324/// s.t. A0 x = b0    (linking constraints)
325///      A_i x_i = b_i for each block i  (block constraints)
326///      x_i in X_i
327/// ```
328///
329/// The restricted master problem uses convex combinations of extreme points
330/// of each X_i, with column generation to add improving columns.
331///
332/// This implementation handles smooth (not necessarily LP) problems.
333pub struct DantzigWolfe {
334    /// Algorithm options
335    pub options: DantzigWolfeOptions,
336}
337
338impl Default for DantzigWolfe {
339    fn default() -> Self {
340        DantzigWolfe {
341            options: DantzigWolfeOptions::default(),
342        }
343    }
344}
345
346impl DantzigWolfe {
347    /// Create a Dantzig-Wolfe solver
348    pub fn new(options: DantzigWolfeOptions) -> Self {
349        DantzigWolfe { options }
350    }
351
352    /// Solve the master problem and generate columns.
353    ///
354    /// # Arguments
355    ///
356    /// * `master_obj` - Master objective f(x): smooth, depends on all x
357    /// * `pricing_obj` - Pricing problem: for duals π, minimize c_bar^T x_sub
358    ///   Given duals `pi`, return (optimal x_sub, min reduced cost)
359    /// * `x0` - Initial point for the master
360    /// * `n_blocks` - Number of subproblem blocks
361    pub fn solve<FM, FP>(
362        &self,
363        master_obj: FM,
364        pricing_oracle: FP,
365        x0: &[f64],
366        _n_blocks: usize,
367    ) -> OptimizeResult<DantzigWolfeResult>
368    where
369        FM: Fn(&[f64]) -> f64,
370        FP: Fn(&[f64]) -> (Vec<f64>, f64), // pricing oracle: duals -> (proposal, reduced_cost)
371    {
372        let n = x0.len();
373        let h = 1e-7f64;
374
375        if n == 0 {
376            return Err(OptimizeError::InvalidInput(
377                "Initial point must be non-empty".to_string(),
378            ));
379        }
380
381        let mut x = x0.to_vec();
382        let mut columns: Vec<Vec<f64>> = vec![x.clone()]; // pool of generated columns
383        let mut weights: Vec<f64> = vec![1.0]; // convex combination weights
384        let mut duals = vec![0.0f64; n];
385        let mut n_iter = 0usize;
386        let mut n_columns = 1usize;
387
388        for outer in 0..self.options.max_iter {
389            n_iter = outer + 1;
390
391            // Step 1: Compute restricted master solution (gradient descent)
392            let restricted_obj = |w: &[f64]| -> f64 {
393                let n_cols = columns.len();
394                let mut xc = vec![0.0f64; n];
395                for (j, col) in columns.iter().enumerate() {
396                    let wj = if j < w.len() { w[j] } else { 0.0 };
397                    for i in 0..n {
398                        xc[i] += wj * col[i];
399                    }
400                }
401                master_obj(&xc)
402            };
403
404            let nc = columns.len();
405            let mut f_w = restricted_obj(&weights[..nc.min(weights.len())]);
406
407            for _inner in 0..200 {
408                let mut grad_w = vec![0.0f64; nc];
409                let w_cur = &weights[..nc.min(weights.len())];
410                let w_padded: Vec<f64> = {
411                    let mut wp = w_cur.to_vec();
412                    while wp.len() < nc {
413                        wp.push(0.0);
414                    }
415                    wp
416                };
417                for j in 0..nc {
418                    let mut wf = w_padded.clone();
419                    let delta = h;
420                    wf[j] += delta;
421                    // Renormalize
422                    let sum: f64 = wf.iter().sum();
423                    let wf_norm: Vec<f64> = wf.iter().map(|wj| wj / sum).collect();
424                    grad_w[j] = (restricted_obj(&wf_norm) - f_w) / delta;
425                }
426                let gnorm: f64 = grad_w.iter().map(|g| g * g).sum::<f64>().sqrt();
427                if gnorm < self.options.sub_tol {
428                    break;
429                }
430                let step = self.options.master_step / (1.0 + gnorm);
431                let w_cur_len = w_padded.len();
432                let mut w_new: Vec<f64> = (0..w_cur_len)
433                    .map(|j| (w_padded[j] - step * grad_w[j]).max(0.0))
434                    .collect();
435                // Project onto simplex
436                let sum: f64 = w_new.iter().sum();
437                if sum > 1e-14 {
438                    for wj in w_new.iter_mut() {
439                        *wj /= sum;
440                    }
441                }
442                let f_new = restricted_obj(&w_new);
443                if f_new < f_w {
444                    weights = w_new;
445                    f_w = f_new;
446                } else {
447                    break;
448                }
449            }
450
451            // Reconstruct x from weights and columns
452            let nc = columns.len();
453            x = vec![0.0f64; n];
454            for (j, col) in columns.iter().enumerate() {
455                let wj = if j < weights.len() { weights[j] } else { 0.0 };
456                for i in 0..n {
457                    x[i] += wj * col[i];
458                }
459            }
460
461            // Step 2: Compute dual variables (gradient of master w.r.t. x at current iterate)
462            let f0 = master_obj(&x);
463            for i in 0..n {
464                let mut xf = x.clone();
465                xf[i] += h;
466                duals[i] = (master_obj(&xf) - f0) / h;
467            }
468
469            // Step 3: Pricing problem — find column with most negative reduced cost
470            let (proposal, reduced_cost) = pricing_oracle(&duals);
471
472            if reduced_cost >= -self.options.opt_tol {
473                // No improving column: optimal
474                break;
475            }
476
477            // Add new column
478            columns.push(proposal);
479            weights.push(0.01); // small initial weight
480                                // Renormalize
481            let sum: f64 = weights.iter().sum();
482            for wj in weights.iter_mut() {
483                *wj /= sum;
484            }
485            n_columns += 1;
486        }
487
488        let fun = master_obj(&x);
489
490        Ok(DantzigWolfeResult {
491            x,
492            fun,
493            n_columns,
494            nit: n_iter,
495            duals,
496            success: n_columns < self.options.max_iter,
497            message: "Dantzig-Wolfe decomposition completed".to_string(),
498        })
499    }
500}
501
502// ---------------------------------------------------------------------------
503// ADMM: Alternating Direction Method of Multipliers
504// ---------------------------------------------------------------------------
505
506/// Options for ADMM
507#[derive(Debug, Clone)]
508pub struct AdmmOptions {
509    /// Penalty parameter ρ (augmented Lagrangian)
510    pub rho: f64,
511    /// Maximum iterations
512    pub max_iter: usize,
513    /// Primal feasibility tolerance (||Ax + Bz - c||)
514    pub eps_primal: f64,
515    /// Dual feasibility tolerance (||ρ B^T (z^{k+1} - z^k)||)
516    pub eps_dual: f64,
517    /// Adaptive ρ: increase by this factor when primal/dual residual ratio > threshold
518    pub rho_adapt: bool,
519    /// Adaptive ρ factor
520    pub rho_factor: f64,
521    /// Adaptive ρ threshold
522    pub rho_threshold: f64,
523    /// Inner solver iterations for x and z updates
524    pub inner_iter: usize,
525    /// Inner solver tolerance
526    pub inner_tol: f64,
527}
528
529impl Default for AdmmOptions {
530    fn default() -> Self {
531        AdmmOptions {
532            rho: 1.0,
533            max_iter: 1000,
534            eps_primal: 1e-6,
535            eps_dual: 1e-6,
536            rho_adapt: true,
537            rho_factor: 2.0,
538            rho_threshold: 10.0,
539            inner_iter: 50,
540            inner_tol: 1e-8,
541        }
542    }
543}
544
545/// Result of ADMM
546#[derive(Debug, Clone)]
547pub struct AdmmResult {
548    /// Primal solution x
549    pub x: Vec<f64>,
550    /// Auxiliary variable z
551    pub z: Vec<f64>,
552    /// Dual variable u (scaled)
553    pub u: Vec<f64>,
554    /// Objective value f(x) + g(z)
555    pub fun: f64,
556    /// Primal residual at termination
557    pub primal_residual: f64,
558    /// Dual residual at termination
559    pub dual_residual: f64,
560    /// Number of iterations
561    pub nit: usize,
562    /// Whether the algorithm converged
563    pub success: bool,
564    /// Termination message
565    pub message: String,
566}
567
568/// ADMM: Alternating Direction Method of Multipliers.
569///
570/// Solves the consensus problem:
571///
572/// ```text
573/// min  f(x) + g(z)
574/// s.t. x - z = 0
575/// ```
576///
577/// via the augmented Lagrangian:
578///
579/// ```text
580/// L_ρ(x, z, u) = f(x) + g(z) + (ρ/2) ||x - z + u||²
581/// ```
582///
583/// Iterations:
584/// - x-update: x^{k+1} = argmin_x f(x) + (ρ/2)||x - z^k + u^k||²
585/// - z-update: z^{k+1} = prox_{g/ρ}(x^{k+1} + u^k)
586/// - u-update: u^{k+1} = u^k + x^{k+1} - z^{k+1}
587pub struct Admm {
588    /// Algorithm options
589    pub options: AdmmOptions,
590}
591
592impl Default for Admm {
593    fn default() -> Self {
594        Admm {
595            options: AdmmOptions::default(),
596        }
597    }
598}
599
600impl Admm {
601    /// Create an ADMM solver with options
602    pub fn new(options: AdmmOptions) -> Self {
603        Admm { options }
604    }
605
606    /// Run ADMM for the consensus problem min f(x) + g(z) s.t. x = z.
607    ///
608    /// # Arguments
609    ///
610    /// * `f_obj` - Smooth objective f(x)
611    /// * `prox_g` - Proximal operator for g: prox_{g/ρ}(v) = argmin_z g(z) + (ρ/2)||z-v||²
612    /// * `x0` - Initial x
613    pub fn solve<F, PG>(&self, f_obj: F, prox_g: PG, x0: &[f64]) -> OptimizeResult<AdmmResult>
614    where
615        F: Fn(&[f64]) -> f64,
616        PG: Fn(&[f64], f64) -> Vec<f64>, // prox_g(v, rho) -> z
617    {
618        let n = x0.len();
619        if n == 0 {
620            return Err(OptimizeError::InvalidInput(
621                "Initial point must be non-empty".to_string(),
622            ));
623        }
624
625        let h = 1e-7f64;
626        let mut x = x0.to_vec();
627        let mut z = x.clone();
628        let mut u = vec![0.0f64; n];
629        let mut rho = self.options.rho;
630        let mut nit = 0usize;
631
632        for iter in 0..self.options.max_iter {
633            nit = iter + 1;
634
635            // x-update: min f(x) + (ρ/2)||x - (z - u)||²
636            let target_x: Vec<f64> = z.iter().zip(u.iter()).map(|(zi, ui)| zi - ui).collect();
637            let x_aug = |x: &[f64]| -> f64 {
638                let f = f_obj(x);
639                let pen: f64 = x
640                    .iter()
641                    .zip(target_x.iter())
642                    .map(|(xi, ti)| (xi - ti).powi(2))
643                    .sum();
644                f + 0.5 * rho * pen
645            };
646
647            let mut f_x = x_aug(&x);
648            for _xi in 0..self.options.inner_iter {
649                let mut grad = vec![0.0f64; n];
650                for i in 0..n {
651                    let mut xf = x.clone();
652                    xf[i] += h;
653                    grad[i] = (x_aug(&xf) - f_x) / h;
654                }
655                let gnorm: f64 = grad.iter().map(|g| g * g).sum::<f64>().sqrt();
656                if gnorm < self.options.inner_tol {
657                    break;
658                }
659                let step = 1.0 / (rho + gnorm);
660                let mut x_new: Vec<f64> = x
661                    .iter()
662                    .zip(grad.iter())
663                    .map(|(xi, gi)| xi - step * gi)
664                    .collect();
665                let f_new = x_aug(&x_new);
666                if f_new < f_x {
667                    x = x_new;
668                    f_x = f_new;
669                } else {
670                    // Backtrack
671                    let mut s = step * 0.5;
672                    for _ in 0..20 {
673                        x_new = x
674                            .iter()
675                            .zip(grad.iter())
676                            .map(|(xi, gi)| xi - s * gi)
677                            .collect();
678                        let fn2 = x_aug(&x_new);
679                        if fn2 < f_x {
680                            x = x_new;
681                            f_x = fn2;
682                            break;
683                        }
684                        s *= 0.5;
685                    }
686                    break;
687                }
688            }
689
690            // z-update: prox_{g/ρ}(x + u)
691            let z_prev = z.clone();
692            let v: Vec<f64> = x.iter().zip(u.iter()).map(|(xi, ui)| xi + ui).collect();
693            z = prox_g(&v, rho);
694
695            // u-update: u += x - z
696            for i in 0..n {
697                u[i] += x[i] - z[i];
698            }
699
700            // Compute residuals
701            let primal_res: f64 = x
702                .iter()
703                .zip(z.iter())
704                .map(|(xi, zi)| (xi - zi).powi(2))
705                .sum::<f64>()
706                .sqrt();
707            let dual_res: f64 = z
708                .iter()
709                .zip(z_prev.iter())
710                .map(|(zi, zpi)| (rho * (zi - zpi)).powi(2))
711                .sum::<f64>()
712                .sqrt();
713
714            // Adaptive ρ
715            if self.options.rho_adapt {
716                if primal_res > self.options.rho_threshold * dual_res {
717                    rho *= self.options.rho_factor;
718                    for ui in u.iter_mut() {
719                        *ui /= self.options.rho_factor;
720                    }
721                } else if dual_res > self.options.rho_threshold * primal_res {
722                    rho /= self.options.rho_factor;
723                    for ui in u.iter_mut() {
724                        *ui *= self.options.rho_factor;
725                    }
726                }
727            }
728
729            if primal_res < self.options.eps_primal && dual_res < self.options.eps_dual {
730                let f_val = f_obj(&x);
731                return Ok(AdmmResult {
732                    x,
733                    z,
734                    u,
735                    fun: f_val,
736                    primal_residual: primal_res,
737                    dual_residual: dual_res,
738                    nit,
739                    success: true,
740                    message: "ADMM converged".to_string(),
741                });
742            }
743        }
744
745        let primal_res: f64 = x
746            .iter()
747            .zip(z.iter())
748            .map(|(xi, zi)| (xi - zi).powi(2))
749            .sum::<f64>()
750            .sqrt();
751        let dual_res = 0.0f64; // approximation at max iter
752        let f_val = f_obj(&x);
753
754        Ok(AdmmResult {
755            x,
756            z,
757            u,
758            fun: f_val,
759            primal_residual: primal_res,
760            dual_residual: dual_res,
761            nit,
762            success: primal_res < self.options.eps_primal * 100.0,
763            message: "ADMM: maximum iterations reached".to_string(),
764        })
765    }
766}
767
768// ---------------------------------------------------------------------------
769// ProximalBundle: Proximal Bundle Method for Nonsmooth Optimization
770// ---------------------------------------------------------------------------
771
772/// Options for proximal bundle method
773#[derive(Debug, Clone)]
774pub struct ProximalBundleOptions {
775    /// Proximal parameter μ (regularization)
776    pub mu: f64,
777    /// Maximum number of bundle iterations
778    pub max_iter: usize,
779    /// Null-step tolerance (serious step condition)
780    pub m_l: f64,
781    /// Maximum bundle size
782    pub max_bundle_size: usize,
783    /// Convergence tolerance
784    pub tol: f64,
785    /// Minimum proximal parameter
786    pub min_mu: f64,
787    /// Maximum proximal parameter
788    pub max_mu: f64,
789}
790
791impl Default for ProximalBundleOptions {
792    fn default() -> Self {
793        ProximalBundleOptions {
794            mu: 1.0,
795            max_iter: 300,
796            m_l: 0.1,
797            max_bundle_size: 50,
798            tol: 1e-6,
799            min_mu: 1e-8,
800            max_mu: 1e8,
801        }
802    }
803}
804
805/// A bundle element: (subgradient, function value, center)
806#[derive(Debug, Clone)]
807struct BundleElement {
808    /// Subgradient g_i at bundle point
809    g: Vec<f64>,
810    /// Function value f_i at bundle point
811    f: f64,
812    /// Bundle point
813    x: Vec<f64>,
814}
815
816/// Result of proximal bundle method
817#[derive(Debug, Clone)]
818pub struct ProximalBundleResult {
819    /// Optimal solution
820    pub x: Vec<f64>,
821    /// Objective value at optimum
822    pub fun: f64,
823    /// Subgradient norm at optimum
824    pub subgrad_norm: f64,
825    /// Number of serious steps (actual descent)
826    pub n_serious_steps: usize,
827    /// Number of null steps
828    pub n_null_steps: usize,
829    /// Total iterations
830    pub nit: usize,
831    /// Whether the algorithm converged
832    pub success: bool,
833    /// Termination message
834    pub message: String,
835}
836
837/// Proximal bundle method for nonsmooth convex optimization.
838///
839/// Solves min f(x) where f is convex but possibly nonsmooth.
840/// Uses a bundle of subgradients to build a cutting-plane model:
841///
842/// ```text
843/// f̂(x; y) = max_{i in B} { f(x_i) + g_i^T (x - x_i) }
844/// ```
845///
846/// The proximal subproblem:
847/// ```text
848/// x^{k+1} = argmin_x { f̂(x; y^k) + (μ/2) ||x - y^k||² }
849/// ```
850///
851/// generates either a serious step (sufficient decrease) or null step.
852pub struct ProximalBundle {
853    /// Algorithm options
854    pub options: ProximalBundleOptions,
855}
856
857impl Default for ProximalBundle {
858    fn default() -> Self {
859        ProximalBundle {
860            options: ProximalBundleOptions::default(),
861        }
862    }
863}
864
865impl ProximalBundle {
866    /// Create a proximal bundle solver
867    pub fn new(options: ProximalBundleOptions) -> Self {
868        ProximalBundle { options }
869    }
870
871    /// Solve min f(x) using the proximal bundle method.
872    ///
873    /// # Arguments
874    ///
875    /// * `func` - Objective function (can be nonsmooth)
876    /// * `subgrad` - Subgradient of f at x: returns (f(x), g ∈ ∂f(x))
877    /// * `x0` - Initial point
878    pub fn minimize<FS>(&self, func: FS, x0: &[f64]) -> OptimizeResult<ProximalBundleResult>
879    where
880        FS: Fn(&[f64]) -> (f64, Vec<f64>), // returns (value, subgradient)
881    {
882        let n = x0.len();
883        if n == 0 {
884            return Err(OptimizeError::InvalidInput(
885                "Initial point must be non-empty".to_string(),
886            ));
887        }
888
889        let mut y = x0.to_vec(); // proximal center (serious step point)
890        let (mut f_y, mut g_y) = func(&y);
891        let mut mu = self.options.mu;
892        let mut bundle: Vec<BundleElement> = vec![BundleElement {
893            g: g_y.clone(),
894            f: f_y,
895            x: y.clone(),
896        }];
897
898        let mut n_serious = 0usize;
899        let mut n_null = 0usize;
900        let mut nit = 0usize;
901
902        for iter in 0..self.options.max_iter {
903            nit = iter + 1;
904
905            // Check convergence: subgradient norm at center
906            let gnorm: f64 = g_y.iter().map(|g| g * g).sum::<f64>().sqrt();
907            if gnorm < self.options.tol {
908                return Ok(ProximalBundleResult {
909                    x: y,
910                    fun: f_y,
911                    subgrad_norm: gnorm,
912                    n_serious_steps: n_serious,
913                    n_null_steps: n_null,
914                    nit,
915                    success: true,
916                    message: "Proximal bundle converged (subgradient norm)".to_string(),
917                });
918            }
919
920            // Solve proximal subproblem: min f̂(x; y) + (μ/2)||x - y||²
921            // The cutting-plane model: f̂(x) = max_i {f_i + g_i^T (x - x_i)}
922            // The proximal subproblem reduces to a QP. We use gradient descent on the dual.
923            //
924            // Dual formulation: max_{λ>=0, Σλ=1} { Σλ_i f_i - (1/2μ) ||Σλ_i g_i||² - (Σλ_i g_i)^T y + Σλ_i g_i^T x_i }
925            // This is a QP in λ.
926            let nb = bundle.len();
927            let mut lambda = vec![1.0f64 / nb as f64; nb]; // uniform start
928
929            // Gradient of dual w.r.t. λ
930            let dual_obj = |lam: &[f64]| -> f64 {
931                let mut agg_g = vec![0.0f64; n];
932                let mut sum_fg = 0.0f64;
933                for (i, be) in bundle.iter().enumerate() {
934                    let li = if i < lam.len() { lam[i] } else { 0.0 };
935                    for j in 0..n {
936                        agg_g[j] += li * be.g[j];
937                    }
938                    sum_fg += li
939                        * (be.f
940                            - be.g
941                                .iter()
942                                .zip(be.x.iter())
943                                .map(|(gj, xj)| gj * xj)
944                                .sum::<f64>());
945                }
946                let agg_norm_sq: f64 = agg_g.iter().map(|g| g * g).sum();
947                // Dual objective (to maximize, so negate for minimization):
948                -(sum_fg
949                    - agg_g
950                        .iter()
951                        .zip(y.iter())
952                        .map(|(gj, yj)| gj * yj)
953                        .sum::<f64>()
954                    - 0.5 / mu * agg_norm_sq)
955            };
956
957            let h = 1e-7f64;
958            let mut f_lam = dual_obj(&lambda);
959            for _di in 0..200 {
960                let mut grad_lam = vec![0.0f64; nb];
961                for i in 0..nb {
962                    let mut lf = lambda.clone();
963                    lf[i] += h;
964                    // Re-project onto simplex
965                    let sum: f64 = lf.iter().sum();
966                    let lf: Vec<f64> = lf.iter().map(|li| (li / sum).max(0.0)).collect();
967                    grad_lam[i] = (dual_obj(&lf) - f_lam) / h;
968                }
969                let gnorm_lam: f64 = grad_lam.iter().map(|g| g * g).sum::<f64>().sqrt();
970                if gnorm_lam < 1e-10 {
971                    break;
972                }
973                let step_lam = 0.1 / (1.0 + gnorm_lam);
974                let mut l_new: Vec<f64> = lambda
975                    .iter()
976                    .zip(grad_lam.iter())
977                    .map(|(li, gi)| (li - step_lam * gi).max(0.0))
978                    .collect();
979                // Project onto simplex
980                let sum: f64 = l_new.iter().sum();
981                if sum > 1e-14 {
982                    for li in l_new.iter_mut() {
983                        *li /= sum;
984                    }
985                }
986                let f_new = dual_obj(&l_new);
987                if f_new < f_lam {
988                    lambda = l_new;
989                    f_lam = f_new;
990                } else {
991                    break;
992                }
993            }
994
995            // Recover primal step from dual: x* = y - (1/μ) Σ λ_i g_i
996            let mut agg_g = vec![0.0f64; n];
997            for (i, be) in bundle.iter().enumerate() {
998                let li = if i < lambda.len() { lambda[i] } else { 0.0 };
999                for j in 0..n {
1000                    agg_g[j] += li * be.g[j];
1001                }
1002            }
1003            let x_trial: Vec<f64> = y
1004                .iter()
1005                .zip(agg_g.iter())
1006                .map(|(yj, gj)| yj - gj / mu)
1007                .collect();
1008
1009            // Evaluate at trial point
1010            let (f_trial, g_trial) = func(&x_trial);
1011
1012            // Compute cutting-plane model value at x_trial (for acceptance test)
1013            let f_model: f64 = bundle
1014                .iter()
1015                .map(|be| {
1016                    let gx: f64 =
1017                        be.g.iter()
1018                            .zip(x_trial.iter())
1019                            .zip(be.x.iter())
1020                            .map(|((gi, xi), xi_k)| gi * (xi - xi_k))
1021                            .sum();
1022                    be.f + gx
1023                })
1024                .fold(f64::NEG_INFINITY, f64::max);
1025
1026            // Serious step condition: f(x_trial) <= f(y) - mL * (f̂(y) - f̂(x_trial))
1027            // Simplified: f_trial <= f_y - mL * (f_y - f_model)
1028            let descent = f_y - f_model;
1029            let is_serious = f_trial <= f_y - self.options.m_l * descent.max(0.0);
1030
1031            if is_serious {
1032                y = x_trial.clone();
1033                f_y = f_trial;
1034                g_y = g_trial.clone();
1035                n_serious += 1;
1036            } else {
1037                n_null += 1;
1038            }
1039
1040            // Add new bundle element
1041            bundle.push(BundleElement {
1042                g: g_trial,
1043                f: f_trial,
1044                x: x_trial,
1045            });
1046
1047            // Trim bundle if too large
1048            if bundle.len() > self.options.max_bundle_size {
1049                bundle.remove(0);
1050            }
1051
1052            // Update μ: increase for null steps, reset for serious steps
1053            if is_serious {
1054                mu = (mu * 0.5).max(self.options.min_mu);
1055            } else if n_null > 3 * (n_serious + 1) {
1056                mu = (mu * 2.0).min(self.options.max_mu);
1057            }
1058        }
1059
1060        let gnorm_final: f64 = g_y.iter().map(|g| g * g).sum::<f64>().sqrt();
1061
1062        Ok(ProximalBundleResult {
1063            x: y,
1064            fun: f_y,
1065            subgrad_norm: gnorm_final,
1066            n_serious_steps: n_serious,
1067            n_null_steps: n_null,
1068            nit,
1069            success: gnorm_final < self.options.tol * 100.0,
1070            message: "Proximal bundle: maximum iterations reached".to_string(),
1071        })
1072    }
1073}
1074
1075// ---------------------------------------------------------------------------
1076// Tests
1077// ---------------------------------------------------------------------------
1078
1079#[cfg(test)]
1080mod tests {
1081    use super::*;
1082
1083    #[test]
1084    fn test_benders_separable() {
1085        // Master: min x^2, Sub: min (x + y)^2 → optimal y = -x, combined = 0
1086        let result = BendersDecomposition::default()
1087            .solve(
1088                |x: &[f64]| x[0].powi(2),
1089                |x: &[f64], y: &[f64]| (x[0] + y[0]).powi(2),
1090                &[1.0],
1091                &[0.0],
1092            )
1093            .expect("unexpected None or Err");
1094        assert!(
1095            result.upper_bound < 2.0,
1096            "Upper bound {} should be < 2.0",
1097            result.upper_bound
1098        );
1099    }
1100
1101    #[test]
1102    fn test_dantzig_wolfe_basic() {
1103        // Simple quadratic: min (x-1)^2
1104        // Pricing: minimize 2*(x-1)*g[0] w.r.t. x in [0,2]
1105        let result = DantzigWolfe::default()
1106            .solve(
1107                |x: &[f64]| (x[0] - 1.0).powi(2),
1108                |pi: &[f64]| {
1109                    // Optimal proposal in [0, 2]: x* = 1 - pi[0]/2 (if inside bounds)
1110                    let x_star = (1.0 - pi[0] * 0.5).clamp(0.0, 2.0);
1111                    let rc = pi[0] * x_star - pi[0]; // reduced cost
1112                    (vec![x_star], rc)
1113                },
1114                &[0.5],
1115                1,
1116            )
1117            .expect("unexpected None or Err");
1118        assert!(result.fun < 0.5, "Expected fun < 0.5, got {}", result.fun);
1119    }
1120
1121    #[test]
1122    fn test_admm_consensus() {
1123        // min x^2 + ||z-1||^2 s.t. x = z → optimal x = z = 0.5
1124        // prox_{g/ρ}(v) = argmin ||z-1||^2 + ρ/2||z-v||^2 = (1 + ρ/2 * v) / (1 + ρ/2)
1125        let result = Admm::default()
1126            .solve(
1127                |x: &[f64]| x[0].powi(2),
1128                |v: &[f64], rho: f64| vec![(1.0 + rho * 0.5 * v[0]) / (1.0 + rho * 0.5)],
1129                &[1.0],
1130            )
1131            .expect("unexpected None or Err");
1132        assert!(
1133            (result.x[0] - 0.5).abs() < 0.1 || result.fun < 1.0,
1134            "fun = {}, x = {:?}",
1135            result.fun,
1136            result.x
1137        );
1138    }
1139
1140    #[test]
1141    fn test_admm_lasso_like() {
1142        // min (x-3)^2 + |z| s.t. x = z
1143        // prox_{|.|/ρ}(v) = soft-threshold(v, 1/ρ)
1144        let soft_thresh = |v: &[f64], rho: f64| -> Vec<f64> {
1145            let thresh = 1.0 / rho;
1146            v.iter()
1147                .map(|vi| vi.signum() * (vi.abs() - thresh).max(0.0))
1148                .collect()
1149        };
1150
1151        let result = Admm::new(AdmmOptions {
1152            rho: 1.0,
1153            max_iter: 500,
1154            eps_primal: 1e-5,
1155            eps_dual: 1e-5,
1156            ..Default::default()
1157        })
1158        .solve(|x: &[f64]| (x[0] - 3.0).powi(2), soft_thresh, &[0.0])
1159        .expect("unexpected None or Err");
1160        // Optimal x ≈ 2.5 (balance between pulling toward 3 and L1 penalty)
1161        assert!(result.fun < 5.0, "fun = {}", result.fun);
1162    }
1163
1164    #[test]
1165    fn test_proximal_bundle_smooth() {
1166        // Test on smooth convex function (f = x^2, g = 2x)
1167        let result = ProximalBundle::default()
1168            .minimize(|x: &[f64]| (x[0].powi(2), vec![2.0 * x[0]]), &[2.0])
1169            .expect("unexpected None or Err");
1170        assert!(result.fun < 0.5, "Expected fun < 0.5, got {}", result.fun);
1171    }
1172
1173    #[test]
1174    fn test_proximal_bundle_max_function() {
1175        // max(x, -x) = |x|, subgradient: sign(x) or 0 if x=0
1176        let result = ProximalBundle::new(ProximalBundleOptions {
1177            tol: 1e-5,
1178            max_iter: 200,
1179            ..Default::default()
1180        })
1181        .minimize(
1182            |x: &[f64]| {
1183                let v = x[0].abs();
1184                let g = if x[0] > 0.0 {
1185                    1.0
1186                } else if x[0] < 0.0 {
1187                    -1.0
1188                } else {
1189                    0.0
1190                };
1191                (v, vec![g])
1192            },
1193            &[2.0],
1194        )
1195        .expect("unexpected None or Err");
1196        assert!(
1197            result.fun < 0.5,
1198            "Expected |x| < 0.5 at optimum, got {}",
1199            result.fun
1200        );
1201    }
1202}