scirs2_optimize/decomposition/benders.rs
1//! Decomposition methods for large-scale optimization
2//!
3//! Implements:
4//! - [`BendersDecomposition`]: Master + subproblem decomposition
5//! - [`DantzigWolfe`]: Dantzig-Wolfe decomposition for structured LP
6//! - `ADMM`: Alternating Direction Method of Multipliers
7//! - [`ProximalBundle`]: Proximal bundle method for nonsmooth optimization
8
9use crate::error::{OptimizeError, OptimizeResult};
10
11// ---------------------------------------------------------------------------
12// Benders Decomposition
13// ---------------------------------------------------------------------------
14
15/// Options for Benders decomposition
16#[derive(Debug, Clone)]
17pub struct BendersOptions {
18 /// Maximum number of outer iterations (adding cuts)
19 pub max_iter: usize,
20 /// Convergence tolerance (upper - lower bound gap)
21 pub tol: f64,
22 /// Feasibility tolerance for subproblem
23 pub feas_tol: f64,
24 /// Maximum subproblem iterations
25 pub max_sub_iter: usize,
26 /// Subproblem convergence tolerance
27 pub sub_tol: f64,
28 /// Whether to use multi-cut variant (separate cut per constraint group)
29 pub multi_cut: bool,
30}
31
32impl Default for BendersOptions {
33 fn default() -> Self {
34 BendersOptions {
35 max_iter: 100,
36 tol: 1e-7,
37 feas_tol: 1e-7,
38 max_sub_iter: 500,
39 sub_tol: 1e-9,
40 multi_cut: false,
41 }
42 }
43}
44
45/// Result of Benders decomposition
46#[derive(Debug, Clone)]
47pub struct BendersResult {
48 /// Optimal first-stage (master) variables
49 pub x: Vec<f64>,
50 /// Optimal second-stage (subproblem) variables
51 pub y: Vec<f64>,
52 /// Objective value
53 pub fun: f64,
54 /// Lower bound at termination
55 pub lower_bound: f64,
56 /// Upper bound at termination
57 pub upper_bound: f64,
58 /// Number of Benders cuts added
59 pub n_cuts: usize,
60 /// Number of iterations
61 pub nit: usize,
62 /// Whether the algorithm converged
63 pub success: bool,
64 /// Termination message
65 pub message: String,
66}
67
68/// Benders decomposition solver.
69///
70/// Decomposes the problem:
71///
72/// ```text
73/// min_{x,y} c^T x + d^T y
74/// s.t. A x + B y >= b (coupling constraints)
75/// x in X, y in Y
76/// ```
77///
78/// into a master problem (in x) and subproblems (in y for fixed x).
79/// Benders cuts are added to the master from subproblem dual solutions.
80///
81/// This implementation uses a projected gradient method for both the master
82/// and subproblem, as a general-purpose (non-LP-specific) variant.
83pub struct BendersDecomposition {
84 /// Algorithm options
85 pub options: BendersOptions,
86}
87
88impl Default for BendersDecomposition {
89 fn default() -> Self {
90 BendersDecomposition {
91 options: BendersOptions::default(),
92 }
93 }
94}
95
96impl BendersDecomposition {
97 /// Create a Benders solver with custom options
98 pub fn new(options: BendersOptions) -> Self {
99 BendersDecomposition { options }
100 }
101
102 /// Solve a separable optimization problem via Benders decomposition.
103 ///
104 /// # Arguments
105 ///
106 /// * `master_obj` - Master problem objective (depends on x only)
107 /// * `sub_obj` - Subproblem objective (depends on x and y)
108 /// * `sub_constraints` - Subproblem constraints for fixed x: returns (violation, dual)
109 /// Each entry: `Fn(&[f64], &[f64]) -> (f64, f64)` (feasibility, dual multiplier)
110 /// * `x0` - Initial first-stage variables
111 /// * `y0` - Initial second-stage variables
112 pub fn solve<FM, FS>(
113 &self,
114 master_obj: FM,
115 sub_obj: FS,
116 x0: &[f64],
117 y0: &[f64],
118 ) -> OptimizeResult<BendersResult>
119 where
120 FM: Fn(&[f64]) -> f64,
121 FS: Fn(&[f64], &[f64]) -> f64,
122 {
123 let nx = x0.len();
124 let ny = y0.len();
125
126 if nx == 0 {
127 return Err(OptimizeError::InvalidInput(
128 "First-stage (master) variables must be non-empty".to_string(),
129 ));
130 }
131
132 let h = 1e-7f64;
133 let mut x = x0.to_vec();
134 let mut y = y0.to_vec();
135 let mut lower_bound = f64::NEG_INFINITY;
136 let mut upper_bound = f64::INFINITY;
137 let mut n_cuts = 0usize;
138 let mut nit = 0usize;
139
140 // Benders cuts: each cut is (g, beta) where the cut is g^T * (x - x_k) + beta <= eta
141 // i.e., the recourse function Q(x) >= g^T * x + (beta - g^T * x_k)
142 let mut cuts: Vec<(Vec<f64>, f64)> = Vec::new();
143
144 for outer in 0..self.options.max_iter {
145 nit = outer + 1;
146
147 // Step 1: Solve subproblem for fixed x (minimize over y)
148 let mut y_opt = y.clone();
149 let mut sub_f = sub_obj(&x, &y_opt);
150
151 for _sub in 0..self.options.max_sub_iter {
152 let mut grad_y = vec![0.0f64; ny];
153 for i in 0..ny {
154 let mut yf = y_opt.clone();
155 yf[i] += h;
156 grad_y[i] = (sub_obj(&x, &yf) - sub_f) / h;
157 }
158 let gnorm: f64 = grad_y.iter().map(|g| g * g).sum::<f64>().sqrt();
159 if gnorm < self.options.sub_tol {
160 break;
161 }
162 let step = 0.1f64 / (1.0 + gnorm);
163 let mut y_new = vec![0.0f64; ny];
164 for i in 0..ny {
165 y_new[i] = y_opt[i] - step * grad_y[i];
166 }
167 let f_new = sub_obj(&x, &y_new);
168 if f_new < sub_f {
169 y_opt = y_new;
170 sub_f = f_new;
171 } else {
172 break;
173 }
174 }
175
176 // Compute Benders cut: subgradient of Q(x) w.r.t. x
177 // Q(x) ≈ Q(x_k) + g^T (x - x_k) where g = ∂_x sub_obj(x, y*(x))
178 let mut cut_grad = vec![0.0f64; nx];
179 for i in 0..nx {
180 let mut xf = x.clone();
181 xf[i] += h;
182 cut_grad[i] = (sub_obj(&xf, &y_opt) - sub_obj(&x, &y_opt)) / h;
183 }
184 let cut_offset = sub_f;
185 cuts.push((cut_grad, cut_offset));
186 n_cuts += 1;
187
188 // Compute upper bound: master + subproblem
189 let master_f = master_obj(&x);
190 let current_ub = master_f + sub_f;
191 if current_ub < upper_bound {
192 upper_bound = current_ub;
193 y = y_opt.clone();
194 }
195
196 // Step 2: Solve master problem with current cuts
197 // Master: min master_obj(x) + eta s.t. eta >= cut_k^T x + const_k for all k
198 // We represent this as: min master_obj(x) + max_k(cut_k^T x + const_k - cut_k^T x_k)
199
200 let master_with_cuts = |x: &[f64]| -> f64 {
201 let base = master_obj(x);
202 let recourse = cuts
203 .iter()
204 .map(|(g, offset)| {
205 let inner: f64 = g.iter().zip(x.iter()).map(|(gi, xi)| gi * xi).sum();
206 inner - g.iter().zip(x0.iter()).map(|(gi, xi)| gi * xi).sum::<f64>()
207 + offset
208 })
209 .fold(f64::NEG_INFINITY, f64::max);
210 let recourse = if recourse.is_finite() { recourse } else { 0.0 };
211 base + recourse
212 };
213
214 // Gradient descent on master with cuts
215 let mut x_new = x.clone();
216 let mut master_val = master_with_cuts(&x_new);
217
218 for _master_iter in 0..200 {
219 let mut grad = vec![0.0f64; nx];
220 for i in 0..nx {
221 let mut xf = x_new.clone();
222 xf[i] += h;
223 grad[i] = (master_with_cuts(&xf) - master_val) / h;
224 }
225 let gnorm: f64 = grad.iter().map(|g| g * g).sum::<f64>().sqrt();
226 if gnorm < self.options.tol {
227 break;
228 }
229 let step = 0.1f64 / (1.0 + gnorm);
230 let mut xt = vec![0.0f64; nx];
231 for i in 0..nx {
232 xt[i] = x_new[i] - step * grad[i];
233 }
234 let ft = master_with_cuts(&xt);
235 if ft < master_val {
236 x_new = xt;
237 master_val = ft;
238 } else {
239 break;
240 }
241 }
242
243 lower_bound = master_val;
244 x = x_new;
245
246 // Convergence check
247 let gap = upper_bound - lower_bound;
248 if gap.abs() < self.options.tol * (1.0 + upper_bound.abs()) {
249 break;
250 }
251 }
252
253 Ok(BendersResult {
254 x,
255 y,
256 fun: upper_bound,
257 lower_bound,
258 upper_bound,
259 n_cuts,
260 nit,
261 success: (upper_bound - lower_bound).abs()
262 < self.options.tol * (1.0 + upper_bound.abs()),
263 message: "Benders decomposition completed".to_string(),
264 })
265 }
266}
267
268// ---------------------------------------------------------------------------
269// Dantzig-Wolfe Decomposition
270// ---------------------------------------------------------------------------
271
272/// Options for Dantzig-Wolfe decomposition
273#[derive(Debug, Clone)]
274pub struct DantzigWolfeOptions {
275 /// Maximum number of column generation iterations
276 pub max_iter: usize,
277 /// Optimality tolerance (reduced cost threshold)
278 pub opt_tol: f64,
279 /// Maximum subproblem iterations
280 pub max_sub_iter: usize,
281 /// Subproblem convergence tolerance
282 pub sub_tol: f64,
283 /// Step size for restricted master problem
284 pub master_step: f64,
285}
286
287impl Default for DantzigWolfeOptions {
288 fn default() -> Self {
289 DantzigWolfeOptions {
290 max_iter: 200,
291 opt_tol: 1e-7,
292 max_sub_iter: 200,
293 sub_tol: 1e-9,
294 master_step: 0.1,
295 }
296 }
297}
298
299/// Result of Dantzig-Wolfe decomposition
300#[derive(Debug, Clone)]
301pub struct DantzigWolfeResult {
302 /// Optimal solution (primal)
303 pub x: Vec<f64>,
304 /// Optimal objective value
305 pub fun: f64,
306 /// Number of columns (proposals) generated
307 pub n_columns: usize,
308 /// Number of iterations
309 pub nit: usize,
310 /// Dual variables (prices) at optimum
311 pub duals: Vec<f64>,
312 /// Whether the algorithm converged
313 pub success: bool,
314 /// Termination message
315 pub message: String,
316}
317
318/// Dantzig-Wolfe decomposition for structured optimization problems.
319///
320/// Decomposes problems with a "linking" constraint structure:
321///
322/// ```text
323/// min c^T x
324/// s.t. A0 x = b0 (linking constraints)
325/// A_i x_i = b_i for each block i (block constraints)
326/// x_i in X_i
327/// ```
328///
329/// The restricted master problem uses convex combinations of extreme points
330/// of each X_i, with column generation to add improving columns.
331///
332/// This implementation handles smooth (not necessarily LP) problems.
333pub struct DantzigWolfe {
334 /// Algorithm options
335 pub options: DantzigWolfeOptions,
336}
337
338impl Default for DantzigWolfe {
339 fn default() -> Self {
340 DantzigWolfe {
341 options: DantzigWolfeOptions::default(),
342 }
343 }
344}
345
346impl DantzigWolfe {
347 /// Create a Dantzig-Wolfe solver
348 pub fn new(options: DantzigWolfeOptions) -> Self {
349 DantzigWolfe { options }
350 }
351
352 /// Solve the master problem and generate columns.
353 ///
354 /// # Arguments
355 ///
356 /// * `master_obj` - Master objective f(x): smooth, depends on all x
357 /// * `pricing_obj` - Pricing problem: for duals π, minimize c_bar^T x_sub
358 /// Given duals `pi`, return (optimal x_sub, min reduced cost)
359 /// * `x0` - Initial point for the master
360 /// * `n_blocks` - Number of subproblem blocks
361 pub fn solve<FM, FP>(
362 &self,
363 master_obj: FM,
364 pricing_oracle: FP,
365 x0: &[f64],
366 _n_blocks: usize,
367 ) -> OptimizeResult<DantzigWolfeResult>
368 where
369 FM: Fn(&[f64]) -> f64,
370 FP: Fn(&[f64]) -> (Vec<f64>, f64), // pricing oracle: duals -> (proposal, reduced_cost)
371 {
372 let n = x0.len();
373 let h = 1e-7f64;
374
375 if n == 0 {
376 return Err(OptimizeError::InvalidInput(
377 "Initial point must be non-empty".to_string(),
378 ));
379 }
380
381 let mut x = x0.to_vec();
382 let mut columns: Vec<Vec<f64>> = vec![x.clone()]; // pool of generated columns
383 let mut weights: Vec<f64> = vec![1.0]; // convex combination weights
384 let mut duals = vec![0.0f64; n];
385 let mut n_iter = 0usize;
386 let mut n_columns = 1usize;
387
388 for outer in 0..self.options.max_iter {
389 n_iter = outer + 1;
390
391 // Step 1: Compute restricted master solution (gradient descent)
392 let restricted_obj = |w: &[f64]| -> f64 {
393 let n_cols = columns.len();
394 let mut xc = vec![0.0f64; n];
395 for (j, col) in columns.iter().enumerate() {
396 let wj = if j < w.len() { w[j] } else { 0.0 };
397 for i in 0..n {
398 xc[i] += wj * col[i];
399 }
400 }
401 master_obj(&xc)
402 };
403
404 let nc = columns.len();
405 let mut f_w = restricted_obj(&weights[..nc.min(weights.len())]);
406
407 for _inner in 0..200 {
408 let mut grad_w = vec![0.0f64; nc];
409 let w_cur = &weights[..nc.min(weights.len())];
410 let w_padded: Vec<f64> = {
411 let mut wp = w_cur.to_vec();
412 while wp.len() < nc {
413 wp.push(0.0);
414 }
415 wp
416 };
417 for j in 0..nc {
418 let mut wf = w_padded.clone();
419 let delta = h;
420 wf[j] += delta;
421 // Renormalize
422 let sum: f64 = wf.iter().sum();
423 let wf_norm: Vec<f64> = wf.iter().map(|wj| wj / sum).collect();
424 grad_w[j] = (restricted_obj(&wf_norm) - f_w) / delta;
425 }
426 let gnorm: f64 = grad_w.iter().map(|g| g * g).sum::<f64>().sqrt();
427 if gnorm < self.options.sub_tol {
428 break;
429 }
430 let step = self.options.master_step / (1.0 + gnorm);
431 let w_cur_len = w_padded.len();
432 let mut w_new: Vec<f64> = (0..w_cur_len)
433 .map(|j| (w_padded[j] - step * grad_w[j]).max(0.0))
434 .collect();
435 // Project onto simplex
436 let sum: f64 = w_new.iter().sum();
437 if sum > 1e-14 {
438 for wj in w_new.iter_mut() {
439 *wj /= sum;
440 }
441 }
442 let f_new = restricted_obj(&w_new);
443 if f_new < f_w {
444 weights = w_new;
445 f_w = f_new;
446 } else {
447 break;
448 }
449 }
450
451 // Reconstruct x from weights and columns
452 let nc = columns.len();
453 x = vec![0.0f64; n];
454 for (j, col) in columns.iter().enumerate() {
455 let wj = if j < weights.len() { weights[j] } else { 0.0 };
456 for i in 0..n {
457 x[i] += wj * col[i];
458 }
459 }
460
461 // Step 2: Compute dual variables (gradient of master w.r.t. x at current iterate)
462 let f0 = master_obj(&x);
463 for i in 0..n {
464 let mut xf = x.clone();
465 xf[i] += h;
466 duals[i] = (master_obj(&xf) - f0) / h;
467 }
468
469 // Step 3: Pricing problem — find column with most negative reduced cost
470 let (proposal, reduced_cost) = pricing_oracle(&duals);
471
472 if reduced_cost >= -self.options.opt_tol {
473 // No improving column: optimal
474 break;
475 }
476
477 // Add new column
478 columns.push(proposal);
479 weights.push(0.01); // small initial weight
480 // Renormalize
481 let sum: f64 = weights.iter().sum();
482 for wj in weights.iter_mut() {
483 *wj /= sum;
484 }
485 n_columns += 1;
486 }
487
488 let fun = master_obj(&x);
489
490 Ok(DantzigWolfeResult {
491 x,
492 fun,
493 n_columns,
494 nit: n_iter,
495 duals,
496 success: n_columns < self.options.max_iter,
497 message: "Dantzig-Wolfe decomposition completed".to_string(),
498 })
499 }
500}
501
502// ---------------------------------------------------------------------------
503// ADMM: Alternating Direction Method of Multipliers
504// ---------------------------------------------------------------------------
505
506/// Options for ADMM
507#[derive(Debug, Clone)]
508pub struct AdmmOptions {
509 /// Penalty parameter ρ (augmented Lagrangian)
510 pub rho: f64,
511 /// Maximum iterations
512 pub max_iter: usize,
513 /// Primal feasibility tolerance (||Ax + Bz - c||)
514 pub eps_primal: f64,
515 /// Dual feasibility tolerance (||ρ B^T (z^{k+1} - z^k)||)
516 pub eps_dual: f64,
517 /// Adaptive ρ: increase by this factor when primal/dual residual ratio > threshold
518 pub rho_adapt: bool,
519 /// Adaptive ρ factor
520 pub rho_factor: f64,
521 /// Adaptive ρ threshold
522 pub rho_threshold: f64,
523 /// Inner solver iterations for x and z updates
524 pub inner_iter: usize,
525 /// Inner solver tolerance
526 pub inner_tol: f64,
527}
528
529impl Default for AdmmOptions {
530 fn default() -> Self {
531 AdmmOptions {
532 rho: 1.0,
533 max_iter: 1000,
534 eps_primal: 1e-6,
535 eps_dual: 1e-6,
536 rho_adapt: true,
537 rho_factor: 2.0,
538 rho_threshold: 10.0,
539 inner_iter: 50,
540 inner_tol: 1e-8,
541 }
542 }
543}
544
545/// Result of ADMM
546#[derive(Debug, Clone)]
547pub struct AdmmResult {
548 /// Primal solution x
549 pub x: Vec<f64>,
550 /// Auxiliary variable z
551 pub z: Vec<f64>,
552 /// Dual variable u (scaled)
553 pub u: Vec<f64>,
554 /// Objective value f(x) + g(z)
555 pub fun: f64,
556 /// Primal residual at termination
557 pub primal_residual: f64,
558 /// Dual residual at termination
559 pub dual_residual: f64,
560 /// Number of iterations
561 pub nit: usize,
562 /// Whether the algorithm converged
563 pub success: bool,
564 /// Termination message
565 pub message: String,
566}
567
568/// ADMM: Alternating Direction Method of Multipliers.
569///
570/// Solves the consensus problem:
571///
572/// ```text
573/// min f(x) + g(z)
574/// s.t. x - z = 0
575/// ```
576///
577/// via the augmented Lagrangian:
578///
579/// ```text
580/// L_ρ(x, z, u) = f(x) + g(z) + (ρ/2) ||x - z + u||²
581/// ```
582///
583/// Iterations:
584/// - x-update: x^{k+1} = argmin_x f(x) + (ρ/2)||x - z^k + u^k||²
585/// - z-update: z^{k+1} = prox_{g/ρ}(x^{k+1} + u^k)
586/// - u-update: u^{k+1} = u^k + x^{k+1} - z^{k+1}
587pub struct Admm {
588 /// Algorithm options
589 pub options: AdmmOptions,
590}
591
592impl Default for Admm {
593 fn default() -> Self {
594 Admm {
595 options: AdmmOptions::default(),
596 }
597 }
598}
599
600impl Admm {
601 /// Create an ADMM solver with options
602 pub fn new(options: AdmmOptions) -> Self {
603 Admm { options }
604 }
605
606 /// Run ADMM for the consensus problem min f(x) + g(z) s.t. x = z.
607 ///
608 /// # Arguments
609 ///
610 /// * `f_obj` - Smooth objective f(x)
611 /// * `prox_g` - Proximal operator for g: prox_{g/ρ}(v) = argmin_z g(z) + (ρ/2)||z-v||²
612 /// * `x0` - Initial x
613 pub fn solve<F, PG>(&self, f_obj: F, prox_g: PG, x0: &[f64]) -> OptimizeResult<AdmmResult>
614 where
615 F: Fn(&[f64]) -> f64,
616 PG: Fn(&[f64], f64) -> Vec<f64>, // prox_g(v, rho) -> z
617 {
618 let n = x0.len();
619 if n == 0 {
620 return Err(OptimizeError::InvalidInput(
621 "Initial point must be non-empty".to_string(),
622 ));
623 }
624
625 let h = 1e-7f64;
626 let mut x = x0.to_vec();
627 let mut z = x.clone();
628 let mut u = vec![0.0f64; n];
629 let mut rho = self.options.rho;
630 let mut nit = 0usize;
631
632 for iter in 0..self.options.max_iter {
633 nit = iter + 1;
634
635 // x-update: min f(x) + (ρ/2)||x - (z - u)||²
636 let target_x: Vec<f64> = z.iter().zip(u.iter()).map(|(zi, ui)| zi - ui).collect();
637 let x_aug = |x: &[f64]| -> f64 {
638 let f = f_obj(x);
639 let pen: f64 = x
640 .iter()
641 .zip(target_x.iter())
642 .map(|(xi, ti)| (xi - ti).powi(2))
643 .sum();
644 f + 0.5 * rho * pen
645 };
646
647 let mut f_x = x_aug(&x);
648 for _xi in 0..self.options.inner_iter {
649 let mut grad = vec![0.0f64; n];
650 for i in 0..n {
651 let mut xf = x.clone();
652 xf[i] += h;
653 grad[i] = (x_aug(&xf) - f_x) / h;
654 }
655 let gnorm: f64 = grad.iter().map(|g| g * g).sum::<f64>().sqrt();
656 if gnorm < self.options.inner_tol {
657 break;
658 }
659 let step = 1.0 / (rho + gnorm);
660 let mut x_new: Vec<f64> = x
661 .iter()
662 .zip(grad.iter())
663 .map(|(xi, gi)| xi - step * gi)
664 .collect();
665 let f_new = x_aug(&x_new);
666 if f_new < f_x {
667 x = x_new;
668 f_x = f_new;
669 } else {
670 // Backtrack
671 let mut s = step * 0.5;
672 for _ in 0..20 {
673 x_new = x
674 .iter()
675 .zip(grad.iter())
676 .map(|(xi, gi)| xi - s * gi)
677 .collect();
678 let fn2 = x_aug(&x_new);
679 if fn2 < f_x {
680 x = x_new;
681 f_x = fn2;
682 break;
683 }
684 s *= 0.5;
685 }
686 break;
687 }
688 }
689
690 // z-update: prox_{g/ρ}(x + u)
691 let z_prev = z.clone();
692 let v: Vec<f64> = x.iter().zip(u.iter()).map(|(xi, ui)| xi + ui).collect();
693 z = prox_g(&v, rho);
694
695 // u-update: u += x - z
696 for i in 0..n {
697 u[i] += x[i] - z[i];
698 }
699
700 // Compute residuals
701 let primal_res: f64 = x
702 .iter()
703 .zip(z.iter())
704 .map(|(xi, zi)| (xi - zi).powi(2))
705 .sum::<f64>()
706 .sqrt();
707 let dual_res: f64 = z
708 .iter()
709 .zip(z_prev.iter())
710 .map(|(zi, zpi)| (rho * (zi - zpi)).powi(2))
711 .sum::<f64>()
712 .sqrt();
713
714 // Adaptive ρ
715 if self.options.rho_adapt {
716 if primal_res > self.options.rho_threshold * dual_res {
717 rho *= self.options.rho_factor;
718 for ui in u.iter_mut() {
719 *ui /= self.options.rho_factor;
720 }
721 } else if dual_res > self.options.rho_threshold * primal_res {
722 rho /= self.options.rho_factor;
723 for ui in u.iter_mut() {
724 *ui *= self.options.rho_factor;
725 }
726 }
727 }
728
729 if primal_res < self.options.eps_primal && dual_res < self.options.eps_dual {
730 let f_val = f_obj(&x);
731 return Ok(AdmmResult {
732 x,
733 z,
734 u,
735 fun: f_val,
736 primal_residual: primal_res,
737 dual_residual: dual_res,
738 nit,
739 success: true,
740 message: "ADMM converged".to_string(),
741 });
742 }
743 }
744
745 let primal_res: f64 = x
746 .iter()
747 .zip(z.iter())
748 .map(|(xi, zi)| (xi - zi).powi(2))
749 .sum::<f64>()
750 .sqrt();
751 let dual_res = 0.0f64; // approximation at max iter
752 let f_val = f_obj(&x);
753
754 Ok(AdmmResult {
755 x,
756 z,
757 u,
758 fun: f_val,
759 primal_residual: primal_res,
760 dual_residual: dual_res,
761 nit,
762 success: primal_res < self.options.eps_primal * 100.0,
763 message: "ADMM: maximum iterations reached".to_string(),
764 })
765 }
766}
767
768// ---------------------------------------------------------------------------
769// ProximalBundle: Proximal Bundle Method for Nonsmooth Optimization
770// ---------------------------------------------------------------------------
771
772/// Options for proximal bundle method
773#[derive(Debug, Clone)]
774pub struct ProximalBundleOptions {
775 /// Proximal parameter μ (regularization)
776 pub mu: f64,
777 /// Maximum number of bundle iterations
778 pub max_iter: usize,
779 /// Null-step tolerance (serious step condition)
780 pub m_l: f64,
781 /// Maximum bundle size
782 pub max_bundle_size: usize,
783 /// Convergence tolerance
784 pub tol: f64,
785 /// Minimum proximal parameter
786 pub min_mu: f64,
787 /// Maximum proximal parameter
788 pub max_mu: f64,
789}
790
791impl Default for ProximalBundleOptions {
792 fn default() -> Self {
793 ProximalBundleOptions {
794 mu: 1.0,
795 max_iter: 300,
796 m_l: 0.1,
797 max_bundle_size: 50,
798 tol: 1e-6,
799 min_mu: 1e-8,
800 max_mu: 1e8,
801 }
802 }
803}
804
805/// A bundle element: (subgradient, function value, center)
806#[derive(Debug, Clone)]
807struct BundleElement {
808 /// Subgradient g_i at bundle point
809 g: Vec<f64>,
810 /// Function value f_i at bundle point
811 f: f64,
812 /// Bundle point
813 x: Vec<f64>,
814}
815
816/// Result of proximal bundle method
817#[derive(Debug, Clone)]
818pub struct ProximalBundleResult {
819 /// Optimal solution
820 pub x: Vec<f64>,
821 /// Objective value at optimum
822 pub fun: f64,
823 /// Subgradient norm at optimum
824 pub subgrad_norm: f64,
825 /// Number of serious steps (actual descent)
826 pub n_serious_steps: usize,
827 /// Number of null steps
828 pub n_null_steps: usize,
829 /// Total iterations
830 pub nit: usize,
831 /// Whether the algorithm converged
832 pub success: bool,
833 /// Termination message
834 pub message: String,
835}
836
837/// Proximal bundle method for nonsmooth convex optimization.
838///
839/// Solves min f(x) where f is convex but possibly nonsmooth.
840/// Uses a bundle of subgradients to build a cutting-plane model:
841///
842/// ```text
843/// f̂(x; y) = max_{i in B} { f(x_i) + g_i^T (x - x_i) }
844/// ```
845///
846/// The proximal subproblem:
847/// ```text
848/// x^{k+1} = argmin_x { f̂(x; y^k) + (μ/2) ||x - y^k||² }
849/// ```
850///
851/// generates either a serious step (sufficient decrease) or null step.
852pub struct ProximalBundle {
853 /// Algorithm options
854 pub options: ProximalBundleOptions,
855}
856
857impl Default for ProximalBundle {
858 fn default() -> Self {
859 ProximalBundle {
860 options: ProximalBundleOptions::default(),
861 }
862 }
863}
864
865impl ProximalBundle {
866 /// Create a proximal bundle solver
867 pub fn new(options: ProximalBundleOptions) -> Self {
868 ProximalBundle { options }
869 }
870
871 /// Solve min f(x) using the proximal bundle method.
872 ///
873 /// # Arguments
874 ///
875 /// * `func` - Objective function (can be nonsmooth)
876 /// * `subgrad` - Subgradient of f at x: returns (f(x), g ∈ ∂f(x))
877 /// * `x0` - Initial point
878 pub fn minimize<FS>(&self, func: FS, x0: &[f64]) -> OptimizeResult<ProximalBundleResult>
879 where
880 FS: Fn(&[f64]) -> (f64, Vec<f64>), // returns (value, subgradient)
881 {
882 let n = x0.len();
883 if n == 0 {
884 return Err(OptimizeError::InvalidInput(
885 "Initial point must be non-empty".to_string(),
886 ));
887 }
888
889 let mut y = x0.to_vec(); // proximal center (serious step point)
890 let (mut f_y, mut g_y) = func(&y);
891 let mut mu = self.options.mu;
892 let mut bundle: Vec<BundleElement> = vec![BundleElement {
893 g: g_y.clone(),
894 f: f_y,
895 x: y.clone(),
896 }];
897
898 let mut n_serious = 0usize;
899 let mut n_null = 0usize;
900 let mut nit = 0usize;
901
902 for iter in 0..self.options.max_iter {
903 nit = iter + 1;
904
905 // Check convergence: subgradient norm at center
906 let gnorm: f64 = g_y.iter().map(|g| g * g).sum::<f64>().sqrt();
907 if gnorm < self.options.tol {
908 return Ok(ProximalBundleResult {
909 x: y,
910 fun: f_y,
911 subgrad_norm: gnorm,
912 n_serious_steps: n_serious,
913 n_null_steps: n_null,
914 nit,
915 success: true,
916 message: "Proximal bundle converged (subgradient norm)".to_string(),
917 });
918 }
919
920 // Solve proximal subproblem: min f̂(x; y) + (μ/2)||x - y||²
921 // The cutting-plane model: f̂(x) = max_i {f_i + g_i^T (x - x_i)}
922 // The proximal subproblem reduces to a QP. We use gradient descent on the dual.
923 //
924 // Dual formulation: max_{λ>=0, Σλ=1} { Σλ_i f_i - (1/2μ) ||Σλ_i g_i||² - (Σλ_i g_i)^T y + Σλ_i g_i^T x_i }
925 // This is a QP in λ.
926 let nb = bundle.len();
927 let mut lambda = vec![1.0f64 / nb as f64; nb]; // uniform start
928
929 // Gradient of dual w.r.t. λ
930 let dual_obj = |lam: &[f64]| -> f64 {
931 let mut agg_g = vec![0.0f64; n];
932 let mut sum_fg = 0.0f64;
933 for (i, be) in bundle.iter().enumerate() {
934 let li = if i < lam.len() { lam[i] } else { 0.0 };
935 for j in 0..n {
936 agg_g[j] += li * be.g[j];
937 }
938 sum_fg += li
939 * (be.f
940 - be.g
941 .iter()
942 .zip(be.x.iter())
943 .map(|(gj, xj)| gj * xj)
944 .sum::<f64>());
945 }
946 let agg_norm_sq: f64 = agg_g.iter().map(|g| g * g).sum();
947 // Dual objective (to maximize, so negate for minimization):
948 -(sum_fg
949 - agg_g
950 .iter()
951 .zip(y.iter())
952 .map(|(gj, yj)| gj * yj)
953 .sum::<f64>()
954 - 0.5 / mu * agg_norm_sq)
955 };
956
957 let h = 1e-7f64;
958 let mut f_lam = dual_obj(&lambda);
959 for _di in 0..200 {
960 let mut grad_lam = vec![0.0f64; nb];
961 for i in 0..nb {
962 let mut lf = lambda.clone();
963 lf[i] += h;
964 // Re-project onto simplex
965 let sum: f64 = lf.iter().sum();
966 let lf: Vec<f64> = lf.iter().map(|li| (li / sum).max(0.0)).collect();
967 grad_lam[i] = (dual_obj(&lf) - f_lam) / h;
968 }
969 let gnorm_lam: f64 = grad_lam.iter().map(|g| g * g).sum::<f64>().sqrt();
970 if gnorm_lam < 1e-10 {
971 break;
972 }
973 let step_lam = 0.1 / (1.0 + gnorm_lam);
974 let mut l_new: Vec<f64> = lambda
975 .iter()
976 .zip(grad_lam.iter())
977 .map(|(li, gi)| (li - step_lam * gi).max(0.0))
978 .collect();
979 // Project onto simplex
980 let sum: f64 = l_new.iter().sum();
981 if sum > 1e-14 {
982 for li in l_new.iter_mut() {
983 *li /= sum;
984 }
985 }
986 let f_new = dual_obj(&l_new);
987 if f_new < f_lam {
988 lambda = l_new;
989 f_lam = f_new;
990 } else {
991 break;
992 }
993 }
994
995 // Recover primal step from dual: x* = y - (1/μ) Σ λ_i g_i
996 let mut agg_g = vec![0.0f64; n];
997 for (i, be) in bundle.iter().enumerate() {
998 let li = if i < lambda.len() { lambda[i] } else { 0.0 };
999 for j in 0..n {
1000 agg_g[j] += li * be.g[j];
1001 }
1002 }
1003 let x_trial: Vec<f64> = y
1004 .iter()
1005 .zip(agg_g.iter())
1006 .map(|(yj, gj)| yj - gj / mu)
1007 .collect();
1008
1009 // Evaluate at trial point
1010 let (f_trial, g_trial) = func(&x_trial);
1011
1012 // Compute cutting-plane model value at x_trial (for acceptance test)
1013 let f_model: f64 = bundle
1014 .iter()
1015 .map(|be| {
1016 let gx: f64 =
1017 be.g.iter()
1018 .zip(x_trial.iter())
1019 .zip(be.x.iter())
1020 .map(|((gi, xi), xi_k)| gi * (xi - xi_k))
1021 .sum();
1022 be.f + gx
1023 })
1024 .fold(f64::NEG_INFINITY, f64::max);
1025
1026 // Serious step condition: f(x_trial) <= f(y) - mL * (f̂(y) - f̂(x_trial))
1027 // Simplified: f_trial <= f_y - mL * (f_y - f_model)
1028 let descent = f_y - f_model;
1029 let is_serious = f_trial <= f_y - self.options.m_l * descent.max(0.0);
1030
1031 if is_serious {
1032 y = x_trial.clone();
1033 f_y = f_trial;
1034 g_y = g_trial.clone();
1035 n_serious += 1;
1036 } else {
1037 n_null += 1;
1038 }
1039
1040 // Add new bundle element
1041 bundle.push(BundleElement {
1042 g: g_trial,
1043 f: f_trial,
1044 x: x_trial,
1045 });
1046
1047 // Trim bundle if too large
1048 if bundle.len() > self.options.max_bundle_size {
1049 bundle.remove(0);
1050 }
1051
1052 // Update μ: increase for null steps, reset for serious steps
1053 if is_serious {
1054 mu = (mu * 0.5).max(self.options.min_mu);
1055 } else if n_null > 3 * (n_serious + 1) {
1056 mu = (mu * 2.0).min(self.options.max_mu);
1057 }
1058 }
1059
1060 let gnorm_final: f64 = g_y.iter().map(|g| g * g).sum::<f64>().sqrt();
1061
1062 Ok(ProximalBundleResult {
1063 x: y,
1064 fun: f_y,
1065 subgrad_norm: gnorm_final,
1066 n_serious_steps: n_serious,
1067 n_null_steps: n_null,
1068 nit,
1069 success: gnorm_final < self.options.tol * 100.0,
1070 message: "Proximal bundle: maximum iterations reached".to_string(),
1071 })
1072 }
1073}
1074
1075// ---------------------------------------------------------------------------
1076// Tests
1077// ---------------------------------------------------------------------------
1078
1079#[cfg(test)]
1080mod tests {
1081 use super::*;
1082
1083 #[test]
1084 fn test_benders_separable() {
1085 // Master: min x^2, Sub: min (x + y)^2 → optimal y = -x, combined = 0
1086 let result = BendersDecomposition::default()
1087 .solve(
1088 |x: &[f64]| x[0].powi(2),
1089 |x: &[f64], y: &[f64]| (x[0] + y[0]).powi(2),
1090 &[1.0],
1091 &[0.0],
1092 )
1093 .expect("unexpected None or Err");
1094 assert!(
1095 result.upper_bound < 2.0,
1096 "Upper bound {} should be < 2.0",
1097 result.upper_bound
1098 );
1099 }
1100
1101 #[test]
1102 fn test_dantzig_wolfe_basic() {
1103 // Simple quadratic: min (x-1)^2
1104 // Pricing: minimize 2*(x-1)*g[0] w.r.t. x in [0,2]
1105 let result = DantzigWolfe::default()
1106 .solve(
1107 |x: &[f64]| (x[0] - 1.0).powi(2),
1108 |pi: &[f64]| {
1109 // Optimal proposal in [0, 2]: x* = 1 - pi[0]/2 (if inside bounds)
1110 let x_star = (1.0 - pi[0] * 0.5).clamp(0.0, 2.0);
1111 let rc = pi[0] * x_star - pi[0]; // reduced cost
1112 (vec![x_star], rc)
1113 },
1114 &[0.5],
1115 1,
1116 )
1117 .expect("unexpected None or Err");
1118 assert!(result.fun < 0.5, "Expected fun < 0.5, got {}", result.fun);
1119 }
1120
1121 #[test]
1122 fn test_admm_consensus() {
1123 // min x^2 + ||z-1||^2 s.t. x = z → optimal x = z = 0.5
1124 // prox_{g/ρ}(v) = argmin ||z-1||^2 + ρ/2||z-v||^2 = (1 + ρ/2 * v) / (1 + ρ/2)
1125 let result = Admm::default()
1126 .solve(
1127 |x: &[f64]| x[0].powi(2),
1128 |v: &[f64], rho: f64| vec![(1.0 + rho * 0.5 * v[0]) / (1.0 + rho * 0.5)],
1129 &[1.0],
1130 )
1131 .expect("unexpected None or Err");
1132 assert!(
1133 (result.x[0] - 0.5).abs() < 0.1 || result.fun < 1.0,
1134 "fun = {}, x = {:?}",
1135 result.fun,
1136 result.x
1137 );
1138 }
1139
1140 #[test]
1141 fn test_admm_lasso_like() {
1142 // min (x-3)^2 + |z| s.t. x = z
1143 // prox_{|.|/ρ}(v) = soft-threshold(v, 1/ρ)
1144 let soft_thresh = |v: &[f64], rho: f64| -> Vec<f64> {
1145 let thresh = 1.0 / rho;
1146 v.iter()
1147 .map(|vi| vi.signum() * (vi.abs() - thresh).max(0.0))
1148 .collect()
1149 };
1150
1151 let result = Admm::new(AdmmOptions {
1152 rho: 1.0,
1153 max_iter: 500,
1154 eps_primal: 1e-5,
1155 eps_dual: 1e-5,
1156 ..Default::default()
1157 })
1158 .solve(|x: &[f64]| (x[0] - 3.0).powi(2), soft_thresh, &[0.0])
1159 .expect("unexpected None or Err");
1160 // Optimal x ≈ 2.5 (balance between pulling toward 3 and L1 penalty)
1161 assert!(result.fun < 5.0, "fun = {}", result.fun);
1162 }
1163
1164 #[test]
1165 fn test_proximal_bundle_smooth() {
1166 // Test on smooth convex function (f = x^2, g = 2x)
1167 let result = ProximalBundle::default()
1168 .minimize(|x: &[f64]| (x[0].powi(2), vec![2.0 * x[0]]), &[2.0])
1169 .expect("unexpected None or Err");
1170 assert!(result.fun < 0.5, "Expected fun < 0.5, got {}", result.fun);
1171 }
1172
1173 #[test]
1174 fn test_proximal_bundle_max_function() {
1175 // max(x, -x) = |x|, subgradient: sign(x) or 0 if x=0
1176 let result = ProximalBundle::new(ProximalBundleOptions {
1177 tol: 1e-5,
1178 max_iter: 200,
1179 ..Default::default()
1180 })
1181 .minimize(
1182 |x: &[f64]| {
1183 let v = x[0].abs();
1184 let g = if x[0] > 0.0 {
1185 1.0
1186 } else if x[0] < 0.0 {
1187 -1.0
1188 } else {
1189 0.0
1190 };
1191 (v, vec![g])
1192 },
1193 &[2.0],
1194 )
1195 .expect("unexpected None or Err");
1196 assert!(
1197 result.fun < 0.5,
1198 "Expected |x| < 0.5 at optimum, got {}",
1199 result.fun
1200 );
1201 }
1202}