1use crate::error::{OptimizeError, OptimizeResult};
35use scirs2_core::ndarray::{Array1, Array2};
36use std::fmt;
37
38#[derive(Debug, Clone)]
42pub struct Monomial {
43 pub coefficient: f64,
45 pub exponents: Vec<f64>,
47}
48
49impl Monomial {
50 pub fn new(coefficient: f64, exponents: Vec<f64>) -> OptimizeResult<Self> {
53 if coefficient <= 0.0 {
54 return Err(OptimizeError::ValueError(
55 "monomial coefficient must be strictly positive".into(),
56 ));
57 }
58 Ok(Self {
59 coefficient,
60 exponents,
61 })
62 }
63
64 pub fn evaluate(&self, x: &[f64]) -> OptimizeResult<f64> {
66 if x.len() != self.exponents.len() {
67 return Err(OptimizeError::ValueError(format!(
68 "monomial has {} exponents but x has {} components",
69 self.exponents.len(),
70 x.len()
71 )));
72 }
73 let mut val = self.coefficient;
74 for (xi, ai) in x.iter().zip(self.exponents.iter()) {
75 if *xi <= 0.0 {
76 return Err(OptimizeError::ValueError(
77 "GP variables must be strictly positive".into(),
78 ));
79 }
80 val *= xi.powf(*ai);
81 }
82 Ok(val)
83 }
84
85 #[inline]
87 pub fn n_vars(&self) -> usize {
88 self.exponents.len()
89 }
90}
91
92impl fmt::Display for Monomial {
93 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
94 write!(f, "{:.4}", self.coefficient)?;
95 for (i, ai) in self.exponents.iter().enumerate() {
96 if *ai != 0.0 {
97 write!(f, " · x{}^{:.4}", i + 1, ai)?;
98 }
99 }
100 Ok(())
101 }
102}
103
104#[derive(Debug, Clone)]
108pub struct Posynomial {
109 pub terms: Vec<Monomial>,
111}
112
113impl Posynomial {
114 pub fn new(terms: Vec<Monomial>) -> OptimizeResult<Self> {
117 if terms.is_empty() {
118 return Err(OptimizeError::ValueError(
119 "posynomial must have at least one term".into(),
120 ));
121 }
122 let n = terms[0].n_vars();
123 for (k, t) in terms.iter().enumerate() {
124 if t.n_vars() != n {
125 return Err(OptimizeError::ValueError(format!(
126 "term {} has {} variables but term 0 has {}",
127 k,
128 t.n_vars(),
129 n
130 )));
131 }
132 }
133 Ok(Self { terms })
134 }
135
136 pub fn evaluate(&self, x: &[f64]) -> OptimizeResult<f64> {
138 let mut sum = 0.0;
139 for t in &self.terms {
140 sum += t.evaluate(x)?;
141 }
142 Ok(sum)
143 }
144
145 #[inline]
147 pub fn n_vars(&self) -> usize {
148 self.terms[0].n_vars()
149 }
150
151 #[inline]
153 pub fn n_terms(&self) -> usize {
154 self.terms.len()
155 }
156}
157
158#[derive(Debug)]
170pub struct GPProblem {
171 pub objective: Posynomial,
173 pub ineq_constraints: Vec<Posynomial>,
175 pub eq_constraints: Vec<Monomial>,
177}
178
179impl GPProblem {
180 pub fn new(
182 objective: Posynomial,
183 ineq_constraints: Vec<Posynomial>,
184 eq_constraints: Vec<Monomial>,
185 ) -> OptimizeResult<Self> {
186 let n = objective.n_vars();
187 for (i, c) in ineq_constraints.iter().enumerate() {
188 if c.n_vars() != n {
189 return Err(OptimizeError::ValueError(format!(
190 "inequality constraint {} has {} variables; expected {}",
191 i,
192 c.n_vars(),
193 n
194 )));
195 }
196 }
197 for (j, c) in eq_constraints.iter().enumerate() {
198 if c.n_vars() != n {
199 return Err(OptimizeError::ValueError(format!(
200 "equality constraint {} has {} variables; expected {}",
201 j,
202 c.n_vars(),
203 n
204 )));
205 }
206 }
207 Ok(Self {
208 objective,
209 ineq_constraints,
210 eq_constraints,
211 })
212 }
213
214 #[inline]
216 pub fn n_vars(&self) -> usize {
217 self.objective.n_vars()
218 }
219}
220
221#[derive(Debug, Clone)]
225pub struct GPSolverConfig {
226 pub max_outer_iters: usize,
228 pub max_inner_iters: usize,
230 pub outer_tol: f64,
232 pub inner_tol: f64,
234 pub t_init: f64,
236 pub mu: f64,
238 pub initial_y: Option<Vec<f64>>,
240 pub ls_alpha: f64,
242 pub ls_beta: f64,
244}
245
246impl Default for GPSolverConfig {
247 fn default() -> Self {
248 Self {
249 max_outer_iters: 50,
250 max_inner_iters: 30,
251 outer_tol: 1e-8,
252 inner_tol: 1e-8,
253 t_init: 1.0,
254 mu: 10.0,
255 initial_y: None,
256 ls_alpha: 0.01,
257 ls_beta: 0.5,
258 }
259 }
260}
261
262#[derive(Debug)]
266pub struct GPResult {
267 pub x: Vec<f64>,
269 pub obj_value: f64,
271 pub outer_iters: usize,
273 pub converged: bool,
275 pub gap: f64,
277}
278
279#[derive(Debug)]
292pub struct LogConvexProblem {
293 pub obj_a: Array2<f64>,
295 pub obj_b: Array1<f64>,
297 pub con_a: Vec<Array2<f64>>,
299 pub con_b: Vec<Array1<f64>>,
301 pub eq_a: Array2<f64>,
303 pub eq_b: Array1<f64>,
305}
306
307pub fn gp_to_convex(prob: &GPProblem) -> LogConvexProblem {
312 let n = prob.n_vars();
313
314 let (obj_a, obj_b) = posynomial_to_log_matrices(&prob.objective, n);
315
316 let (con_a, con_b): (Vec<_>, Vec<_>) = prob
317 .ineq_constraints
318 .iter()
319 .map(|c| posynomial_to_log_matrices(c, n))
320 .unzip();
321
322 let n_eq = prob.eq_constraints.len();
323 let mut eq_a = Array2::<f64>::zeros((n_eq.max(1), n));
324 let mut eq_b = Array1::<f64>::zeros(n_eq.max(1));
325 for (j, m) in prob.eq_constraints.iter().enumerate() {
326 for (k, a) in m.exponents.iter().enumerate() {
327 eq_a[[j, k]] = *a;
328 }
329 eq_b[j] = m.coefficient.ln();
330 }
331
332 LogConvexProblem {
333 obj_a,
334 obj_b,
335 con_a,
336 con_b,
337 eq_a,
338 eq_b,
339 }
340}
341
342fn posynomial_to_log_matrices(p: &Posynomial, n: usize) -> (Array2<f64>, Array1<f64>) {
345 let k = p.n_terms();
346 let mut a = Array2::<f64>::zeros((k, n));
347 let mut b = Array1::<f64>::zeros(k);
348 for (row, term) in p.terms.iter().enumerate() {
349 for (col, exp) in term.exponents.iter().enumerate() {
350 a[[row, col]] = *exp;
351 }
352 b[row] = term.coefficient.ln();
353 }
354 (a, b)
355}
356
357fn log_sum_exp(v: &[f64]) -> f64 {
361 if v.is_empty() {
362 return f64::NEG_INFINITY;
363 }
364 let max_v = v.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
365 if max_v == f64::NEG_INFINITY {
366 return f64::NEG_INFINITY;
367 }
368 let sum: f64 = v.iter().map(|vi| (vi - max_v).exp()).sum();
369 max_v + sum.ln()
370}
371
372fn softmax(v: &[f64]) -> Vec<f64> {
374 let lse = log_sum_exp(v);
375 v.iter().map(|vi| (vi - lse).exp()).collect()
376}
377
378fn lse_objective(a: &Array2<f64>, b: &Array1<f64>, y: &[f64]) -> f64 {
382 let v: Vec<f64> = (0..a.nrows())
383 .map(|k| {
384 let inner: f64 = (0..a.ncols()).map(|j| a[[k, j]] * y[j]).sum();
385 inner + b[k]
386 })
387 .collect();
388 log_sum_exp(&v)
389}
390
391fn lse_gradient(a: &Array2<f64>, b: &Array1<f64>, y: &[f64]) -> Vec<f64> {
393 let v: Vec<f64> = (0..a.nrows())
394 .map(|k| {
395 let inner: f64 = (0..a.ncols()).map(|j| a[[k, j]] * y[j]).sum();
396 inner + b[k]
397 })
398 .collect();
399 let sm = softmax(&v);
400 let n = a.ncols();
401 let mut grad = vec![0.0_f64; n];
402 for k in 0..a.nrows() {
403 for j in 0..n {
404 grad[j] += sm[k] * a[[k, j]];
405 }
406 }
407 grad
408}
409
410fn lse_hessian(a: &Array2<f64>, b: &Array1<f64>, y: &[f64]) -> Array2<f64> {
412 let v: Vec<f64> = (0..a.nrows())
413 .map(|k| {
414 let inner: f64 = (0..a.ncols()).map(|j| a[[k, j]] * y[j]).sum();
415 inner + b[k]
416 })
417 .collect();
418 let sm = softmax(&v);
419 let n = a.ncols();
420 let mut h = Array2::<f64>::zeros((n, n));
421 for k in 0..a.nrows() {
423 for i in 0..n {
424 for j in 0..n {
425 h[[i, j]] += sm[k] * a[[k, i]] * a[[k, j]];
426 }
427 }
428 }
429 let g = lse_gradient(a, b, y);
431 for i in 0..n {
432 for j in 0..n {
433 h[[i, j]] -= g[i] * g[j];
434 }
435 }
436 h
437}
438
439fn solve_newton_system(h: &Array2<f64>, g: &[f64]) -> Vec<f64> {
442 let n = h.nrows();
443 let reg = 1e-12;
445 let mut mat: Vec<Vec<f64>> = (0..n)
447 .map(|i| {
448 let mut row: Vec<f64> = (0..n).map(|j| h[[i, j]]).collect();
449 row[i] += reg;
450 row.push(-g[i]);
451 row
452 })
453 .collect();
454
455 for col in 0..n {
456 let mut max_row = col;
458 let mut max_val = mat[col][col].abs();
459 for row in (col + 1)..n {
460 let v = mat[row][col].abs();
461 if v > max_val {
462 max_val = v;
463 max_row = row;
464 }
465 }
466 mat.swap(col, max_row);
467
468 let pivot = mat[col][col];
469 if pivot.abs() < 1e-30 {
470 continue;
471 }
472 let inv_pivot = 1.0 / pivot;
473 for j in col..=n {
474 mat[col][j] *= inv_pivot;
475 }
476 for row in 0..n {
477 if row == col {
478 continue;
479 }
480 let factor = mat[row][col];
481 for j in col..=n {
482 let delta = factor * mat[col][j];
483 mat[row][j] -= delta;
484 }
485 }
486 }
487
488 (0..n).map(|i| mat[i][n]).collect()
489}
490
491fn barrier_value(lcp: &LogConvexProblem, t: f64, y: &[f64]) -> Option<f64> {
499 let mut val = t * lse_objective(&lcp.obj_a, &lcp.obj_b, y);
500 for (ca, cb) in lcp.con_a.iter().zip(lcp.con_b.iter()) {
501 let lse_c = lse_objective(ca, cb, y);
502 if lse_c >= 0.0 {
503 return None; }
505 val -= (-lse_c).ln();
506 }
507 Some(val)
508}
509
510fn barrier_gradient(lcp: &LogConvexProblem, t: f64, y: &[f64]) -> Vec<f64> {
512 let n = y.len();
513 let mut grad: Vec<f64> = lse_gradient(&lcp.obj_a, &lcp.obj_b, y)
514 .iter()
515 .map(|g| t * g)
516 .collect();
517 for (ca, cb) in lcp.con_a.iter().zip(lcp.con_b.iter()) {
518 let lse_c = lse_objective(ca, cb, y);
519 let factor = 1.0 / (-lse_c).max(1e-30);
520 let gc = lse_gradient(ca, cb, y);
521 for j in 0..n {
522 grad[j] += factor * gc[j];
523 }
524 }
525 grad
526}
527
528fn barrier_hessian(lcp: &LogConvexProblem, t: f64, y: &[f64]) -> Array2<f64> {
530 let n = y.len();
531 let mut h = lse_hessian(&lcp.obj_a, &lcp.obj_b, y);
532 for i in 0..n {
533 for j in 0..n {
534 h[[i, j]] *= t;
535 }
536 }
537 for (ca, cb) in lcp.con_a.iter().zip(lcp.con_b.iter()) {
538 let lse_c = lse_objective(ca, cb, y);
539 let s = (-lse_c).max(1e-30);
540 let gc = lse_gradient(ca, cb, y);
541 let hc = lse_hessian(ca, cb, y);
542 let inv_s = 1.0 / s;
543 let inv_s2 = inv_s * inv_s;
544 for i in 0..n {
545 for j in 0..n {
546 h[[i, j]] += inv_s * hc[[i, j]] + inv_s2 * gc[i] * gc[j];
547 }
548 }
549 }
550 h
551}
552
553fn find_feasible_point(lcp: &LogConvexProblem, y0: &[f64]) -> OptimizeResult<Vec<f64>> {
556 let n = y0.len();
557 let m = lcp.con_a.len();
558 if m == 0 {
559 return Ok(y0.to_vec());
560 }
561
562 let infeasible = lcp
564 .con_a
565 .iter()
566 .zip(lcp.con_b.iter())
567 .any(|(ca, cb)| lse_objective(ca, cb, y0) >= 0.0);
568 if !infeasible {
569 return Ok(y0.to_vec());
570 }
571
572 let max_lse: f64 = lcp
575 .con_a
576 .iter()
577 .zip(lcp.con_b.iter())
578 .map(|(ca, cb)| lse_objective(ca, cb, y0))
579 .fold(f64::NEG_INFINITY, f64::max);
580 let s_init = max_lse + 1.0;
581 let mut y_aug: Vec<f64> = y0.iter().cloned().chain(std::iter::once(s_init)).collect();
582
583 for _iter in 0..200 {
585 let s = y_aug[n];
586 let mut g_y = vec![0.0_f64; n];
587 let mut g_s = 1.0_f64;
588 for (ca, cb) in lcp.con_a.iter().zip(lcp.con_b.iter()) {
589 let lse_c = lse_objective(ca, cb, &y_aug[..n]);
590 let margin = lse_c - s + 0.1;
591 if margin > 0.0 {
592 let gc = lse_gradient(ca, cb, &y_aug[..n]);
593 for j in 0..n {
594 g_y[j] += gc[j];
595 }
596 g_s -= 1.0;
597 }
598 }
599 let step = 0.01;
600 for j in 0..n {
601 y_aug[j] -= step * g_y[j];
602 }
603 y_aug[n] -= step * g_s;
604
605 let s_new = y_aug[n];
606 if s_new < -0.1 {
607 return Ok(y_aug[..n].to_vec());
609 }
610 }
611
612 let feasible = lcp
614 .con_a
615 .iter()
616 .zip(lcp.con_b.iter())
617 .all(|(ca, cb)| lse_objective(ca, cb, &y_aug[..n]) < 0.0);
618 if feasible {
619 Ok(y_aug[..n].to_vec())
620 } else {
621 Err(OptimizeError::InitializationError(
622 "could not find a strictly feasible starting point for the GP".into(),
623 ))
624 }
625}
626
627pub fn solve_gp(prob: &GPProblem, config: Option<GPSolverConfig>) -> OptimizeResult<GPResult> {
637 let cfg = config.unwrap_or_default();
638 let lcp = gp_to_convex(prob);
639 let n = prob.n_vars();
640
641 let y0: Vec<f64> = cfg.initial_y.clone().unwrap_or_else(|| vec![0.0_f64; n]);
643 if y0.len() != n {
644 return Err(OptimizeError::ValueError(format!(
645 "initial_y has length {} but problem has {} variables",
646 y0.len(),
647 n
648 )));
649 }
650
651 let mut y = find_feasible_point(&lcp, &y0)?;
656
657 let mut t = cfg.t_init;
658 let m = lcp.con_a.len();
659 let duality_gap = |t: f64| (m as f64) / t;
661
662 let mut outer_iters = 0_usize;
663 let mut converged = false;
664
665 for _outer in 0..cfg.max_outer_iters {
666 outer_iters += 1;
667
668 for _inner in 0..cfg.max_inner_iters {
670 let grad = barrier_gradient(&lcp, t, &y);
671 let grad_norm: f64 = grad.iter().map(|g| g * g).sum::<f64>().sqrt();
672
673 if grad_norm < cfg.inner_tol {
674 break;
675 }
676
677 let hess = barrier_hessian(&lcp, t, &y);
678 let delta = solve_newton_system(&hess, &grad);
679
680 let newton_dec: f64 = grad
682 .iter()
683 .zip(delta.iter())
684 .map(|(g, d)| g * d)
685 .sum::<f64>();
686 if newton_dec.abs() < cfg.inner_tol {
687 break;
688 }
689
690 let f0 = match barrier_value(&lcp, t, &y) {
692 Some(v) => v,
693 None => break,
694 };
695 let mut step = 1.0_f64;
696 let ls_thresh = cfg.ls_alpha * newton_dec.abs();
697 let y_new = loop {
698 let candidate: Vec<f64> = y
699 .iter()
700 .zip(delta.iter())
701 .map(|(yi, di)| yi + step * di)
702 .collect();
703 if let Some(f_new) = barrier_value(&lcp, t, &candidate) {
704 if f0 - f_new >= step * ls_thresh {
705 break candidate;
706 }
707 }
708 step *= cfg.ls_beta;
709 if step < 1e-20 {
710 break y.clone();
711 }
712 };
713 y = y_new;
714 }
715
716 let gap = duality_gap(t);
718 if gap < cfg.outer_tol {
719 converged = true;
720 break;
721 }
722
723 t *= cfg.mu;
724 }
725
726 let x: Vec<f64> = y.iter().map(|yi| yi.exp()).collect();
728 let obj_value = prob.objective.evaluate(&x)?;
729
730 Ok(GPResult {
731 x,
732 obj_value,
733 outer_iters,
734 converged,
735 gap: (m as f64) / t,
736 })
737}
738
739#[cfg(test)]
742mod tests {
743 use super::*;
744
745 fn approx_eq(a: f64, b: f64, tol: f64) -> bool {
746 (a - b).abs() < tol
747 }
748
749 #[test]
750 fn test_monomial_evaluate() {
751 let m = Monomial::new(2.0, vec![1.0, 2.0]).expect("valid monomial");
753 let val = m.evaluate(&[3.0, 4.0]).expect("evaluation");
754 assert!(approx_eq(val, 96.0, 1e-10));
756 }
757
758 #[test]
759 fn test_posynomial_evaluate() {
760 let m1 = Monomial::new(1.0, vec![1.0, 0.0]).expect("m1");
762 let m2 = Monomial::new(1.0, vec![0.0, 1.0]).expect("m2");
763 let p = Posynomial::new(vec![m1, m2]).expect("posynomial");
764 let val = p.evaluate(&[2.0, 3.0]).expect("eval");
765 assert!(approx_eq(val, 5.0, 1e-10));
766 }
767
768 #[test]
769 fn test_gp_unconstrained() {
770 let m1 = Monomial::new(1.0, vec![1.0]).expect("m1");
776 let m2 = Monomial::new(1.0, vec![-1.0]).expect("m2");
777 let obj = Posynomial::new(vec![m1, m2]).expect("obj");
778 let prob = GPProblem::new(obj, vec![], vec![]).expect("prob");
779 let cfg = GPSolverConfig {
780 initial_y: Some(vec![0.5]),
781 ..Default::default()
782 };
783 let result = solve_gp(&prob, Some(cfg)).expect("solve");
784 assert!(
785 approx_eq(result.obj_value, 2.0, 0.01),
786 "expected ~2.0, got {}",
787 result.obj_value
788 );
789 assert!(
790 approx_eq(result.x[0], 1.0, 0.05),
791 "expected x≈1, got {}",
792 result.x[0]
793 );
794 }
795
796 #[test]
797 fn test_gp_with_constraint() {
798 let m1 = Monomial::new(1.0, vec![1.0, 0.0]).expect("m1");
801 let m2 = Monomial::new(1.0, vec![0.0, 1.0]).expect("m2");
802 let obj = Posynomial::new(vec![m1, m2]).expect("obj");
803
804 let con_mono = Monomial::new(1.0, vec![-1.0, -1.0]).expect("con");
806 let con_posy = Posynomial::new(vec![con_mono]).expect("con_p");
807
808 let prob = GPProblem::new(obj, vec![con_posy], vec![]).expect("prob");
809 let cfg = GPSolverConfig {
810 initial_y: Some(vec![0.0, 0.0]),
811 ..Default::default()
812 };
813 let result = solve_gp(&prob, Some(cfg)).expect("solve");
814 assert!(
815 approx_eq(result.obj_value, 2.0, 0.05),
816 "expected ~2.0, got {}",
817 result.obj_value
818 );
819 }
820
821 #[test]
822 fn test_log_sum_exp_stable() {
823 let v = vec![1000.0, 1001.0, 1002.0];
824 let lse = log_sum_exp(&v);
825 assert!(lse > 1001.0 && lse < 1003.0);
827 }
828
829 #[test]
830 fn test_monomial_invalid_coefficient() {
831 assert!(Monomial::new(-1.0, vec![1.0]).is_err());
832 assert!(Monomial::new(0.0, vec![1.0]).is_err());
833 }
834}