use std::fmt;
use scirs2_core::ndarray::{Array1, Array2};
use crate::error::{OptimizeError, OptimizeResult};
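/// A monomial in the GP sense: `c · x1^a1 · … · xn^an` with coefficient
/// `c > 0` and arbitrary real exponents. The basic building block of a
/// geometric program.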
#[derive(Debug, Clone)]
pub struct Monomial {
pub coefficient: f64,
pub exponents: Vec<f64>,
}
impl Monomial {
pub fn new(coefficient: f64, exponents: Vec<f64>) -> OptimizeResult<Self> {
if !coefficient.is_finite() || coefficient <= 0.0 {
return Err(OptimizeError::ValueError(
"monomial coefficient must be strictly positive".into(),
));
}
Ok(Self { coefficient, exponents })
}
pub fn evaluate(&self, x: &[f64]) -> OptimizeResult<f64> {
if x.len() != self.exponents.len() {
return Err(OptimizeError::ValueError(format!(
"monomial has {} exponents but x has {} components",
self.exponents.len(),
x.len()
)));
}
let mut val = self.coefficient;
for (xi, ai) in x.iter().zip(self.exponents.iter()) {
if *xi <= 0.0 {
return Err(OptimizeError::ValueError(
"GP variables must be strictly positive".into(),
));
}
val *= xi.powf(*ai);
}
Ok(val)
}
#[inline]
pub fn n_vars(&self) -> usize {
self.exponents.len()
}
}
impl fmt::Display for Monomial {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{:.4}", self.coefficient)?;
for (i, ai) in self.exponents.iter().enumerate() {
if *ai != 0.0 {
write!(f, " · x{}^{:.4}", i + 1, ai)?;
}
}
Ok(())
}
}
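/// A posynomial: a sum of monomials over the same variables. Posynomials
/// serve as the objective and inequality-constraint functions of a GP and
/// become convex (log-sum-exp of affine functions) under the log-log change
/// of variables.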
#[derive(Debug, Clone)]
pub struct Posynomial {
pub terms: Vec<Monomial>,
}
impl Posynomial {
pub fn new(terms: Vec<Monomial>) -> OptimizeResult<Self> {
if terms.is_empty() {
return Err(OptimizeError::ValueError("posynomial must have at least one term".into()));
}
let n = terms[0].n_vars();
for (k, t) in terms.iter().enumerate() {
if t.n_vars() != n {
return Err(OptimizeError::ValueError(format!(
"term {} has {} variables but term 0 has {}",
k,
t.n_vars(),
n
)));
}
}
Ok(Self { terms })
}
pub fn evaluate(&self, x: &[f64]) -> OptimizeResult<f64> {
let mut sum = 0.0;
for t in &self.terms {
sum += t.evaluate(x)?;
}
Ok(sum)
}
#[inline]
pub fn n_vars(&self) -> usize {
self.terms[0].n_vars()
}
#[inline]
pub fn n_terms(&self) -> usize {
self.terms.len()
}
}
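/// A geometric program in standard form: minimize a posynomial `f0(x)`
/// subject to posynomial inequalities `fi(x) <= 1` and monomial equalities
/// `gj(x) = 1`, over strictly positive variables `x`.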
#[derive(Debug)]
pub struct GPProblem {
pub objective: Posynomial,
pub ineq_constraints: Vec<Posynomial>,
pub eq_constraints: Vec<Monomial>,
}
impl GPProblem {
pub fn new(
objective: Posynomial,
ineq_constraints: Vec<Posynomial>,
eq_constraints: Vec<Monomial>,
) -> OptimizeResult<Self> {
let n = objective.n_vars();
for (i, c) in ineq_constraints.iter().enumerate() {
if c.n_vars() != n {
return Err(OptimizeError::ValueError(format!(
"inequality constraint {} has {} variables; expected {}",
i, c.n_vars(), n
)));
}
}
for (j, c) in eq_constraints.iter().enumerate() {
if c.n_vars() != n {
return Err(OptimizeError::ValueError(format!(
"equality constraint {} has {} variables; expected {}",
j, c.n_vars(), n
)));
}
}
Ok(Self { objective, ineq_constraints, eq_constraints })
}
#[inline]
pub fn n_vars(&self) -> usize {
self.objective.n_vars()
}
}
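/// Tuning parameters for the log-barrier interior-point solver. `t_init`
/// and `mu` drive the barrier path (`t` is multiplied by `mu` after each
/// centering step); `ls_alpha` and `ls_beta` are the Armijo parameter and
/// backtracking factor of the line search.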
#[derive(Debug, Clone)]
pub struct GPSolverConfig {
pub max_outer_iters: usize,
pub max_inner_iters: usize,
pub outer_tol: f64,
pub inner_tol: f64,
pub t_init: f64,
pub mu: f64,
pub initial_y: Option<Vec<f64>>,
pub ls_alpha: f64,
pub ls_beta: f64,
}
impl Default for GPSolverConfig {
fn default() -> Self {
Self {
max_outer_iters: 50,
max_inner_iters: 30,
outer_tol: 1e-8,
inner_tol: 1e-8,
t_init: 1.0,
mu: 10.0,
initial_y: None,
ls_alpha: 0.01,
ls_beta: 0.5,
}
}
}
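/// Solver output: the primal point `x` in the original (positive)
/// variables, its objective value, the outer-iteration count, a convergence
/// flag, and the final barrier duality gap `m / t`.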
#[derive(Debug)]
pub struct GPResult {
pub x: Vec<f64>,
pub obj_value: f64,
pub outer_iters: usize,
pub converged: bool,
pub gap: f64,
}
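/// The GP after the substitution `y = ln x`. A posynomial constraint
/// `f(x) <= 1` becomes the convex constraint `log-sum-exp(A·y + b) <= 0`,
/// where row `k` of `A` holds the exponents of term `k` and
/// `b[k] = ln(c_k)`; a monomial equality becomes the linear equation
/// `eq_a·y + eq_b = 0`.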
#[derive(Debug)]
pub struct LogConvexProblem {
pub obj_a: Array2<f64>,
pub obj_b: Array1<f64>,
pub con_a: Vec<Array2<f64>>,
pub con_b: Vec<Array1<f64>>,
pub eq_a: Array2<f64>,
pub eq_b: Array1<f64>,
}
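/// Converts a `GPProblem` into its log-space convex form. The equality
/// matrices are assembled for completeness, but note that `solve_gp`
/// currently rejects problems with equality constraints.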
pub fn gp_to_convex(prob: &GPProblem) -> LogConvexProblem {
let n = prob.n_vars();
let (obj_a, obj_b) = posynomial_to_log_matrices(&prob.objective, n);
let (con_a, con_b): (Vec<_>, Vec<_>) = prob
.ineq_constraints
.iter()
.map(|c| posynomial_to_log_matrices(c, n))
.unzip();
let n_eq = prob.eq_constraints.len();
// A zero-row matrix is valid when there are no equality constraints;
// padding with a dummy all-zero row would add a spurious `0 = 0` equation.
let mut eq_a = Array2::<f64>::zeros((n_eq, n));
let mut eq_b = Array1::<f64>::zeros(n_eq);
for (j, m) in prob.eq_constraints.iter().enumerate() {
for (k, a) in m.exponents.iter().enumerate() {
eq_a[[j, k]] = *a;
}
eq_b[j] = m.coefficient.ln();
}
LogConvexProblem { obj_a, obj_b, con_a, con_b, eq_a, eq_b }
}
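/// Builds the `K × n` exponent matrix `A` and log-coefficient vector `b`
/// of a posynomial with `K` terms, so that `f(x) = Σ_k exp(a_k·y + b_k)`
/// with `y = ln x`.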
fn posynomial_to_log_matrices(p: &Posynomial, n: usize) -> (Array2<f64>, Array1<f64>) {
let k = p.n_terms();
let mut a = Array2::<f64>::zeros((k, n));
let mut b = Array1::<f64>::zeros(k);
for (row, term) in p.terms.iter().enumerate() {
for (col, exp) in term.exponents.iter().enumerate() {
a[[row, col]] = *exp;
}
b[row] = term.coefficient.ln();
}
(a, b)
}
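/// Numerically stable log-sum-exp: shifting by the maximum element keeps
/// every exponential in `(0, 1]`, so large inputs cannot overflow (see
/// `test_log_sum_exp_stable`).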
fn log_sum_exp(v: &[f64]) -> f64 {
if v.is_empty() {
return f64::NEG_INFINITY;
}
let max_v = v.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
if max_v == f64::NEG_INFINITY {
return f64::NEG_INFINITY;
}
let sum: f64 = v.iter().map(|vi| (vi - max_v).exp()).sum();
max_v + sum.ln()
}
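/// `softmax(v)_k = exp(v_k) / Σ_j exp(v_j)`, computed through the stable
/// log-sum-exp.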
fn softmax(v: &[f64]) -> Vec<f64> {
let lse = log_sum_exp(v);
v.iter().map(|vi| (vi - lse).exp()).collect()
}
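/// Evaluates `log-sum-exp(A·y + b)`.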
fn lse_objective(a: &Array2<f64>, b: &Array1<f64>, y: &[f64]) -> f64 {
let v: Vec<f64> = (0..a.nrows()).map(|k| {
let inner: f64 = (0..a.ncols()).map(|j| a[[k, j]] * y[j]).sum();
inner + b[k]
}).collect();
log_sum_exp(&v)
}
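/// Gradient of `log-sum-exp(A·y + b)`: `Aᵀ·s` with `s = softmax(A·y + b)`.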
fn lse_gradient(a: &Array2<f64>, b: &Array1<f64>, y: &[f64]) -> Vec<f64> {
let v: Vec<f64> = (0..a.nrows()).map(|k| {
let inner: f64 = (0..a.ncols()).map(|j| a[[k, j]] * y[j]).sum();
inner + b[k]
}).collect();
let sm = softmax(&v);
let n = a.ncols();
let mut grad = vec![0.0_f64; n];
for k in 0..a.nrows() {
for j in 0..n {
grad[j] += sm[k] * a[[k, j]];
}
}
grad
}
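/// Hessian of `log-sum-exp(A·y + b)`: `Aᵀ(diag(s) − s·sᵀ)A`, assembled here
/// as `Σ_k s_k·a_k·a_kᵀ − g·gᵀ` with `g` the gradient. Always positive
/// semidefinite, which is what makes the transformed problem convex.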
fn lse_hessian(a: &Array2<f64>, b: &Array1<f64>, y: &[f64]) -> Array2<f64> {
let v: Vec<f64> = (0..a.nrows()).map(|k| {
let inner: f64 = (0..a.ncols()).map(|j| a[[k, j]] * y[j]).sum();
inner + b[k]
}).collect();
let sm = softmax(&v);
let n = a.ncols();
let mut h = Array2::<f64>::zeros((n, n));
for k in 0..a.nrows() {
for i in 0..n {
for j in 0..n {
h[[i, j]] += sm[k] * a[[k, i]] * a[[k, j]];
}
}
}
let g = lse_gradient(a, b, y);
for i in 0..n {
for j in 0..n {
h[[i, j]] -= g[i] * g[j];
}
}
h
}
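/// Solves `(H + εI)·d = −g` for the Newton direction `d` by Gauss-Jordan
/// elimination with partial pivoting. `ε = 1e-12` is a small Tikhonov
/// regularizer, and a near-zero pivot column is skipped rather than divided
/// by, leaving that component of the solution unchanged.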
fn solve_newton_system(h: &Array2<f64>, g: &[f64]) -> Vec<f64> {
let n = h.nrows();
let reg = 1e-12;
let mut mat: Vec<Vec<f64>> = (0..n).map(|i| {
let mut row: Vec<f64> = (0..n).map(|j| h[[i, j]]).collect();
row[i] += reg;
row.push(-g[i]);
row
}).collect();
for col in 0..n {
let mut max_row = col;
let mut max_val = mat[col][col].abs();
for row in (col + 1)..n {
let v = mat[row][col].abs();
if v > max_val {
max_val = v;
max_row = row;
}
}
mat.swap(col, max_row);
let pivot = mat[col][col];
if pivot.abs() < 1e-30 {
continue;
}
let inv_pivot = 1.0 / pivot;
for j in col..=n {
mat[col][j] *= inv_pivot;
}
for row in 0..n {
if row == col {
continue;
}
let factor = mat[row][col];
for j in col..=n {
let delta = factor * mat[col][j];
mat[row][j] -= delta;
}
}
}
(0..n).map(|i| mat[i][n]).collect()
}
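/// Barrier objective `t·f0(y) − Σ_i ln(−fi(y))`, where each `fi` is a
/// log-sum-exp constraint function. Returns `None` when `y` violates a
/// constraint, so the line search can reject infeasible steps.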
fn barrier_value(lcp: &LogConvexProblem, t: f64, y: &[f64]) -> Option<f64> {
let mut val = t * lse_objective(&lcp.obj_a, &lcp.obj_b, y);
for (ca, cb) in lcp.con_a.iter().zip(lcp.con_b.iter()) {
let lse_c = lse_objective(ca, cb, y);
if lse_c >= 0.0 {
    return None;
}
val -= (-lse_c).ln();
}
Some(val)
}
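/// Gradient of the barrier objective: `t·∇f0 + Σ_i ∇fi / (−fi)`. The
/// `max(1e-30)` guard avoids division by zero at (nearly) active
/// constraints.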
fn barrier_gradient(lcp: &LogConvexProblem, t: f64, y: &[f64]) -> Vec<f64> {
let n = y.len();
let mut grad: Vec<f64> = lse_gradient(&lcp.obj_a, &lcp.obj_b, y)
.iter()
.map(|g| t * g)
.collect();
for (ca, cb) in lcp.con_a.iter().zip(lcp.con_b.iter()) {
let lse_c = lse_objective(ca, cb, y);
let factor = 1.0 / (-lse_c).max(1e-30);
let gc = lse_gradient(ca, cb, y);
for j in 0..n {
grad[j] += factor * gc[j];
}
}
grad
}
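/// Hessian of the barrier objective:
/// `t·∇²f0 + Σ_i (∇²fi / (−fi) + ∇fi·∇fiᵀ / fi²)`.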
fn barrier_hessian(lcp: &LogConvexProblem, t: f64, y: &[f64]) -> Array2<f64> {
let n = y.len();
let mut h = lse_hessian(&lcp.obj_a, &lcp.obj_b, y);
for i in 0..n {
for j in 0..n {
h[[i, j]] *= t;
}
}
for (ca, cb) in lcp.con_a.iter().zip(lcp.con_b.iter()) {
let lse_c = lse_objective(ca, cb, y);
let s = (-lse_c).max(1e-30);
let gc = lse_gradient(ca, cb, y);
let hc = lse_hessian(ca, cb, y);
let inv_s = 1.0 / s;
let inv_s2 = inv_s * inv_s;
for i in 0..n {
for j in 0..n {
h[[i, j]] += inv_s * hc[[i, j]] + inv_s2 * gc[i] * gc[j];
}
}
}
h
}
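/// Phase I: finds a strictly feasible `y` (all constraint LSE values < 0)
/// by crude fixed-step subgradient descent on an auxiliary slack variable
/// `s` that upper-bounds every constraint. The `0.1` margin and the
/// 200-iteration / 0.01-step budget are simple heuristics.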
fn find_feasible_point(lcp: &LogConvexProblem, y0: &[f64]) -> OptimizeResult<Vec<f64>> {
let n = y0.len();
let m = lcp.con_a.len();
if m == 0 {
return Ok(y0.to_vec());
}
let infeasible = lcp.con_a.iter().zip(lcp.con_b.iter()).any(|(ca, cb)| {
lse_objective(ca, cb, y0) >= 0.0
});
if !infeasible {
return Ok(y0.to_vec());
}
let max_lse: f64 = lcp.con_a.iter().zip(lcp.con_b.iter())
.map(|(ca, cb)| lse_objective(ca, cb, y0))
.fold(f64::NEG_INFINITY, f64::max);
let s_init = max_lse + 1.0;
let mut y_aug: Vec<f64> = y0.iter().cloned().chain(std::iter::once(s_init)).collect();
for _iter in 0..200 {
let s = y_aug[n];
let mut g_y = vec![0.0_f64; n];
let mut g_s = 1.0_f64;
for (ca, cb) in lcp.con_a.iter().zip(lcp.con_b.iter()) {
let lse_c = lse_objective(ca, cb, &y_aug[..n]);
let margin = lse_c - s + 0.1;
if margin > 0.0 {
let gc = lse_gradient(ca, cb, &y_aug[..n]);
for j in 0..n {
g_y[j] += gc[j];
}
g_s -= 1.0;
}
}
let step = 0.01;
for j in 0..n {
y_aug[j] -= step * g_y[j];
}
y_aug[n] -= step * g_s;
let s_new = y_aug[n];
if s_new < -0.1 {
    // A negative slack alone does not prove feasibility; only return the
    // point once every constraint is verified strictly satisfied.
    let strictly_feasible = lcp
        .con_a
        .iter()
        .zip(lcp.con_b.iter())
        .all(|(ca, cb)| lse_objective(ca, cb, &y_aug[..n]) < 0.0);
    if strictly_feasible {
        return Ok(y_aug[..n].to_vec());
    }
}
}
let feasible = lcp.con_a.iter().zip(lcp.con_b.iter()).all(|(ca, cb)| {
lse_objective(ca, cb, &y_aug[..n]) < 0.0
});
if feasible {
Ok(y_aug[..n].to_vec())
} else {
Err(OptimizeError::InitializationError(
"could not find a strictly feasible starting point for the GP".into(),
))
}
}
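/// Solves a geometric program with a primal log-barrier interior-point
/// method: damped Newton steps with backtracking line search minimize the
/// barrier objective for a fixed `t`, and `t` is scaled by `cfg.mu` each
/// outer iteration until the duality gap `m / t` falls below
/// `cfg.outer_tol`. Monomial equality constraints are rejected up front,
/// since the barrier loop does not yet enforce them.
///
/// A minimal usage sketch (it mirrors `test_gp_with_constraint` below):
/// minimize `x + y` subject to `1/(x·y) <= 1`, whose optimum is `2` at
/// `x = y = 1`.
///
/// ```ignore
/// let obj = Posynomial::new(vec![
///     Monomial::new(1.0, vec![1.0, 0.0])?,
///     Monomial::new(1.0, vec![0.0, 1.0])?,
/// ])?;
/// let con = Posynomial::new(vec![Monomial::new(1.0, vec![-1.0, -1.0])?])?;
/// let prob = GPProblem::new(obj, vec![con], vec![])?;
/// let result = solve_gp(&prob, None)?;
/// assert!((result.obj_value - 2.0).abs() < 0.05);
/// ```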
pub fn solve_gp(prob: &GPProblem, config: Option<GPSolverConfig>) -> OptimizeResult<GPResult> {
let cfg = config.unwrap_or_default();
let lcp = gp_to_convex(prob);
let n = prob.n_vars();
let y0: Vec<f64> = cfg.initial_y.clone().unwrap_or_else(|| vec![0.0_f64; n]);
if y0.len() != n {
return Err(OptimizeError::ValueError(format!(
"initial_y has length {} but problem has {} variables",
y0.len(),
n
)));
}
if !prob.eq_constraints.is_empty() {
    // The barrier loop below never enforces the linearized equality
    // constraints; fail loudly rather than silently return a wrong answer.
    return Err(OptimizeError::ValueError(
        "monomial equality constraints are not yet supported by solve_gp".into(),
    ));
}
let mut y = find_feasible_point(&lcp, &y0)?;
let mut t = cfg.t_init;
let m = lcp.con_a.len();
let duality_gap = |t: f64| (m as f64) / t;
let mut gap = duality_gap(t);
let mut outer_iters = 0_usize;
let mut converged = false;
for _outer in 0..cfg.max_outer_iters {
outer_iters += 1;
for _inner in 0..cfg.max_inner_iters {
let grad = barrier_gradient(&lcp, t, &y);
let grad_norm: f64 = grad.iter().map(|g| g * g).sum::<f64>().sqrt();
if grad_norm < cfg.inner_tol {
break;
}
let hess = barrier_hessian(&lcp, t, &y);
let delta = solve_newton_system(&hess, &grad);
let newton_dec: f64 = grad.iter().zip(delta.iter()).map(|(g, d)| g * d).sum::<f64>();
if newton_dec.abs() < cfg.inner_tol {
break;
}
let f0 = match barrier_value(&lcp, t, &y) {
Some(v) => v,
None => break,
};
let mut step = 1.0_f64;
let ls_thresh = cfg.ls_alpha * newton_dec.abs();
let y_new = loop {
let candidate: Vec<f64> =
y.iter().zip(delta.iter()).map(|(yi, di)| yi + step * di).collect();
if let Some(f_new) = barrier_value(&lcp, t, &candidate) {
if f0 - f_new >= step * ls_thresh {
break candidate;
}
}
step *= cfg.ls_beta;
if step < 1e-20 {
break y.clone();
}
};
y = y_new;
}
gap = duality_gap(t);
if gap < cfg.outer_tol {
converged = true;
break;
}
t *= cfg.mu;
}
let x: Vec<f64> = y.iter().map(|yi| yi.exp()).collect();
let obj_value = prob.objective.evaluate(&x)?;
Ok(GPResult {
x,
obj_value,
outer_iters,
converged,
gap,
})
}
#[cfg(test)]
mod tests {
use super::*;
fn approx_eq(a: f64, b: f64, tol: f64) -> bool {
(a - b).abs() < tol
}
#[test]
fn test_monomial_evaluate() {
let m = Monomial::new(2.0, vec![1.0, 2.0]).expect("valid monomial");
let val = m.evaluate(&[3.0, 4.0]).expect("evaluation");
assert!(approx_eq(val, 96.0, 1e-10));
}
#[test]
fn test_posynomial_evaluate() {
let m1 = Monomial::new(1.0, vec![1.0, 0.0]).expect("m1");
let m2 = Monomial::new(1.0, vec![0.0, 1.0]).expect("m2");
let p = Posynomial::new(vec![m1, m2]).expect("posynomial");
let val = p.evaluate(&[2.0, 3.0]).expect("eval");
assert!(approx_eq(val, 5.0, 1e-10));
}
#[test]
fn test_gp_unconstrained() {
let m1 = Monomial::new(1.0, vec![1.0]).expect("m1");
let m2 = Monomial::new(1.0, vec![-1.0]).expect("m2");
let obj = Posynomial::new(vec![m1, m2]).expect("obj");
let prob = GPProblem::new(obj, vec![], vec![]).expect("prob");
let cfg = GPSolverConfig { initial_y: Some(vec![0.5]), ..Default::default() };
let result = solve_gp(&prob, Some(cfg)).expect("solve");
assert!(approx_eq(result.obj_value, 2.0, 0.01),
"expected ~2.0, got {}", result.obj_value);
assert!(approx_eq(result.x[0], 1.0, 0.05),
"expected x≈1, got {}", result.x[0]);
}
#[test]
fn test_gp_with_constraint() {
let m1 = Monomial::new(1.0, vec![1.0, 0.0]).expect("m1");
let m2 = Monomial::new(1.0, vec![0.0, 1.0]).expect("m2");
let obj = Posynomial::new(vec![m1, m2]).expect("obj");
let con_mono = Monomial::new(1.0, vec![-1.0, -1.0]).expect("con");
let con_posy = Posynomial::new(vec![con_mono]).expect("con_p");
let prob = GPProblem::new(obj, vec![con_posy], vec![]).expect("prob");
let cfg = GPSolverConfig {
initial_y: Some(vec![0.0, 0.0]),
..Default::default()
};
let result = solve_gp(&prob, Some(cfg)).expect("solve");
assert!(approx_eq(result.obj_value, 2.0, 0.05),
"expected ~2.0, got {}", result.obj_value);
}
#[test]
fn test_log_sum_exp_stable() {
let v = vec![1000.0, 1001.0, 1002.0];
let lse = log_sum_exp(&v);
assert!(lse > 1001.0 && lse < 1003.0);
}
#[test]
fn test_monomial_invalid_coefficient() {
assert!(Monomial::new(-1.0, vec![1.0]).is_err());
assert!(Monomial::new(0.0, vec![1.0]).is_err());
}
}