use crate::error::{OptimizeError, OptimizeResult};
use super::implicit_diff::solve_implicit_system;
/// Assembles the KKT matrix of an equality-constrained QP:
///
/// ```text
/// K = [ Q   Aᵀ ]
///     [ A   0  ]
/// ```
///
/// `q` is the n×n Hessian block and `a` the p×n equality-constraint
/// Jacobian; the result is (n+p)×(n+p). Ragged rows (shorter than
/// expected) are padded with zeros instead of panicking, matching the
/// defensive style used throughout this module. K is symmetric exactly
/// when `q` is.
pub fn kkt_matrix(q: &[Vec<f64>], a: &[Vec<f64>]) -> Vec<Vec<f64>> {
    let n = q.len();
    let p = a.len();
    let dim = n + p;
    let mut k = vec![vec![0.0_f64; dim]; dim];
    // Top-left block: copy Q, tolerating ragged rows (missing entries → 0).
    for (i, row) in q.iter().enumerate() {
        for j in 0..n {
            k[i][j] = row.get(j).copied().unwrap_or(0.0);
        }
    }
    // Off-diagonal blocks: fill Aᵀ (top-right) and A (bottom-left) in a
    // single pass — the original read each entry twice in two loops.
    for (i, row) in a.iter().enumerate() {
        for j in 0..n {
            let a_val = row.get(j).copied().unwrap_or(0.0);
            k[j][n + i] = a_val;
            k[n + i][j] = a_val;
        }
    }
    k
}
/// Gradients of a scalar loss L with respect to the parameters of an
/// equality-constrained QP, as produced by [`kkt_sensitivity`].
#[derive(Debug, Clone)]
pub struct KktGrad {
// dL/dQ, n×n, symmetrized (see kkt_sensitivity).
pub dl_dq: Vec<Vec<f64>>,
// dL/dc, length n — gradient w.r.t. the linear objective term.
pub dl_dc: Vec<f64>,
// dL/dA, p×n — gradient w.r.t. the equality-constraint matrix.
pub dl_da: Vec<Vec<f64>>,
// dL/db, length p — gradient w.r.t. the equality right-hand side.
pub dl_db: Vec<f64>,
// Adjoint variables paired with the primal solution x (length n).
pub dx_adj: Vec<f64>,
// Adjoint variables paired with the dual solution ν (length p).
pub dnu_adj: Vec<f64>,
}
/// Adjoint sensitivities of a loss L(x*) through the KKT system of the QP
/// `min ½xᵀQx + cᵀx s.t. Ax = b`, whose optimality conditions are
/// `K·[x; ν] = [-c; b]` with `K = kkt_matrix(q, a)`.
///
/// Given the upstream gradient `dl_dx = ∂L/∂x` at the primal solution `x`
/// (dual `nu`), solves the transposed KKT system for the adjoint variables
/// and assembles the parameter gradients:
///
/// * `dl_dc = -dx_adj` (the RHS carries `-c`, hence the sign flip)
/// * `dl_db =  dnu_adj`
/// * `dl_dq = -½(dx_adj·xᵀ + x·dx_adjᵀ)` — symmetrized so dL/dQ respects
///   Q's symmetry
/// * `dl_da[i][j] = -(ν_i·dx_adj[j] + x_j·dnu_adj[i])` — A enters both the
///   stationarity residual (Aᵀν) and primal feasibility (Ax)
///
/// # Errors
///
/// Returns `OptimizeError::InvalidInput` when `dl_dx.len() != x.len()`, and
/// propagates failures from `solve_implicit_system` (e.g. singular K).
pub fn kkt_sensitivity(
    q: &[Vec<f64>],
    a: &[Vec<f64>],
    x: &[f64],
    nu: &[f64],
    dl_dx: &[f64],
) -> OptimizeResult<KktGrad> {
    let n = x.len();
    let p = nu.len();
    if dl_dx.len() != n {
        return Err(OptimizeError::InvalidInput(format!(
            "dl_dx length {} != n {}",
            dl_dx.len(),
            n
        )));
    }
    let k = kkt_matrix(q, a);
    let dim = n + p;
    // Solve Kᵀ·adj = [dl_dx; 0]. K is only symmetric when q is, so the
    // explicit transpose is kept rather than assumed away.
    let mut k_t = vec![vec![0.0_f64; dim]; dim];
    for i in 0..dim {
        for j in 0..dim {
            k_t[i][j] = k[j][i];
        }
    }
    let mut rhs = vec![0.0_f64; dim];
    rhs[..n].copy_from_slice(dl_dx);
    let adj = solve_implicit_system(&k_t, &rhs)?;
    let dx_adj = adj[..n].to_vec();
    let dnu_adj = adj[n..n + p].to_vec();
    let dl_dc: Vec<f64> = dx_adj.iter().map(|&v| -v).collect();
    let dl_db: Vec<f64> = dnu_adj.clone();
    let mut dl_dq = vec![vec![0.0_f64; n]; n];
    for i in 0..n {
        for j in 0..n {
            dl_dq[i][j] = -0.5 * (dx_adj[i] * x[j] + x[i] * dx_adj[j]);
        }
    }
    let mut dl_da = vec![vec![0.0_f64; n]; p];
    for i in 0..p {
        for j in 0..n {
            dl_da[i][j] = -(nu[i] * dx_adj[j] + x[j] * dnu_adj[i]);
        }
    }
    Ok(KktGrad {
        dl_dq,
        dl_dc,
        dl_da,
        dl_db,
        dx_adj,
        dnu_adj,
    })
}
/// Gradients of a scalar loss L with respect to the parameters of a
/// parametric NLP, as produced by [`parametric_nlp_adjoint`].
#[derive(Debug, Clone)]
pub struct NlpGrad {
// dL w.r.t. parameters entering the objective gradient (length n).
pub dl_df_params: Vec<f64>,
// dL w.r.t. parameters of each inequality constraint (m×n).
pub dl_dg_params: Vec<Vec<f64>>,
// dL w.r.t. parameters of each equality constraint (p×n).
pub dl_dh_params: Vec<Vec<f64>>,
// Adjoint variables for the primal solution x* (length n).
pub dx_adj: Vec<f64>,
// Adjoint variables for λ, zero for inactive constraints (length m).
pub dlambda_adj: Vec<f64>,
// Adjoint variables for ν (length p).
pub dnu_adj: Vec<f64>,
}
/// Adjoint sensitivities of a loss L(x*) through the KKT conditions of a
/// parametric NLP with inequalities g(x) ≤ 0 and equalities h(x) = 0.
///
/// Inputs are first-order data at the solution: `f_grad` = ∇f(x*), `g_jac`
/// (m rows, one ∇g_i each), `h_jac` (p rows of ∇h_i), the primal point
/// `x_star`, multipliers `lambda_star` / `nu_star`, and the upstream
/// gradient `dl_dx` = ∂L/∂x*.
///
/// The implicit-function Jacobian is assembled on the active set only
/// (constraints with λ_i above a fixed tolerance); inactive multipliers
/// receive a zero adjoint. NOTE: the primal–primal block is just reg·I —
/// no Lagrangian Hessian is supplied — so the result is an approximation,
/// not an exact sensitivity. `f_grad` is currently unread by this body;
/// presumably kept for API symmetry — TODO confirm.
///
/// # Errors
///
/// `OptimizeError::InvalidInput` when `dl_dx.len() != x_star.len()`;
/// failures from `solve_implicit_system` are propagated.
pub fn parametric_nlp_adjoint(
f_grad: &[f64],
g_jac: &[Vec<f64>],
h_jac: &[Vec<f64>],
x_star: &[f64],
lambda_star: &[f64],
nu_star: &[f64],
dl_dx: &[f64],
) -> OptimizeResult<NlpGrad> {
let n = x_star.len();
let m = lambda_star.len();
let p = nu_star.len();
if dl_dx.len() != n {
return Err(OptimizeError::InvalidInput(format!(
"dl_dx length {} != n {}",
dl_dx.len(),
n
)));
}
// Strict-complementarity active set: only constraints whose multiplier
// exceeds the tolerance enter the linearized system.
let active_tol = 1e-8_f64;
let active_ineq: Vec<usize> = (0..m).filter(|&i| lambda_star[i] > active_tol).collect();
let m_act = active_ineq.len();
// Regularization standing in for the (unavailable) Lagrangian Hessian.
let reg = 1e-8_f64;
let dim = n + m_act + p;
let mut jac = vec![vec![0.0_f64; dim]; dim];
// Primal–primal block: reg·I (see NOTE in the doc comment).
for i in 0..n {
jac[i][i] = reg;
}
// Columns n..n+m_act: ∇g_iᵀ of active constraints (stationarity w.r.t. x
// picks up λ-directions). Out-of-range Jacobian entries are treated as 0.
for (col, &ai) in active_ineq.iter().enumerate() {
for row in 0..n {
let g_val = if ai < g_jac.len() && row < g_jac[ai].len() {
g_jac[ai][row]
} else {
0.0
};
jac[row][n + col] = g_val;
}
}
// Columns n+m_act..dim: ∇h_iᵀ (equality-multiplier directions).
for i in 0..p {
for row in 0..n {
let h_val = if i < h_jac.len() && row < h_jac[i].len() {
h_jac[i][row]
} else {
0.0
};
jac[row][n + m_act + i] = h_val;
}
}
// Rows n..n+m_act: λ_i·∇g_i — linearized complementarity for the
// active inequalities.
for (row, &ai) in active_ineq.iter().enumerate() {
let lam_i = lambda_star[ai];
for col in 0..n {
let g_val = if ai < g_jac.len() && col < g_jac[ai].len() {
g_jac[ai][col]
} else {
0.0
};
jac[n + row][col] = lam_i * g_val;
}
}
// Rows n+m_act..dim: ∇h_i — linearized equality feasibility.
for i in 0..p {
for col in 0..n {
let h_val = if i < h_jac.len() && col < h_jac[i].len() {
h_jac[i][col]
} else {
0.0
};
jac[n + m_act + i][col] = h_val;
}
}
// Adjoint solve uses the transposed Jacobian: Jᵀ·adj = [dl_dx; 0; 0].
let mut jac_t = vec![vec![0.0_f64; dim]; dim];
for i in 0..dim {
for j in 0..dim {
jac_t[i][j] = jac[j][i];
}
}
let mut rhs = vec![0.0_f64; dim];
for i in 0..n {
rhs[i] = dl_dx[i];
}
let adj = solve_implicit_system(&jac_t, &rhs)?;
let dx_adj = adj[..n].to_vec();
let dlambda_adj_active = adj[n..n + m_act].to_vec();
let dnu_adj = adj[n + m_act..].to_vec();
// Scatter active-set adjoints back to full multiplier length; inactive
// constraints keep a zero adjoint.
let mut dlambda_adj = vec![0.0_f64; m];
for (idx, &ai) in active_ineq.iter().enumerate() {
if idx < dlambda_adj_active.len() {
dlambda_adj[ai] = dlambda_adj_active[idx];
}
}
// dL/d(f params): the stationarity adjoint itself; f_grad's value is not
// needed for this term.
let dl_df_params = dx_adj.clone();
// Per-constraint product-rule terms; assumes the parameter enters each
// constraint linearly through x — verify against the caller's model.
let mut dl_dg_params = vec![vec![0.0_f64; n]; m];
for i in 0..m {
for j in 0..n {
dl_dg_params[i][j] = dlambda_adj[i] * x_star[j] + lambda_star[i] * dx_adj[j];
}
}
let mut dl_dh_params = vec![vec![0.0_f64; n]; p];
for i in 0..p {
for j in 0..n {
dl_dh_params[i][j] = dnu_adj[i] * x_star[j] + nu_star[i] * dx_adj[j];
}
}
Ok(NlpGrad {
dl_df_params,
dl_dg_params,
dl_dh_params,
dx_adj,
dlambda_adj,
dnu_adj,
})
}
/// Returns a copy of `q` with `delta` added to every diagonal entry
/// (Tikhonov regularization: Q + δ·I).
///
/// Ragged rows too short to contain their diagonal entry are left
/// unchanged rather than causing a panic.
pub fn regularize_q(q: &[Vec<f64>], delta: f64) -> Vec<Vec<f64>> {
    let mut q_reg = q.to_vec();
    for (i, row) in q_reg.iter_mut().enumerate() {
        // Checked access covers ragged rows; the original's extra
        // `i < q_reg.len()` guard was always true (i ranges over q.len()).
        if let Some(diag) = row.get_mut(i) {
            *diag += delta;
        }
    }
    q_reg
}
/// Dense matrix–vector product y = A·x.
///
/// Each output entry is the dot product of one row of `a` with `x`. When a
/// row and `x` differ in length, the surplus entries of the longer one are
/// ignored (zip truncation), matching this module's defensive style.
pub fn mat_vec(a: &[Vec<f64>], x: &[f64]) -> Vec<f64> {
    let mut y = Vec::with_capacity(a.len());
    for row in a {
        let mut dot = 0.0_f64;
        for (&a_ij, &x_j) in row.iter().zip(x) {
            dot += a_ij * x_j;
        }
        y.push(dot);
    }
    y
}
/// Outer product C = a·bᵀ, i.e. C[i][j] = a[i] * b[j]; result has
/// `a.len()` rows of `b.len()` columns each.
pub fn outer_product(a: &[f64], b: &[f64]) -> Vec<Vec<f64>> {
    let mut c = Vec::with_capacity(a.len());
    for &ai in a {
        let mut row = Vec::with_capacity(b.len());
        for &bj in b {
            row.push(ai * bj);
        }
        c.push(row);
    }
    c
}
/// Symmetrized outer product C = ½(a·bᵀ + b·aᵀ); the result is n×n with
/// n = a.len() and always satisfies C[i][j] == C[j][i].
///
/// # Panics
///
/// Panics (index out of bounds) if `b` is shorter than `a`.
pub fn sym_outer_product(a: &[f64], b: &[f64]) -> Vec<Vec<f64>> {
    let n = a.len();
    (0..n)
        .map(|i| {
            (0..n)
                .map(|j| 0.5 * (a[i] * b[j] + b[i] * a[j]))
                .collect()
        })
        .collect()
}
/// A pre-assembled KKT system for an equality-constrained QP, bundling the
/// (n+p)×(n+p) matrix from [`kkt_matrix`] with its block dimensions.
#[derive(Debug, Clone)]
pub struct KktSystem {
// The assembled [[Q, Aᵀ], [A, 0]] matrix, (n+p)×(n+p).
pub matrix: Vec<Vec<f64>>,
// Number of primal variables (rows of Q).
pub n: usize,
// Number of equality constraints (rows of A).
pub p: usize,
}
impl KktSystem {
/// Builds the KKT system from Hessian `q` (n×n) and constraint matrix
/// `a` (p×n) via [`kkt_matrix`].
pub fn new(q: &[Vec<f64>], a: &[Vec<f64>]) -> Self {
let n = q.len();
let p = a.len();
let matrix = kkt_matrix(q, a);
Self { matrix, n, p }
}
/// Solves K·[x; ν] = [b1; b2] and returns the (x, ν) split.
///
/// NOTE(review): `b1`/`b2` lengths are not validated against (n, p); a
/// wrong-sized RHS is passed straight to `solve_implicit_system` —
/// confirm that layer rejects it.
pub fn solve(&self, b1: &[f64], b2: &[f64]) -> OptimizeResult<(Vec<f64>, Vec<f64>)> {
let dim = self.n + self.p;
let mut rhs = Vec::with_capacity(dim);
rhs.extend_from_slice(b1);
rhs.extend_from_slice(b2);
let sol = solve_implicit_system(&self.matrix, &rhs)?;
let x = sol[..self.n].to_vec();
let nu = sol[self.n..].to_vec();
Ok((x, nu))
}
/// Delegates to [`kkt_sensitivity`]. `q` and `a` are taken again as
/// arguments (rather than re-derived from `self.matrix`), so they must
/// match the matrices this system was built from.
pub fn sensitivity(
&self,
q: &[Vec<f64>],
a: &[Vec<f64>],
x: &[f64],
nu: &[f64],
dl_dx: &[f64],
) -> OptimizeResult<KktGrad> {
kkt_sensitivity(q, a, x, nu, dl_dx)
}
}
#[cfg(test)]
mod tests {
use super::*;
// Central-difference gradient of a scalar function, used for
// finite-difference checks of the analytic sensitivities.
fn numerical_gradient_kkt<F>(f: F, x: &[f64], eps: f64) -> Vec<f64>
where
F: Fn(&[f64]) -> f64,
{
let n = x.len();
let mut grad = vec![0.0_f64; n];
for i in 0..n {
let mut xp = x.to_vec();
let mut xm = x.to_vec();
xp[i] += eps;
xm[i] -= eps;
grad[i] = (f(&xp) - f(&xm)) / (2.0 * eps);
}
grad
}
// Spot-checks the block layout of K = [[Q, Aᵀ], [A, 0]] for n=3, p=2.
#[test]
fn test_kkt_matrix_shape() {
let q = vec![
vec![2.0, 0.0, 0.0],
vec![0.0, 3.0, 0.0],
vec![0.0, 0.0, 4.0],
];
let a = vec![vec![1.0, 1.0, 0.0], vec![0.0, 1.0, 1.0]];
let k = kkt_matrix(&q, &a);
assert_eq!(k.len(), 5, "KKT matrix should be 5×5");
assert_eq!(k[0].len(), 5);
assert!((k[0][0] - 2.0).abs() < 1e-12);
assert!((k[1][1] - 3.0).abs() < 1e-12);
assert!((k[2][2] - 4.0).abs() < 1e-12);
assert!((k[0][3] - 1.0).abs() < 1e-12);
assert!((k[1][3] - 1.0).abs() < 1e-12);
assert!((k[2][3] - 0.0).abs() < 1e-12);
assert!((k[3][0] - 1.0).abs() < 1e-12);
assert!((k[3][1] - 1.0).abs() < 1e-12);
assert!((k[4][1] - 1.0).abs() < 1e-12);
assert!((k[4][2] - 1.0).abs() < 1e-12);
assert!((k[3][3]).abs() < 1e-12);
assert!((k[4][4]).abs() < 1e-12);
}
// K should be symmetric whenever Q is symmetric.
#[test]
fn test_kkt_matrix_symmetry() {
let q = vec![vec![2.0, 1.0], vec![1.0, 3.0]];
let a = vec![vec![1.0, 2.0]];
let k = kkt_matrix(&q, &a);
let dim = k.len();
for i in 0..dim {
for j in 0..dim {
assert!(
(k[i][j] - k[j][i]).abs() < 1e-12,
"KKT matrix not symmetric at ({},{}): {} vs {}",
i,
j,
k[i][j],
k[j][i]
);
}
}
}
// Unconstrained case: with Q = 2I, dL/dc = -Kᵀ⁻¹·dl_dx = -dl_dx/2.
#[test]
fn test_kkt_sensitivity_unconstrained() {
let q = vec![vec![2.0, 0.0], vec![0.0, 2.0]];
let a: Vec<Vec<f64>> = vec![];
let x = vec![-0.5, -1.0]; let nu: Vec<f64> = vec![];
let dl_dx = vec![1.0, 0.0];
let grad = kkt_sensitivity(&q, &a, &x, &nu, &dl_dx).expect("KKT sensitivity failed");
assert!(
(grad.dl_dc[0] - (-0.5)).abs() < 1e-10,
"dl/dc[0] = {} (expected -0.5)",
grad.dl_dc[0]
);
assert!(
grad.dl_dc[1].abs() < 1e-10,
"dl/dc[1] = {} (expected 0.0)",
grad.dl_dc[1]
);
}
// Smoke test: an equality constraint present, outputs merely finite.
#[test]
fn test_kkt_sensitivity_with_equality() {
let q = vec![vec![1.0, 0.0], vec![0.0, 1.0]]; let a = vec![vec![1.0, 1.0]];
let x = vec![0.5, 0.5];
let nu = vec![-0.5];
let dl_dx = vec![1.0, 0.0];
let grad = kkt_sensitivity(&q, &a, &x, &nu, &dl_dx).expect("KKT sensitivity failed");
assert!(grad.dl_dc[0].is_finite(), "dl/dc[0] not finite");
assert!(grad.dl_dc[1].is_finite(), "dl/dc[1] not finite");
assert!(grad.dl_db[0].is_finite(), "dl/db[0] not finite");
}
// Finite-difference check of dL/dc against the closed-form solution
// x = -c/q_ii of the unconstrained diagonal QP, with L = ½‖x‖².
#[test]
fn test_kkt_sensitivity_gradient_check_c() {
let q = vec![vec![4.0, 0.0], vec![0.0, 4.0]];
let a: Vec<Vec<f64>> = vec![];
let c = vec![2.0_f64, 0.0];
let x = vec![-0.5_f64, 0.0]; let nu: Vec<f64> = vec![];
let dl_dx = vec![-0.5_f64, 0.0];
let grad = kkt_sensitivity(&q, &a, &x, &nu, &dl_dx).expect("KKT sensitivity failed");
let eps = 1e-5_f64;
let solve_unconstrained = |c_vec: &[f64]| -> Vec<f64> {
c_vec
.iter()
.enumerate()
.map(|(i, &ci)| -ci / q[i][i])
.collect()
};
let mut c_plus = c.clone();
c_plus[0] += eps;
let x_plus = solve_unconstrained(&c_plus);
let mut c_minus = c.clone();
c_minus[0] -= eps;
let x_minus = solve_unconstrained(&c_minus);
let loss = |xv: &[f64]| -> f64 { xv.iter().map(|&xi| 0.5 * xi * xi).sum() };
let fd_dc0 = (loss(&x_plus) - loss(&x_minus)) / (2.0 * eps);
assert!(
(grad.dl_dc[0] - fd_dc0).abs() < 1e-5,
"KKT sensitivity dL/dc[0] = {} vs FD = {}",
grad.dl_dc[0],
fd_dc0
);
}
// End-to-end solve through the KktSystem wrapper: min 2‖x‖² s.t. x₁+x₂=1
// has solution x = (½, ½), ν = -2.
#[test]
fn test_kkt_system_struct() {
let q = vec![vec![4.0, 0.0], vec![0.0, 4.0]];
let a = vec![vec![1.0, 1.0]];
let sys = KktSystem::new(&q, &a);
assert_eq!(sys.n, 2);
assert_eq!(sys.p, 1);
assert_eq!(sys.matrix.len(), 3);
let b1 = vec![0.0, 0.0]; let b2 = vec![1.0]; let (x, nu) = sys.solve(&b1, &b2).expect("KKT solve failed");
assert!((x[0] - 0.5).abs() < 1e-10, "x[0] = {} (expected 0.5)", x[0]);
assert!((x[1] - 0.5).abs() < 1e-10, "x[1] = {} (expected 0.5)", x[1]);
assert!(
(nu[0] - (-2.0)).abs() < 1e-10,
"ν[0] = {} (expected -2.0)",
nu[0]
);
}
// C[i][j] = a[i]·b[j] on a 2×2 example.
#[test]
fn test_outer_product() {
let a = vec![1.0, 2.0];
let b = vec![3.0, 4.0];
let c = outer_product(&a, &b);
assert!((c[0][0] - 3.0).abs() < 1e-12);
assert!((c[0][1] - 4.0).abs() < 1e-12);
assert!((c[1][0] - 6.0).abs() < 1e-12);
assert!((c[1][1] - 8.0).abs() < 1e-12);
}
// Diagonal values plus the symmetry C[0][1] == C[1][0].
#[test]
fn test_sym_outer_product() {
let a = vec![1.0, 2.0];
let b = vec![3.0, 4.0];
let c = sym_outer_product(&a, &b);
assert!((c[0][0] - 3.0).abs() < 1e-12); assert!((c[1][1] - 8.0).abs() < 1e-12); assert!((c[0][1] - c[1][0]).abs() < 1e-12);
}
// With no constraints the Jacobian is reg·I, so dx_adj = dl_dx / reg.
#[test]
fn test_nlp_adjoint_unconstrained() {
let n = 3_usize;
let f_grad = vec![1.0, 2.0, 3.0_f64];
let g_jac: Vec<Vec<f64>> = vec![];
let h_jac: Vec<Vec<f64>> = vec![];
let x_star = vec![0.5, -0.5, 0.0_f64];
let lambda_star: Vec<f64> = vec![];
let nu_star: Vec<f64> = vec![];
let dl_dx = vec![1.0, 0.0, 0.0_f64];
let grad = parametric_nlp_adjoint(
&f_grad,
&g_jac,
&h_jac,
&x_star,
&lambda_star,
&nu_star,
&dl_dx,
)
.expect("NLP adjoint failed");
// Must match the reg constant inside parametric_nlp_adjoint.
let reg = 1e-8_f64;
assert!(
(grad.dx_adj[0] - dl_dx[0] / reg).abs() < 1e-3 * (dl_dx[0] / reg).abs() + 1e-8,
"dx_adj[0] = {} (expected ~{})",
grad.dx_adj[0],
dl_dx[0] / reg
);
assert_eq!(grad.dl_df_params.len(), n);
assert_eq!(grad.dlambda_adj.len(), 0);
assert_eq!(grad.dnu_adj.len(), 0);
}
// Q + δI touches only the diagonal; off-diagonals are unchanged.
#[test]
fn test_regularize_q() {
let q = vec![vec![2.0, 1.0], vec![1.0, 3.0]];
let q_reg = regularize_q(&q, 0.5);
assert!((q_reg[0][0] - 2.5).abs() < 1e-12);
assert!((q_reg[1][1] - 3.5).abs() < 1e-12);
assert!((q_reg[0][1] - 1.0).abs() < 1e-12); }
// 2×2 matrix–vector product against hand-computed values.
#[test]
fn test_mat_vec() {
let a = vec![vec![1.0, 2.0], vec![3.0, 4.0]];
let x = vec![1.0, 2.0];
let y = mat_vec(&a, &x);
assert!((y[0] - 5.0).abs() < 1e-12);
assert!((y[1] - 11.0).abs() < 1e-12);
}
// Keeps numerical_gradient_kkt referenced so it isn't flagged as dead.
#[allow(dead_code)]
fn _use_numerical_gradient(grad_fn: impl Fn(&[f64]) -> f64, x: &[f64]) -> Vec<f64> {
numerical_gradient_kkt(grad_fn, x, 1e-6)
}
}