scirs2_optimize/differentiable_optimization/kkt_sensitivity.rs
//! KKT sensitivity analysis for differentiable optimization layers.
//!
//! Given an equality-constrained QP:
//!
//!   min ½ xᵀQx + cᵀx   s.t.   Ax = b
//!
//! the KKT conditions are:
//!
//!   Qx + c + Aᵀν = 0   (stationarity)
//!   Ax = b             (primal feasibility)
//!
//! The bordered KKT system is:
//!
//!   K = [Q  Aᵀ]
//!       [A  0 ]
//!
//! Differentiating through the KKT conditions gives the adjoint system:
//!
//!   Kᵀ [dx_adj; dν_adj] = [dL/dx; 0]
//!
//! from which parameter gradients can be extracted.
//!
//! For the general nonlinear case, we use the parametric NLP adjoint method
//! to differentiate through solutions of:
//!
//!   min f(x, θ)   s.t.   g(x, θ) ≤ 0,   h(x, θ) = 0
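//!
//! A minimal usage sketch for the QP case (marked `ignore`; the module path
//! below is assumed from the file location and may differ in the crate tree):
//!
//! ```ignore
//! use scirs2_optimize::differentiable_optimization::kkt_sensitivity::{
//!     kkt_sensitivity, KktSystem,
//! };
//!
//! // min ½‖x‖²  s.t.  x₀ + x₁ = 1
//! let q = vec![vec![1.0, 0.0], vec![0.0, 1.0]];
//! let a = vec![vec![1.0, 1.0]];
//!
//! // Forward pass: solve K [x; ν] = [-c; b] with c = 0 and b = [1].
//! let sys = KktSystem::new(&q, &a);
//! let (x, nu) = sys.solve(&[0.0, 0.0], &[1.0]).unwrap();
//!
//! // Backward pass: push an upstream gradient dL/dx through the KKT system.
//! let grad = kkt_sensitivity(&q, &a, &x, &nu, &[1.0, 0.0]).unwrap();
//! assert_eq!(grad.dl_dc.len(), 2);
//! ```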

use crate::error::{OptimizeError, OptimizeResult};

use super::implicit_diff::solve_implicit_system;

// ─────────────────────────────────────────────────────────────────────────────
// KKT matrix assembly
// ─────────────────────────────────────────────────────────────────────────────

/// Assemble the bordered KKT matrix for the equality-constrained QP:
///
///   min ½ xᵀQx + cᵀx   s.t.   Ax = b
///
/// The bordered system is:
///
///   K = [Q  Aᵀ]        size: (n+p) × (n+p)
///       [A  0 ]
///
/// # Arguments
/// * `q` – n×n cost matrix (symmetric PSD).
/// * `a` – p×n equality constraint matrix.
///
/// # Returns
/// The bordered KKT matrix as a row-major `Vec<Vec<f64>>`.
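///
/// # Example
///
/// A small sketch of the block layout (marked `ignore` so it is not run as a
/// doctest):
///
/// ```ignore
/// // 2 variables, 1 equality constraint → 3×3 bordered matrix.
/// let k = kkt_matrix(&[vec![2.0, 0.0], vec![0.0, 2.0]], &[vec![1.0, 1.0]]);
/// assert_eq!(k.len(), 3);
/// assert_eq!(k[0][2], 1.0); // Aᵀ block
/// assert_eq!(k[2][0], 1.0); // A  block
/// assert_eq!(k[2][2], 0.0); // zero block
/// ```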
pub fn kkt_matrix(q: &[Vec<f64>], a: &[Vec<f64>]) -> Vec<Vec<f64>> {
    let n = q.len();
    let p = a.len();
    let dim = n + p;

    let mut k = vec![vec![0.0_f64; dim]; dim];

    // Block (0,0): Q (n×n)
    for i in 0..n {
        for j in 0..n {
            k[i][j] = if i < q.len() && j < q[i].len() {
                q[i][j]
            } else {
                0.0
            };
        }
    }

    // Block (0,1): Aᵀ (n×p)
    for i in 0..p {
        for j in 0..n {
            let a_val = if i < a.len() && j < a[i].len() {
                a[i][j]
            } else {
                0.0
            };
            k[j][n + i] = a_val;
        }
    }

    // Block (1,0): A (p×n)
    for i in 0..p {
        for j in 0..n {
            let a_val = if i < a.len() && j < a[i].len() {
                a[i][j]
            } else {
                0.0
            };
            k[n + i][j] = a_val;
        }
    }

    // Block (1,1): 0 (p×p) — already zero from initialization
    k
}

// ─────────────────────────────────────────────────────────────────────────────
// KKT sensitivity gradients
// ─────────────────────────────────────────────────────────────────────────────

/// Gradients of the loss w.r.t. QP parameters, computed via KKT sensitivity.
#[derive(Debug, Clone)]
pub struct KktGrad {
    /// Gradient dL/dQ (n×n symmetric).
    pub dl_dq: Vec<Vec<f64>>,
    /// Gradient dL/dc (n).
    pub dl_dc: Vec<f64>,
    /// Gradient dL/dA (p×n).
    pub dl_da: Vec<Vec<f64>>,
    /// Gradient dL/db (p).
    pub dl_db: Vec<f64>,
    /// Adjoint of x (dx_adj, n).
    pub dx_adj: Vec<f64>,
    /// Adjoint of ν (dnu_adj, p).
    pub dnu_adj: Vec<f64>,
}

/// Compute KKT sensitivity gradients for the equality-constrained QP.
///
/// Given the optimal primal x* and dual ν*, and the upstream gradient dL/dx,
/// solve the adjoint KKT system:
///
///   Kᵀ [dx_adj; dν_adj] = [dL/dx; 0],   where K = [Q Aᵀ; A 0]
///
/// Then extract:
///
///   dL/dQ = -½ (dx_adj xᵀ + x dx_adjᵀ)   (symmetrized outer product)
///   dL/dc = -dx_adj
///   dL/dA = -(ν dx_adjᵀ + dν_adj xᵀ)
///   dL/db = dν_adj
///
/// # Arguments
/// * `q` – n×n cost matrix.
/// * `a` – p×n equality constraint matrix.
/// * `x` – optimal primal solution (length n).
/// * `nu` – optimal dual variables (length p).
/// * `dl_dx` – upstream gradient dL/dx (length n).
///
/// # Errors
/// Returns `OptimizeError::ComputationError` if the KKT system is singular.
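///
/// # Example
///
/// An unconstrained sanity check (marked `ignore`; see the tests at the bottom
/// of this file for runnable versions): for min ½ xᵀ(2I)x + cᵀx we have
/// x*(c) = -c/2, so dL/dc = -½ dL/dx.
///
/// ```ignore
/// let q = vec![vec![2.0, 0.0], vec![0.0, 2.0]];
/// let grad = kkt_sensitivity(&q, &[], &[-0.5, -1.0], &[], &[1.0, 0.0]).unwrap();
/// assert!((grad.dl_dc[0] + 0.5).abs() < 1e-10);
/// ```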
pub fn kkt_sensitivity(
    q: &[Vec<f64>],
    a: &[Vec<f64>],
    x: &[f64],
    nu: &[f64],
    dl_dx: &[f64],
) -> OptimizeResult<KktGrad> {
    let n = x.len();
    let p = nu.len();

    if dl_dx.len() != n {
        return Err(OptimizeError::InvalidInput(format!(
            "dl_dx length {} != n {}",
            dl_dx.len(),
            n
        )));
    }

    // Build the bordered KKT matrix K = [Q Aᵀ; A 0]
    let k = kkt_matrix(q, a);

    // Transpose K. (K is symmetric for an equality-constrained QP with symmetric Q,
    // but we solve the adjoint system with Kᵀ explicitly so the code stays correct
    // even if Q is not symmetric.)
    let dim = n + p;
    let mut k_t = vec![vec![0.0_f64; dim]; dim];
    for i in 0..dim {
        for j in 0..dim {
            k_t[i][j] = k[j][i];
        }
    }

    // RHS for adjoint system: [dL/dx; 0_p]
    let mut rhs = vec![0.0_f64; dim];
    for i in 0..n {
        rhs[i] = dl_dx[i];
    }
    // rhs[n..n+p] = 0 (already zero)

    // Solve: Kᵀ [dx_adj; dν_adj] = [dL/dx; 0]
    let adj = solve_implicit_system(&k_t, &rhs)?;

    let dx_adj = adj[..n].to_vec();
    let dnu_adj = adj[n..n + p].to_vec();

    // ── Compute parameter gradients via adjoint method ────────────────────
    // The adjoint rule: dL/dθ = -(∂F/∂θ)ᵀ adj,
    // where adj = [dx_adj; dν_adj] satisfies Kᵀ adj = [dL/dx; 0].

    // ∂F₁/∂c = I → dL/dc = -(I·dx_adj) = -dx_adj
    let dl_dc: Vec<f64> = dx_adj.iter().map(|&v| -v).collect();

    // ∂F₂/∂b = -I → dL/db = -(-I·dν_adj) = dν_adj
    let dl_db: Vec<f64> = dnu_adj.to_vec();

    // ∂(F₁)_k/∂Q_{ij} = x_j δ_{ki} → dL/dQ_{ij} = -dx_adj_i * x_j;
    // symmetrized (Q is treated as symmetric): dL/dQ = -½(dx_adj·xᵀ + x·dx_adjᵀ)
    let mut dl_dq = vec![vec![0.0_f64; n]; n];
    for i in 0..n {
        for j in 0..n {
            dl_dq[i][j] = -0.5 * (dx_adj[i] * x[j] + x[i] * dx_adj[j]);
        }
    }

    // ∂(Aᵀν)_k/∂A_{ij} = ν_i δ_{kj}   (enters F₁ in component j)
    // ∂(Ax)_k /∂A_{ij} = x_j δ_{ki}   (enters F₂ in component i)
    // → dL/dA_{ij} = -(ν_i * dx_adj_j + x_j * dν_adj_i)
    let mut dl_da = vec![vec![0.0_f64; n]; p];
    for i in 0..p {
        for j in 0..n {
            dl_da[i][j] = -(nu[i] * dx_adj[j] + x[j] * dnu_adj[i]);
        }
    }

    Ok(KktGrad {
        dl_dq,
        dl_dc,
        dl_da,
        dl_db,
        dx_adj,
        dnu_adj,
    })
}

// ─────────────────────────────────────────────────────────────────────────────
// Parametric NLP adjoint
// ─────────────────────────────────────────────────────────────────────────────

/// Gradients for a general nonlinear program via the adjoint method.
#[derive(Debug, Clone)]
pub struct NlpGrad {
    /// Gradient of loss w.r.t. objective gradient parameters (n).
    pub dl_df_params: Vec<f64>,
    /// Gradient of loss w.r.t. inequality constraint parameters (m × n adjoint).
    pub dl_dg_params: Vec<Vec<f64>>,
    /// Gradient of loss w.r.t. equality constraint parameters (p × n adjoint).
    pub dl_dh_params: Vec<Vec<f64>>,
    /// Adjoint of primal variables (dx_adj).
    pub dx_adj: Vec<f64>,
    /// Adjoint of inequality duals (dlambda_adj).
    pub dlambda_adj: Vec<f64>,
    /// Adjoint of equality duals (dnu_adj).
    pub dnu_adj: Vec<f64>,
}

/// Compute the parametric NLP adjoint gradients.
///
/// Given the optimal solution (x*, λ*, ν*) of:
///
///   min f(x)   s.t.   g(x) ≤ 0,   h(x) = 0
///
/// and Jacobians:
/// - `f_grad` : ∇f(x*) evaluated at x* (length n)
/// - `g_jac`  : ∂g/∂x at x* (m×n row-major)
/// - `h_jac`  : ∂h/∂x at x* (p×n row-major)
///
/// the KKT stationarity condition is:
///
///   ∇f + Gᵀλ + Hᵀν = 0
///
/// We solve the bordered KKT adjoint system using only the active inequality
/// constraints (those with λ_i > 0).
///
/// # Arguments
/// * `f_grad` – ∇f(x*) (length n).
/// * `g_jac` – Jacobian of inequality constraints at x* (m×n).
/// * `h_jac` – Jacobian of equality constraints at x* (p×n).
/// * `x_star` – optimal primal (length n).
/// * `lambda_star` – optimal inequality duals (length m, ≥ 0).
/// * `nu_star` – optimal equality duals (length p).
/// * `dl_dx` – upstream gradient dL/dx* (length n).
///
/// # Errors
/// Returns `OptimizeError::ComputationError` if the adjoint system is singular.
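///
/// # Example
///
/// A minimal unconstrained sketch (marked `ignore`): with no constraints and
/// the rank-0 Hessian approximation used below, the primal adjoint reduces to
/// dL/dx divided by the regularization constant.
///
/// ```ignore
/// let grad = parametric_nlp_adjoint(
///     &[1.0, 2.0], // ∇f(x*)
///     &[],         // no inequality Jacobian
///     &[],         // no equality Jacobian
///     &[0.5, -0.5],
///     &[],
///     &[],
///     &[1.0, 0.0],
/// )
/// .unwrap();
/// assert_eq!(grad.dl_df_params.len(), 2);
/// ```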
pub fn parametric_nlp_adjoint(
    f_grad: &[f64],
    g_jac: &[Vec<f64>],
    h_jac: &[Vec<f64>],
    x_star: &[f64],
    lambda_star: &[f64],
    nu_star: &[f64],
    dl_dx: &[f64],
) -> OptimizeResult<NlpGrad> {
    let n = x_star.len();
    let m = lambda_star.len();
    let p = nu_star.len();

    if dl_dx.len() != n {
        return Err(OptimizeError::InvalidInput(format!(
            "dl_dx length {} != n {}",
            dl_dx.len(),
            n
        )));
    }

    // Identify active inequality constraints: λ_i > 0 (complementary slackness)
    let active_tol = 1e-8_f64;
    let active_ineq: Vec<usize> = (0..m).filter(|&i| lambda_star[i] > active_tol).collect();
    let m_act = active_ineq.len();

    // Build the bordered KKT Jacobian for the active constraints:
    //
    //     [ ∇²L_xx             G_actᵀ   Hᵀ ]
    //     [ diag(λ_act)·G_act   0        0  ]     size: (n + m_act + p) × (n + m_act + p)
    //     [ H                   0        0  ]
    //
    // Only the objective gradient is available here (no Hessian of the
    // Lagrangian), so ∇²L_xx is approximated by a small Tikhonov term:
    // ∇²L_xx ≈ reg · I.
    let reg = 1e-8_f64;
    let dim = n + m_act + p;
    let mut jac = vec![vec![0.0_f64; dim]; dim];

    // Block (0,0): reg * I (n×n)
    for i in 0..n {
        jac[i][i] = reg;
    }

    // Block (0,1): G_actᵀ (n×m_act) — columns from active rows of g_jac
    for (col, &ai) in active_ineq.iter().enumerate() {
        for row in 0..n {
            let g_val = if ai < g_jac.len() && row < g_jac[ai].len() {
                g_jac[ai][row]
            } else {
                0.0
            };
            jac[row][n + col] = g_val;
        }
    }

    // Block (0,2): Hᵀ (n×p)
    for i in 0..p {
        for row in 0..n {
            let h_val = if i < h_jac.len() && row < h_jac[i].len() {
                h_jac[i][row]
            } else {
                0.0
            };
            jac[row][n + m_act + i] = h_val;
        }
    }

    // Block (1,0): diag(λ_act) G_act (m_act×n)
    for (row, &ai) in active_ineq.iter().enumerate() {
        let lam_i = lambda_star[ai];
        for col in 0..n {
            let g_val = if ai < g_jac.len() && col < g_jac[ai].len() {
                g_jac[ai][col]
            } else {
                0.0
            };
            jac[n + row][col] = lam_i * g_val;
        }
    }

    // Block (1,1): 0 (diag(g(x*)) ≈ 0 at complementarity)
    // Block (2,0): H (p×n)
    for i in 0..p {
        for col in 0..n {
            let h_val = if i < h_jac.len() && col < h_jac[i].len() {
                h_jac[i][col]
            } else {
                0.0
            };
            jac[n + m_act + i][col] = h_val;
        }
    }

    // Transpose for adjoint system
    let mut jac_t = vec![vec![0.0_f64; dim]; dim];
    for i in 0..dim {
        for j in 0..dim {
            jac_t[i][j] = jac[j][i];
        }
    }

    // RHS: [dL/dx; 0_m_act; 0_p]
    let mut rhs = vec![0.0_f64; dim];
    for i in 0..n {
        rhs[i] = dl_dx[i];
    }

    // Solve adjoint system
    let adj = solve_implicit_system(&jac_t, &rhs)?;

    let dx_adj = adj[..n].to_vec();
    let dlambda_adj_active = adj[n..n + m_act].to_vec();
    let dnu_adj = adj[n + m_act..].to_vec();

    // Expand dlambda_adj back to the full m-dimensional vector
    let mut dlambda_adj = vec![0.0_f64; m];
    for (idx, &ai) in active_ineq.iter().enumerate() {
        if idx < dlambda_adj_active.len() {
            dlambda_adj[ai] = dlambda_adj_active[idx];
        }
    }

    // Parameter sensitivities.
    // dL/df_params: without an explicit parametric form for ∇f(·, θ), we return
    // the primal adjoint itself; callers can chain it with ∂(∇f)/∂θ as needed.
    let dl_df_params = dx_adj.clone();

    // dL/dG_{ij} = dlambda_adj_i * x_j + lambda_i * dx_adj_j
    let mut dl_dg_params = vec![vec![0.0_f64; n]; m];
    for i in 0..m {
        for j in 0..n {
            dl_dg_params[i][j] = dlambda_adj[i] * x_star[j] + lambda_star[i] * dx_adj[j];
        }
    }

    // dL/dH_{ij} = dnu_adj_i * x_j + nu_i * dx_adj_j
    let mut dl_dh_params = vec![vec![0.0_f64; n]; p];
    for i in 0..p {
        for j in 0..n {
            dl_dh_params[i][j] = dnu_adj[i] * x_star[j] + nu_star[i] * dx_adj[j];
        }
    }

    Ok(NlpGrad {
        dl_df_params,
        dl_dg_params,
        dl_dh_params,
        dx_adj,
        dlambda_adj,
        dnu_adj,
    })
}

// ─────────────────────────────────────────────────────────────────────────────
// Utility helpers: regularization and small linear-algebra routines
// ─────────────────────────────────────────────────────────────────────────────

/// Augment Q with a Tikhonov regularization term δI for numerical stability.
pub fn regularize_q(q: &[Vec<f64>], delta: f64) -> Vec<Vec<f64>> {
    let n = q.len();
    let mut q_reg = q.to_vec();
    for i in 0..n {
        if i < q_reg.len() && i < q_reg[i].len() {
            q_reg[i][i] += delta;
        }
    }
    q_reg
}

/// Compute the matrix-vector product y = Ax for a row-major matrix.
pub fn mat_vec(a: &[Vec<f64>], x: &[f64]) -> Vec<f64> {
    a.iter()
        .map(|row| {
            row.iter()
                .zip(x.iter())
                .map(|(&a_ij, &x_j)| a_ij * x_j)
                .sum()
        })
        .collect()
}

/// Compute the outer product of two vectors: C_{ij} = a_i * b_j.
pub fn outer_product(a: &[f64], b: &[f64]) -> Vec<Vec<f64>> {
    a.iter()
        .map(|&ai| b.iter().map(|&bj| ai * bj).collect())
        .collect()
}

/// Compute the symmetric outer product: C = ½(a bᵀ + b aᵀ).
pub fn sym_outer_product(a: &[f64], b: &[f64]) -> Vec<Vec<f64>> {
    let n = a.len();
    let mut c = vec![vec![0.0_f64; n]; n];
    for i in 0..n {
        for j in 0..n {
            c[i][j] = 0.5 * (a[i] * b[j] + b[i] * a[j]);
        }
    }
    c
}

// ─────────────────────────────────────────────────────────────────────────────
// KKT system struct for monitoring/debugging
// ─────────────────────────────────────────────────────────────────────────────

/// Represents the assembled KKT system for an equality-constrained QP.
///
/// Stores the bordered matrix and provides utilities for solving and
/// computing sensitivity.
#[derive(Debug, Clone)]
pub struct KktSystem {
    /// The bordered KKT matrix K = [Q Aᵀ; A 0].
    pub matrix: Vec<Vec<f64>>,
    /// Dimension n (number of primal variables).
    pub n: usize,
    /// Number of equality constraints p.
    pub p: usize,
}

impl KktSystem {
    /// Build a new KKT system from Q and A.
    pub fn new(q: &[Vec<f64>], a: &[Vec<f64>]) -> Self {
        let n = q.len();
        let p = a.len();
        let matrix = kkt_matrix(q, a);
        Self { matrix, n, p }
    }

    /// Solve the KKT system: K [x; ν] = [b1; b2].
    pub fn solve(&self, b1: &[f64], b2: &[f64]) -> OptimizeResult<(Vec<f64>, Vec<f64>)> {
        let dim = self.n + self.p;
        let mut rhs = Vec::with_capacity(dim);
        rhs.extend_from_slice(b1);
        rhs.extend_from_slice(b2);

        let sol = solve_implicit_system(&self.matrix, &rhs)?;
        let x = sol[..self.n].to_vec();
        let nu = sol[self.n..].to_vec();
        Ok((x, nu))
    }

    /// Compute the sensitivity gradients for the given upstream gradient.
    ///
    /// `q` and `a` should match the matrices this system was built from.
    pub fn sensitivity(
        &self,
        q: &[Vec<f64>],
        a: &[Vec<f64>],
        x: &[f64],
        nu: &[f64],
        dl_dx: &[f64],
    ) -> OptimizeResult<KktGrad> {
        kkt_sensitivity(q, a, x, nu, dl_dx)
    }
}

// ─────────────────────────────────────────────────────────────────────────────
// Tests
// ─────────────────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    // Helper: numerical gradient via central differences
    fn numerical_gradient_kkt<F>(f: F, x: &[f64], eps: f64) -> Vec<f64>
    where
        F: Fn(&[f64]) -> f64,
    {
        let n = x.len();
        let mut grad = vec![0.0_f64; n];
        for i in 0..n {
            let mut xp = x.to_vec();
            let mut xm = x.to_vec();
            xp[i] += eps;
            xm[i] -= eps;
            grad[i] = (f(&xp) - f(&xm)) / (2.0 * eps);
        }
        grad
    }

    #[test]
    fn test_kkt_matrix_shape() {
        // Q: 3×3, A: 2×3 → bordered: 5×5
        let q = vec![
            vec![2.0, 0.0, 0.0],
            vec![0.0, 3.0, 0.0],
            vec![0.0, 0.0, 4.0],
        ];
        let a = vec![vec![1.0, 1.0, 0.0], vec![0.0, 1.0, 1.0]];

        let k = kkt_matrix(&q, &a);
        assert_eq!(k.len(), 5, "KKT matrix should be 5×5");
        assert_eq!(k[0].len(), 5);

        // Top-left block: Q
        assert!((k[0][0] - 2.0).abs() < 1e-12);
        assert!((k[1][1] - 3.0).abs() < 1e-12);
        assert!((k[2][2] - 4.0).abs() < 1e-12);

        // Top-right block: Aᵀ
        // A row 0: [1,1,0] → column 0 of the Aᵀ block = [1,1,0]
        assert!((k[0][3] - 1.0).abs() < 1e-12);
        assert!((k[1][3] - 1.0).abs() < 1e-12);
        assert!((k[2][3] - 0.0).abs() < 1e-12);

        // Bottom-left block: A
        assert!((k[3][0] - 1.0).abs() < 1e-12);
        assert!((k[3][1] - 1.0).abs() < 1e-12);
        assert!((k[4][1] - 1.0).abs() < 1e-12);
        assert!((k[4][2] - 1.0).abs() < 1e-12);

        // Bottom-right block: 0
        assert!((k[3][3]).abs() < 1e-12);
        assert!((k[4][4]).abs() < 1e-12);
    }

    #[test]
    fn test_kkt_matrix_symmetry() {
        let q = vec![vec![2.0, 1.0], vec![1.0, 3.0]];
        let a = vec![vec![1.0, 2.0]];

        let k = kkt_matrix(&q, &a);
        let dim = k.len();

        for i in 0..dim {
            for j in 0..dim {
                assert!(
                    (k[i][j] - k[j][i]).abs() < 1e-12,
                    "KKT matrix not symmetric at ({},{}): {} vs {}",
                    i,
                    j,
                    k[i][j],
                    k[j][i]
                );
            }
        }
    }

    #[test]
    fn test_kkt_sensitivity_unconstrained() {
        // For an unconstrained QP: min ½ xᵀQx + cᵀx
        // x*(c) = -Q⁻¹c, so dx*/dc = -Q⁻¹ and
        // dL/dc = (dx*/dc)ᵀ dL/dx = -Q⁻¹ dL/dx.
        // With Q = 2I and dL/dx = [1, 0]: dL/dc = -(1/2)[1, 0] = [-0.5, 0].

        let q = vec![vec![2.0, 0.0], vec![0.0, 2.0]];
        let a: Vec<Vec<f64>> = vec![];
        let x = vec![-0.5, -1.0]; // x* = -Q⁻¹c for c = [1, 2]
        let nu: Vec<f64> = vec![];
        let dl_dx = vec![1.0, 0.0];

        let grad = kkt_sensitivity(&q, &a, &x, &nu, &dl_dx).expect("KKT sensitivity failed");

        // dL/dc = -Q⁻¹ dL/dx = -0.5 * [1, 0] = [-0.5, 0]
        assert!(
            (grad.dl_dc[0] - (-0.5)).abs() < 1e-10,
            "dl/dc[0] = {} (expected -0.5)",
            grad.dl_dc[0]
        );
        assert!(
            grad.dl_dc[1].abs() < 1e-10,
            "dl/dc[1] = {} (expected 0.0)",
            grad.dl_dc[1]
        );
    }

    #[test]
    fn test_kkt_sensitivity_with_equality() {
        // min ½‖x‖² s.t. x[0] + x[1] = 1
        // Optimal: x = [0.5, 0.5]; with the convention Qx + c + Aᵀν = 0 this gives ν = -0.5.
        let q = vec![vec![1.0, 0.0], vec![0.0, 1.0]]; // Q = I for ½‖x‖²
        let a = vec![vec![1.0, 1.0]];
        let x = vec![0.5, 0.5];
        let nu = vec![-0.5];
        let dl_dx = vec![1.0, 0.0];

        let grad = kkt_sensitivity(&q, &a, &x, &nu, &dl_dx).expect("KKT sensitivity failed");

        // Verify dl/dc is finite
        assert!(grad.dl_dc[0].is_finite(), "dl/dc[0] not finite");
        assert!(grad.dl_dc[1].is_finite(), "dl/dc[1] not finite");
        // Verify dl/db is finite
        assert!(grad.dl_db[0].is_finite(), "dl/db[0] not finite");
    }
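
    // Analytic check of dL/db for the same problem family: assuming the loss is
    // linear with gradient dl_dx, x*(b) = [b/2, b/2] for min ½‖x‖² s.t.
    // x₀ + x₁ = b, so dL/db = dl_dx · dx*/db = 1·½ + 0·½ = 0.5.
    #[test]
    fn test_kkt_sensitivity_db_analytic() {
        let q = vec![vec![1.0, 0.0], vec![0.0, 1.0]];
        let a = vec![vec![1.0, 1.0]];
        let x = vec![0.5, 0.5]; // solution for b = 1
        let nu = vec![-0.5];
        let dl_dx = vec![1.0, 0.0];

        let grad = kkt_sensitivity(&q, &a, &x, &nu, &dl_dx).expect("KKT sensitivity failed");
        assert!(
            (grad.dl_db[0] - 0.5).abs() < 1e-10,
            "dl/db[0] = {} (expected 0.5)",
            grad.dl_db[0]
        );
    }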

    #[test]
    fn test_kkt_sensitivity_gradient_check_c() {
        // Verify dL/dc via finite differences.
        // Problem: min ½ xᵀQx + cᵀx (unconstrained), x*(c) = -Q⁻¹c
        // Loss: L = 0.5 * ‖x*‖², so dL/dx = x*.

        let q = vec![vec![4.0, 0.0], vec![0.0, 4.0]];
        let a: Vec<Vec<f64>> = vec![];
        // c = [2, 0] → x* = [-0.5, 0]
        let c = vec![2.0_f64, 0.0];
        let x = vec![-0.5_f64, 0.0]; // x* = -Q⁻¹c = [-0.5, 0]
        let nu: Vec<f64> = vec![];

        // Loss: L = 0.5 * ‖x*‖² = 0.5 * 0.25 = 0.125
        // dL/dx = x* = [-0.5, 0]
        let dl_dx = vec![-0.5_f64, 0.0];

        let grad = kkt_sensitivity(&q, &a, &x, &nu, &dl_dx).expect("KKT sensitivity failed");

        // Analytic: dx*/dc = -Q⁻¹, so dL/dc = (dx*/dc)ᵀ dL/dx = -Q⁻¹ dL/dx
        //         = -(1/4) * [-0.5, 0] = [0.125, 0]

        // Check against a central finite difference in c[0]
        let eps = 1e-5_f64;
        let solve_unconstrained = |c_vec: &[f64]| -> Vec<f64> {
            // x* = -Q⁻¹ c (Q is diagonal here)
            c_vec
                .iter()
                .enumerate()
                .map(|(i, &ci)| -ci / q[i][i])
                .collect()
        };

        let mut c_plus = c.clone();
        c_plus[0] += eps;
        let x_plus = solve_unconstrained(&c_plus);

        let mut c_minus = c.clone();
        c_minus[0] -= eps;
        let x_minus = solve_unconstrained(&c_minus);

        let loss = |xv: &[f64]| -> f64 { xv.iter().map(|&xi| 0.5 * xi * xi).sum() };
        let fd_dc0 = (loss(&x_plus) - loss(&x_minus)) / (2.0 * eps);

        assert!(
            (grad.dl_dc[0] - fd_dc0).abs() < 1e-5,
            "KKT sensitivity dL/dc[0] = {} vs FD = {}",
            grad.dl_dc[0],
            fd_dc0
        );
    }

    #[test]
    fn test_kkt_system_struct() {
        let q = vec![vec![4.0, 0.0], vec![0.0, 4.0]];
        let a = vec![vec![1.0, 1.0]];

        let sys = KktSystem::new(&q, &a);
        assert_eq!(sys.n, 2);
        assert_eq!(sys.p, 1);
        assert_eq!(sys.matrix.len(), 3);

        // KKT system: [Q Aᵀ; A 0][x; ν] = [b1; b2]
        // For min ½ xᵀQx + cᵀx s.t. Ax = b:
        //   stationarity: Qx + c + Aᵀν = 0 → Qx + Aᵀν = -c
        //   feasibility:  Ax = b
        // With Q = 4I, c = 0, A = [[1, 1]], b = [1]:
        //   4x₀ + ν = 0, 4x₁ + ν = 0, x₀ + x₁ = 1
        // From stationarity: x₀ = -ν/4, x₁ = -ν/4
        // From feasibility: -ν/4 - ν/4 = 1 → -ν/2 = 1 → ν = -2
        // So x₀ = x₁ = 0.5, ν = -2.
        let b1 = vec![0.0, 0.0]; // -c
        let b2 = vec![1.0]; // b
        let (x, nu) = sys.solve(&b1, &b2).expect("KKT solve failed");
        assert!((x[0] - 0.5).abs() < 1e-10, "x[0] = {} (expected 0.5)", x[0]);
        assert!((x[1] - 0.5).abs() < 1e-10, "x[1] = {} (expected 0.5)", x[1]);
        assert!(
            (nu[0] - (-2.0)).abs() < 1e-10,
            "ν[0] = {} (expected -2.0)",
            nu[0]
        );
    }

    #[test]
    fn test_outer_product() {
        let a = vec![1.0, 2.0];
        let b = vec![3.0, 4.0];
        let c = outer_product(&a, &b);
        assert!((c[0][0] - 3.0).abs() < 1e-12);
        assert!((c[0][1] - 4.0).abs() < 1e-12);
        assert!((c[1][0] - 6.0).abs() < 1e-12);
        assert!((c[1][1] - 8.0).abs() < 1e-12);
    }

    #[test]
    fn test_sym_outer_product() {
        let a = vec![1.0, 2.0];
        let b = vec![3.0, 4.0];
        let c = sym_outer_product(&a, &b);
        // C[i][j] = 0.5*(a[i]*b[j] + b[i]*a[j])
        assert!((c[0][0] - 3.0).abs() < 1e-12); // 0.5*(1*3 + 3*1)
        assert!((c[1][1] - 8.0).abs() < 1e-12); // 0.5*(2*4 + 4*2)
        // Symmetry
        assert!((c[0][1] - c[1][0]).abs() < 1e-12);
    }
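
    // Consistency sketch: by construction, the dl_dq returned by
    // `kkt_sensitivity` is the negated symmetric outer product of the primal
    // adjoint dx_adj and the primal solution x.
    #[test]
    fn test_dl_dq_matches_sym_outer_product() {
        let q = vec![vec![2.0, 0.0], vec![0.0, 2.0]];
        let a = vec![vec![1.0, 1.0]];
        let x = vec![0.5, 0.5];
        let nu = vec![-1.0];
        let dl_dx = vec![1.0, -1.0];

        let grad = kkt_sensitivity(&q, &a, &x, &nu, &dl_dx).expect("KKT sensitivity failed");
        let sym = sym_outer_product(&grad.dx_adj, &x);
        for i in 0..2 {
            for j in 0..2 {
                assert!((grad.dl_dq[i][j] + sym[i][j]).abs() < 1e-12);
            }
        }
    }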

    #[test]
    fn test_nlp_adjoint_unconstrained() {
        // With no constraints the adjoint system is reg*I dx_adj = dL/dx,
        // so dx_adj = dL/dx / reg.
        let n = 3_usize;
        let f_grad = vec![1.0, 2.0, 3.0_f64];
        let g_jac: Vec<Vec<f64>> = vec![];
        let h_jac: Vec<Vec<f64>> = vec![];
        let x_star = vec![0.5, -0.5, 0.0_f64];
        let lambda_star: Vec<f64> = vec![];
        let nu_star: Vec<f64> = vec![];
        let dl_dx = vec![1.0, 0.0, 0.0_f64];

        let grad = parametric_nlp_adjoint(
            &f_grad,
            &g_jac,
            &h_jac,
            &x_star,
            &lambda_star,
            &nu_star,
            &dl_dx,
        )
        .expect("NLP adjoint failed");

        // With only reg*I in the (0,0) block, dx_adj = dL/dx / reg
        let reg = 1e-8_f64;
        assert!(
            (grad.dx_adj[0] - dl_dx[0] / reg).abs() < 1e-3 * (dl_dx[0] / reg).abs() + 1e-8,
            "dx_adj[0] = {} (expected ~{})",
            grad.dx_adj[0],
            dl_dx[0] / reg
        );

        // Sizes match
        assert_eq!(grad.dl_df_params.len(), n);
        assert_eq!(grad.dlambda_adj.len(), 0);
        assert_eq!(grad.dnu_adj.len(), 0);
    }

    #[test]
    fn test_regularize_q() {
        let q = vec![vec![2.0, 1.0], vec![1.0, 3.0]];
        let q_reg = regularize_q(&q, 0.5);
        assert!((q_reg[0][0] - 2.5).abs() < 1e-12);
        assert!((q_reg[1][1] - 3.5).abs() < 1e-12);
        assert!((q_reg[0][1] - 1.0).abs() < 1e-12); // off-diagonal unchanged
    }

    #[test]
    fn test_mat_vec() {
        let a = vec![vec![1.0, 2.0], vec![3.0, 4.0]];
        let x = vec![1.0, 2.0];
        let y = mat_vec(&a, &x);
        assert!((y[0] - 5.0).abs() < 1e-12);
        assert!((y[1] - 11.0).abs() < 1e-12);
    }

    // Keep `numerical_gradient_kkt` referenced so it does not trigger an
    // unused-function warning; it is retained for ad-hoc gradient checks.
    #[allow(dead_code)]
    fn _use_numerical_gradient(grad_fn: impl Fn(&[f64]) -> f64, x: &[f64]) -> Vec<f64> {
        numerical_gradient_kkt(grad_fn, x, 1e-6)
    }
}