Skip to main content

scirs2_optimize/symbolic/
mod.rs

1//! Symbolic-gradient-based optimization.
2//!
3//! Use `scirs2-symbolic`'s symbolic gradient + Hessian as the input to
4//! Newton's method, L-BFGS, and trust-region solvers — avoiding finite-difference
5//! approximations and their associated step-size sensitivity.
6//!
7//! # Example
8//! ```no_run
9//! use scirs2_optimize::symbolic::newton;
10//! use scirs2_symbolic::eml::LoweredOp;
11//!
12//! // Cost function: f(x) = x² (minimum at x=0)
13//! let cost = LoweredOp::Mul(
14//!     Box::new(LoweredOp::Var(0)),
15//!     Box::new(LoweredOp::Var(0)),
16//! );
17//! let result = newton(&cost, &[5.0], 50, 1e-8).expect("converge");
18//! assert!(result.x[0].abs() < 1e-6);
19//! ```
20
21use std::sync::Arc;
22
23use scirs2_symbolic::eml::eval::{eval_real, EvalCtx};
24use scirs2_symbolic::eml::{grad, hessian, LoweredOp};
25
26/// Result of a symbolic optimization run.
27#[derive(Debug, Clone)]
28pub struct SymbolicNewtonResult {
29    /// Solution vector.
30    pub x: Vec<f64>,
31    /// Final cost function value.
32    pub f_final: f64,
33    /// Number of iterations performed.
34    pub iters: usize,
35    /// True if convergence tolerance was reached.
36    pub converged: bool,
37}
38
39/// Errors from symbolic Newton.
40#[derive(Debug, thiserror::Error)]
41pub enum SymbolicNewtonError {
42    /// Underlying `LoweredOp` evaluation failure (domain, division by zero,
43    /// unbound variable, etc.).
44    #[error("evaluation error: {0}")]
45    EvalError(String),
46    /// Hessian matrix is singular (or numerically too ill-conditioned) so the
47    /// Newton system H·dx = -grad cannot be solved.
48    #[error("singular Hessian — cannot invert")]
49    SingularHessian,
50    /// Initial point dimension does not match the cost function's variable count.
51    #[error("dimension mismatch: x0 has {x0_len}, cost expects {n_vars}")]
52    DimMismatch {
53        /// Length of the supplied `x0` vector.
54        x0_len: usize,
55        /// Number of variables `cost.count_vars()` reported.
56        n_vars: usize,
57    },
58}
59
60/// Newton's method using scirs2-symbolic gradient and Hessian.
61///
62/// # Arguments
63/// - `cost`: scalar cost function as a `LoweredOp` (must be twice-differentiable)
64/// - `x0`: initial point, length matches `cost.count_vars()`
65/// - `max_iter`: max iterations
66/// - `tol`: convergence tolerance on `||grad||₂`
67///
68/// # Errors
69/// - [`SymbolicNewtonError::DimMismatch`] when `x0.len() < cost.count_vars()`
70/// - [`SymbolicNewtonError::EvalError`] when symbolic gradient/Hessian
71///   evaluation fails (domain violation, unbound variable, etc.)
72/// - [`SymbolicNewtonError::SingularHessian`] when the Hessian is singular at
73///   the current iterate
74pub fn newton(
75    cost: &LoweredOp,
76    x0: &[f64],
77    max_iter: usize,
78    tol: f64,
79) -> Result<SymbolicNewtonResult, SymbolicNewtonError> {
80    let n_vars = cost.count_vars();
81    if x0.len() < n_vars {
82        return Err(SymbolicNewtonError::DimMismatch {
83            x0_len: x0.len(),
84            n_vars,
85        });
86    }
87
88    // Precompute symbolic grad and Hessian — once
89    let grad_ops: Vec<LoweredOp> = (0..n_vars).map(|i| grad(cost, i)).collect();
90    let hess_ops: Vec<Vec<LoweredOp>> = hessian(cost, n_vars);
91
92    let mut x = x0.to_vec();
93    let mut converged = false;
94    let mut iters = 0;
95
96    for k in 0..max_iter {
97        iters = k + 1;
98        let ctx = EvalCtx::new(&x);
99
100        // Evaluate gradient
101        let mut grad_vec: Vec<f64> = Vec::with_capacity(n_vars);
102        for g_op in &grad_ops {
103            let v =
104                eval_real(g_op, &ctx).map_err(|e| SymbolicNewtonError::EvalError(e.to_string()))?;
105            grad_vec.push(v);
106        }
107
108        // Check convergence: ||grad||₂ < tol
109        let grad_norm: f64 = grad_vec.iter().map(|g| g * g).sum::<f64>().sqrt();
110        if grad_norm < tol {
111            converged = true;
112            break;
113        }
114
115        // Evaluate Hessian
116        let mut hess_mat: Vec<Vec<f64>> = Vec::with_capacity(n_vars);
117        for row in &hess_ops {
118            let mut hess_row: Vec<f64> = Vec::with_capacity(n_vars);
119            for h_op in row {
120                let v = eval_real(h_op, &ctx)
121                    .map_err(|e| SymbolicNewtonError::EvalError(e.to_string()))?;
122                hess_row.push(v);
123            }
124            hess_mat.push(hess_row);
125        }
126
127        // Solve H·dx = -grad → solve_linear(H, grad) returns H⁻¹·grad
128        let dx = solve_linear(&hess_mat, &grad_vec)?;
129
130        // Update: x ← x - dx (Newton step is dx = H⁻¹·grad, so x ← x - H⁻¹·grad)
131        for (xi, dxi) in x.iter_mut().zip(dx.iter()) {
132            *xi -= *dxi;
133        }
134    }
135
136    let final_ctx = EvalCtx::new(&x);
137    let f_final =
138        eval_real(cost, &final_ctx).map_err(|e| SymbolicNewtonError::EvalError(e.to_string()))?;
139
140    Ok(SymbolicNewtonResult {
141        x,
142        f_final,
143        iters,
144        converged,
145    })
146}
147
148// ────────────────────────────────────────────────────────────────────────────
149// Shared result / error types for L-BFGS and trust-region symbolic solvers
150// ────────────────────────────────────────────────────────────────────────────
151
152/// Result returned by [`lbfgs_symbolic`] and [`trust_region_symbolic`].
153#[derive(Debug, Clone)]
154pub struct SymbolicOptResult {
155    /// Solution vector.
156    pub x: scirs2_core::ndarray::Array1<f64>,
157    /// Objective value at `x`.
158    pub f_val: f64,
159    /// L₂ norm of the gradient at `x`.
160    pub grad_norm: f64,
161    /// Number of iterations performed.
162    pub iters: usize,
163    /// True when `grad_norm < tol`.
164    pub converged: bool,
165}
166
167/// Errors from [`lbfgs_symbolic`] and [`trust_region_symbolic`].
168#[derive(Debug, thiserror::Error)]
169pub enum SymbolicOptError {
170    /// Objective evaluation failed.
171    #[error("eval error: {0}")]
172    EvalError(String),
173    /// Symbolic gradient evaluation failed.
174    #[error("gradient eval error: {0}")]
175    GradEvalError(String),
176    /// Symbolic Hessian evaluation failed.
177    #[error("Hessian eval error: {0}")]
178    HessEvalError(String),
179    /// `x0.len()` does not match the number of variables in the objective.
180    #[error("dimension mismatch: expected {expected} variables, got {got}")]
181    DimMismatch {
182        /// Variables inferred from the objective.
183        expected: usize,
184        /// Length of the supplied `x0`.
185        got: usize,
186    },
187    /// Optimizer exhausted its iteration budget without reaching `tol`.
188    #[error("not converged after {iters} iters (grad_norm = {grad_norm:.6e})")]
189    NotConverged {
190        /// Iteration count at termination.
191        iters: usize,
192        /// Gradient norm at termination.
193        grad_norm: f64,
194    },
195    /// Wolfe-condition line search failed to find a suitable step.
196    #[error("line search failed")]
197    LineSearchFailed,
198}
199
200// ────────────────────────────────────────────────────────────────────────────
201// Shared helpers
202// ────────────────────────────────────────────────────────────────────────────
203
204/// Evaluate `grad_ops` at `x`, returning an error variant on failure.
205fn eval_gradient(grad_ops: &[LoweredOp], x: &[f64]) -> Result<Vec<f64>, SymbolicOptError> {
206    let ctx = EvalCtx::new(x);
207    let mut g = Vec::with_capacity(grad_ops.len());
208    for op in grad_ops {
209        let v = eval_real(op, &ctx).map_err(|e| SymbolicOptError::GradEvalError(e.to_string()))?;
210        g.push(v);
211    }
212    Ok(g)
213}
214
215/// Evaluate objective at `x`.
216fn eval_objective(obj: &LoweredOp, x: &[f64]) -> Result<f64, SymbolicOptError> {
217    let ctx = EvalCtx::new(x);
218    eval_real(obj, &ctx).map_err(|e| SymbolicOptError::EvalError(e.to_string()))
219}
220
221/// L₂ norm of a slice.
222fn l2_norm(v: &[f64]) -> f64 {
223    v.iter().map(|x| x * x).sum::<f64>().sqrt()
224}
225
226/// Dot product.
227fn dot(a: &[f64], b: &[f64]) -> f64 {
228    a.iter().zip(b.iter()).map(|(ai, bi)| ai * bi).sum()
229}
230
231// ────────────────────────────────────────────────────────────────────────────
232// L-BFGS with exact symbolic gradient
233// ────────────────────────────────────────────────────────────────────────────
234
235/// L-BFGS optimizer using exact symbolic gradient.
236///
237/// Converges faster than L-BFGS with finite-difference gradients because the
238/// gradient is exact (no step-size tuning needed). Uses a two-loop L-BFGS
239/// recursion with Wolfe-condition line search.
240///
241/// # Arguments
242/// - `objective`: scalar objective as an `Arc<LoweredOp>`
243/// - `x0`: initial point; length must equal `objective.count_vars()`
244/// - `max_iter`: maximum number of iterations
245/// - `tol`: convergence tolerance on `||∇f||₂`
246/// - `memory`: L-BFGS history length (5–20 is typical)
247///
248/// # Errors
249/// - [`SymbolicOptError::DimMismatch`] when `x0.len() != objective.count_vars()`
250/// - [`SymbolicOptError::EvalError`] / [`SymbolicOptError::GradEvalError`] on
251///   symbolic evaluation failures
252/// - [`SymbolicOptError::NotConverged`] when `max_iter` is exhausted
253///
254/// # Example
255/// ```no_run
256/// # #[cfg(feature = "symbolic")]
257/// # {
258/// use std::sync::Arc;
259/// use scirs2_optimize::symbolic::lbfgs_symbolic;
260/// use scirs2_symbolic::eml::LoweredOp;
261/// use scirs2_core::ndarray::array;
262///
263/// let obj = Arc::new(LoweredOp::Mul(
264///     Box::new(LoweredOp::Var(0)),
265///     Box::new(LoweredOp::Var(0)),
266/// ));
267/// let result = lbfgs_symbolic(&obj, array![3.0].view(), 100, 1e-8, 10).expect("converge");
268/// assert!(result.x[0].abs() < 1e-6);
269/// # }
270/// ```
271pub fn lbfgs_symbolic(
272    objective: &Arc<LoweredOp>,
273    x0: scirs2_core::ndarray::ArrayView1<f64>,
274    max_iter: usize,
275    tol: f64,
276    memory: usize,
277) -> Result<SymbolicOptResult, SymbolicOptError> {
278    use scirs2_core::ndarray::Array1;
279
280    let n_vars = objective.count_vars();
281    if x0.len() != n_vars {
282        return Err(SymbolicOptError::DimMismatch {
283            expected: n_vars,
284            got: x0.len(),
285        });
286    }
287
288    // Precompute symbolic gradient ops once.
289    let grad_ops: Vec<LoweredOp> = (0..n_vars).map(|i| grad(objective.as_ref(), i)).collect();
290
291    let mut x: Vec<f64> = x0.iter().copied().collect();
292
293    // Evaluate initial gradient before the loop.
294    let mut g = eval_gradient(&grad_ops, &x)?;
295    let mut grad_norm = l2_norm(&g);
296
297    // Handle max_iter == 0 immediately.
298    if max_iter == 0 {
299        return Err(SymbolicOptError::NotConverged {
300            iters: 0,
301            grad_norm,
302        });
303    }
304
305    if grad_norm < tol {
306        let f_val = eval_objective(objective.as_ref(), &x)?;
307        return Ok(SymbolicOptResult {
308            x: Array1::from_vec(x),
309            f_val,
310            grad_norm,
311            iters: 0,
312            converged: true,
313        });
314    }
315
316    // History: ring buffer of (s_k, y_k, rho_k) triples.
317    // s_k = x_{k+1} - x_k, y_k = g_{k+1} - g_k, rho_k = 1 / (y_k^T s_k)
318    let mem = memory.max(1);
319    let mut history: std::collections::VecDeque<(Vec<f64>, Vec<f64>, f64)> =
320        std::collections::VecDeque::with_capacity(mem);
321
322    let mut converged = false;
323    let mut iters = 0;
324
325    for k in 0..max_iter {
326        iters = k + 1;
327
328        // ── Two-loop L-BFGS recursion ────────────────────────────────────
329        let direction = lbfgs_direction(&g, &history);
330
331        // ── Wolfe-condition line search ───────────────────────────────────
332        let f_k = eval_objective(objective.as_ref(), &x)?;
333        let dg = dot(&g, &direction); // ∇f^T d (should be < 0)
334
335        const C1: f64 = 1e-4; // sufficient decrease
336        const C2: f64 = 0.9; // curvature
337        const MAX_LS: usize = 20;
338
339        let mut alpha = 1.0_f64;
340        let mut x_new: Vec<f64> = Vec::with_capacity(n_vars);
341        let mut g_new: Vec<f64>;
342        let mut armijo_ok = false;
343
344        // Try to satisfy both Wolfe conditions with backtracking.
345        let mut ls_iter = 0;
346        loop {
347            x_new = x
348                .iter()
349                .zip(&direction)
350                .map(|(xi, di)| xi + alpha * di)
351                .collect();
352            let f_new = eval_objective(objective.as_ref(), &x_new)?;
353            let suff_dec = f_new <= f_k + C1 * alpha * dg;
354
355            if suff_dec {
356                g_new = eval_gradient(&grad_ops, &x_new)?;
357                let curvature = dot(&g_new, &direction).abs() <= C2 * dg.abs();
358                if curvature {
359                    armijo_ok = true;
360                    break;
361                }
362                // Armijo satisfied but curvature not — still usable.
363                armijo_ok = true;
364                // Continue backtracking hoping to also satisfy curvature.
365            } else {
366                g_new = Vec::new(); // placeholder — will recompute on acceptance
367            }
368
369            ls_iter += 1;
370            if ls_iter >= MAX_LS {
371                break;
372            }
373            alpha *= 0.5;
374        }
375
376        // If we left the loop without a good step, accept Armijo-only or fail.
377        if !armijo_ok {
378            // Accept the last alpha that passed Armijo (may be very small).
379            x_new = x
380                .iter()
381                .zip(&direction)
382                .map(|(xi, di)| xi + alpha * di)
383                .collect();
384            let f_new = eval_objective(objective.as_ref(), &x_new)?;
385            if f_new > f_k + C1 * alpha * dg {
386                return Err(SymbolicOptError::LineSearchFailed);
387            }
388            g_new = eval_gradient(&grad_ops, &x_new)?;
389        } else if g_new.is_empty() {
390            // Filled placeholder path — curvature branch that broke early
391            g_new = eval_gradient(&grad_ops, &x_new)?;
392        }
393
394        // ── Update history ────────────────────────────────────────────────
395        let s_k: Vec<f64> = x_new.iter().zip(&x).map(|(xn, xo)| xn - xo).collect();
396        let y_k: Vec<f64> = g_new.iter().zip(&g).map(|(gn, go)| gn - go).collect();
397        let sy = dot(&s_k, &y_k);
398        if sy.abs() > 1e-20 {
399            let rho = 1.0 / sy;
400            if history.len() == mem {
401                history.pop_front();
402            }
403            history.push_back((s_k, y_k, rho));
404        }
405
406        // ── Advance ───────────────────────────────────────────────────────
407        x = x_new;
408        g = g_new;
409        grad_norm = l2_norm(&g);
410
411        if grad_norm < tol {
412            converged = true;
413            break;
414        }
415    }
416
417    let f_val = eval_objective(objective.as_ref(), &x)?;
418
419    if converged {
420        Ok(SymbolicOptResult {
421            x: Array1::from_vec(x),
422            f_val,
423            grad_norm,
424            iters,
425            converged: true,
426        })
427    } else {
428        Err(SymbolicOptError::NotConverged { iters, grad_norm })
429    }
430}
431
432/// Compute the L-BFGS search direction given the current gradient and history.
433///
434/// Standard two-loop recursion; returns `-H_k · g`.
435fn lbfgs_direction(
436    g: &[f64],
437    history: &std::collections::VecDeque<(Vec<f64>, Vec<f64>, f64)>,
438) -> Vec<f64> {
439    let n = g.len();
440    let m = history.len();
441    let mut q: Vec<f64> = g.to_vec();
442    let mut alphas: Vec<f64> = vec![0.0; m];
443
444    // First loop (newest to oldest)
445    for (idx, (s, y, rho)) in history.iter().rev().enumerate() {
446        let a = rho * dot(s, &q);
447        alphas[m - 1 - idx] = a;
448        for j in 0..n {
449            q[j] -= a * y[j];
450        }
451    }
452
453    // Initial Hessian scaling γ_k = s_{k-1}^T y_{k-1} / y_{k-1}^T y_{k-1}
454    let mut r: Vec<f64> = if let Some((s, y, _)) = history.back() {
455        let sy = dot(s, y);
456        let yy = dot(y, y);
457        let gamma = if yy > 1e-20 { sy / yy } else { 1.0 };
458        q.iter().map(|qi| gamma * qi).collect()
459    } else {
460        q.clone() // H_0 = I
461    };
462
463    // Second loop (oldest to newest)
464    for (idx, (s, y, rho)) in history.iter().enumerate() {
465        let beta = rho * dot(y, &r);
466        let a = alphas[idx];
467        for j in 0..n {
468            r[j] += s[j] * (a - beta);
469        }
470    }
471
472    // Search direction = -r
473    r.iter().map(|ri| -ri).collect()
474}
475
476// ────────────────────────────────────────────────────────────────────────────
477// Trust-region dogleg with exact symbolic gradient + Hessian
478// ────────────────────────────────────────────────────────────────────────────
479
480/// Trust-region optimizer using exact symbolic gradient + Hessian.
481///
482/// Uses the dogleg step: if the Newton step is within the trust radius, take it;
483/// otherwise interpolate between the steepest-descent step and the Newton step
484/// on the trust-region boundary.
485///
486/// # Arguments
487/// - `objective`: scalar objective as an `Arc<LoweredOp>`
488/// - `x0`: initial point; length must equal `objective.count_vars()`
489/// - `max_iter`: maximum number of iterations
490/// - `tol`: convergence tolerance on `||∇f||₂`
491/// - `initial_radius`: starting trust-region radius (typically 1.0)
492///
493/// # Errors
494/// - [`SymbolicOptError::DimMismatch`] when `x0.len() != objective.count_vars()`
495/// - [`SymbolicOptError::EvalError`] / [`SymbolicOptError::GradEvalError`] /
496///   [`SymbolicOptError::HessEvalError`] on symbolic evaluation failures
497/// - [`SymbolicOptError::NotConverged`] when `max_iter` is exhausted
498///
499/// # Example
500/// ```no_run
501/// # #[cfg(feature = "symbolic")]
502/// # {
503/// use std::sync::Arc;
504/// use scirs2_optimize::symbolic::trust_region_symbolic;
505/// use scirs2_symbolic::eml::LoweredOp;
506/// use scirs2_core::ndarray::array;
507///
508/// let obj = Arc::new(LoweredOp::Mul(
509///     Box::new(LoweredOp::Var(0)),
510///     Box::new(LoweredOp::Var(0)),
511/// ));
512/// let result = trust_region_symbolic(&obj, array![3.0].view(), 100, 1e-8, 1.0)
513///     .expect("converge");
514/// assert!(result.x[0].abs() < 1e-6);
515/// # }
516/// ```
517pub fn trust_region_symbolic(
518    objective: &Arc<LoweredOp>,
519    x0: scirs2_core::ndarray::ArrayView1<f64>,
520    max_iter: usize,
521    tol: f64,
522    initial_radius: f64,
523) -> Result<SymbolicOptResult, SymbolicOptError> {
524    use scirs2_core::ndarray::Array1;
525
526    let n_vars = objective.count_vars();
527    if x0.len() != n_vars {
528        return Err(SymbolicOptError::DimMismatch {
529            expected: n_vars,
530            got: x0.len(),
531        });
532    }
533
534    // Precompute symbolic gradient + Hessian ops once.
535    let grad_ops: Vec<LoweredOp> = (0..n_vars).map(|i| grad(objective.as_ref(), i)).collect();
536    let hess_ops: Vec<Vec<LoweredOp>> = hessian(objective.as_ref(), n_vars);
537
538    let mut x: Vec<f64> = x0.iter().copied().collect();
539
540    // Evaluate initial gradient.
541    let mut g = eval_gradient(&grad_ops, &x)?;
542    let mut grad_norm = l2_norm(&g);
543
544    if max_iter == 0 {
545        return Err(SymbolicOptError::NotConverged {
546            iters: 0,
547            grad_norm,
548        });
549    }
550
551    if grad_norm < tol {
552        let f_val = eval_objective(objective.as_ref(), &x)?;
553        return Ok(SymbolicOptResult {
554            x: Array1::from_vec(x),
555            f_val,
556            grad_norm,
557            iters: 0,
558            converged: true,
559        });
560    }
561
562    let mut delta = initial_radius.max(1e-8);
563    let mut converged = false;
564    let mut iters = 0;
565
566    for k in 0..max_iter {
567        iters = k + 1;
568
569        let f_k = eval_objective(objective.as_ref(), &x)?;
570
571        // Evaluate Hessian at current x.
572        let ctx = EvalCtx::new(&x);
573        let mut h_mat: Vec<Vec<f64>> = Vec::with_capacity(n_vars);
574        for row in &hess_ops {
575            let mut h_row: Vec<f64> = Vec::with_capacity(n_vars);
576            for h_op in row {
577                let v = eval_real(h_op, &ctx)
578                    .map_err(|e| SymbolicOptError::HessEvalError(e.to_string()))?;
579                h_row.push(v);
580            }
581            h_mat.push(h_row);
582        }
583
584        // Compute dogleg step.
585        let p = dogleg_step_sym(&g, &h_mat, delta);
586
587        // Actual vs predicted reduction.
588        let x_new: Vec<f64> = x.iter().zip(&p).map(|(xi, pi)| xi + pi).collect();
589        let f_new = eval_objective(objective.as_ref(), &x_new)?;
590        let actual_red = f_k - f_new;
591
592        // Predicted reduction: m(0) - m(p) = -g^T p - 0.5 p^T H p
593        let gtp = dot(&g, &p);
594        let htp: Vec<f64> = matvec(&h_mat, &p);
595        let phtp = dot(&p, &htp);
596        let pred_red = -(gtp + 0.5 * phtp);
597
598        // Update trust radius.
599        let rho = if pred_red.abs() < 1e-20 {
600            1.0
601        } else {
602            actual_red / pred_red
603        };
604
605        if rho >= 0.1 {
606            // Accept step.
607            x = x_new;
608            g = eval_gradient(&grad_ops, &x)?;
609            grad_norm = l2_norm(&g);
610
611            if grad_norm < tol {
612                converged = true;
613                // Expand radius and break.
614                if rho >= 0.75 {
615                    let p_norm = l2_norm(&p);
616                    if p_norm >= 0.8 * delta {
617                        delta = (2.0 * delta).min(100.0);
618                    }
619                }
620                break;
621            }
622        }
623        // Else: reject step, x unchanged, g unchanged.
624
625        // Adjust radius.
626        if rho >= 0.75 {
627            let p_norm = l2_norm(&p);
628            if p_norm >= 0.8 * delta {
629                delta = (2.0 * delta).min(100.0);
630            }
631        } else if rho < 0.25 {
632            delta *= 0.25;
633        }
634
635        if delta < 1e-14 {
636            // Trust region collapsed — treat as not converged.
637            break;
638        }
639    }
640
641    let f_val = eval_objective(objective.as_ref(), &x)?;
642
643    if converged {
644        Ok(SymbolicOptResult {
645            x: Array1::from_vec(x),
646            f_val,
647            grad_norm,
648            iters,
649            converged: true,
650        })
651    } else {
652        Err(SymbolicOptError::NotConverged { iters, grad_norm })
653    }
654}
655
656/// Compute the dogleg step p for the trust-region subproblem:
657///   min_p  g^T p + 0.5 p^T H p   s.t.  ||p|| ≤ Δ
658///
659/// Falls back to the steepest-descent Cauchy step when the Hessian is
660/// indefinite or the Newton system is singular.
661fn dogleg_step_sym(g: &[f64], h: &[Vec<f64>], delta: f64) -> Vec<f64> {
662    let n = g.len();
663
664    // Newton step: p_N = -H⁻¹ g  (solve H p_N = -g via Gaussian elimination)
665    let neg_g: Vec<f64> = g.iter().map(|gi| -gi).collect();
666    let p_n_opt = solve_linear_opt(h, &neg_g);
667
668    // Steepest-descent step: p_SD = -(g^T g / g^T H g) * g
669    let gtg = dot(g, g);
670    let hg = matvec(h, g);
671    let gthg = dot(g, &hg); // g^T H g
672
673    let p_sd: Vec<f64> = if gthg > 1e-20 {
674        let scale = gtg / gthg;
675        g.iter().map(|gi| -scale * gi).collect()
676    } else {
677        // H is indefinite or zero — use steepest-descent direction with unit step.
678        let gn = gtg.sqrt().max(1e-20);
679        g.iter().map(|gi| -gi / gn).collect()
680    };
681
682    let p_sd_norm = l2_norm(&p_sd);
683
684    // If Newton system succeeded, attempt full dogleg.
685    if let Some(p_n) = p_n_opt {
686        let p_n_norm = l2_norm(&p_n);
687
688        if p_n_norm <= delta {
689            // Newton step fits — take it.
690            return p_n;
691        }
692
693        if p_sd_norm >= delta {
694            // Cauchy point already outside trust region — scale SD.
695            return p_sd.iter().map(|pi| delta / p_sd_norm * pi).collect();
696        }
697
698        // Interpolate: p = p_SD + τ * (p_N - p_SD), ||p|| = Δ
699        // ||p_SD + τ * d||² = Δ²  where d = p_N - p_SD
700        // ||p_SD||² + 2τ p_SD^T d + τ² ||d||² = Δ²
701        let d: Vec<f64> = p_n.iter().zip(&p_sd).map(|(pni, psi)| pni - psi).collect();
702        let dd = dot(&d, &d);
703        let psd_d = dot(&p_sd, &d);
704        let psd2 = p_sd_norm * p_sd_norm;
705        let discriminant = psd_d * psd_d - dd * (psd2 - delta * delta);
706        let tau = if dd < 1e-20 || discriminant < 0.0 {
707            1.0
708        } else {
709            (-psd_d + discriminant.sqrt()) / dd
710        };
711        let tau = tau.clamp(0.0, 1.0);
712        p_sd.iter()
713            .zip(&d)
714            .map(|(psi, di)| psi + tau * di)
715            .collect()
716    } else {
717        // Singular Hessian — Cauchy step.
718        if p_sd_norm <= delta {
719            p_sd
720        } else {
721            p_sd.iter().map(|pi| delta / p_sd_norm * pi).collect()
722        }
723    }
724}
725
726/// Matrix-vector product H · v.
727fn matvec(h: &[Vec<f64>], v: &[f64]) -> Vec<f64> {
728    h.iter().map(|row| dot(row, v)).collect()
729}
730
731/// Partial-pivoting Gaussian elimination. Returns `None` when singular.
732fn solve_linear_opt(a: &[Vec<f64>], b: &[f64]) -> Option<Vec<f64>> {
733    let n = b.len();
734    if n == 0 {
735        return Some(Vec::new());
736    }
737
738    let mut mat: Vec<Vec<f64>> = a
739        .iter()
740        .zip(b.iter())
741        .map(|(row, &bi)| {
742            let mut r = row.clone();
743            r.push(bi);
744            r
745        })
746        .collect();
747
748    for k in 0..n {
749        // Find pivot
750        let mut max_idx = k;
751        let mut max_val = mat[k][k].abs();
752        for i in (k + 1)..n {
753            let v = mat[i][k].abs();
754            if v > max_val {
755                max_val = v;
756                max_idx = i;
757            }
758        }
759        if max_val < 1e-12 {
760            return None; // singular
761        }
762        mat.swap(k, max_idx);
763        for i in (k + 1)..n {
764            let factor = mat[i][k] / mat[k][k];
765            for j in k..(n + 1) {
766                mat[i][j] -= factor * mat[k][j];
767            }
768        }
769    }
770
771    let mut x = vec![0.0; n];
772    for i in (0..n).rev() {
773        let mut sum = mat[i][n];
774        for j in (i + 1)..n {
775            sum -= mat[i][j] * x[j];
776        }
777        x[i] = sum / mat[i][i];
778    }
779    Some(x)
780}
781
782/// Solve linear system A·x = b via partial-pivoting Gaussian elimination.
783fn solve_linear(a: &[Vec<f64>], b: &[f64]) -> Result<Vec<f64>, SymbolicNewtonError> {
784    let n = b.len();
785    if n == 0 {
786        return Ok(Vec::new());
787    }
788
789    // Augmented matrix
790    let mut mat: Vec<Vec<f64>> = a
791        .iter()
792        .zip(b.iter())
793        .map(|(row, &bi)| {
794            let mut r = row.clone();
795            r.push(bi);
796            r
797        })
798        .collect();
799
800    // Forward elimination with partial pivoting
801    for k in 0..n {
802        // Find pivot
803        let mut max_idx = k;
804        let mut max_val = mat[k][k].abs();
805        for i in (k + 1)..n {
806            let v = mat[i][k].abs();
807            if v > max_val {
808                max_val = v;
809                max_idx = i;
810            }
811        }
812        if max_val < 1e-12 {
813            return Err(SymbolicNewtonError::SingularHessian);
814        }
815        // Swap
816        mat.swap(k, max_idx);
817        // Eliminate
818        for i in (k + 1)..n {
819            let factor = mat[i][k] / mat[k][k];
820            for j in k..(n + 1) {
821                mat[i][j] -= factor * mat[k][j];
822            }
823        }
824    }
825
826    // Back-substitution
827    let mut x = vec![0.0; n];
828    for i in (0..n).rev() {
829        let mut sum = mat[i][n];
830        for j in (i + 1)..n {
831            sum -= mat[i][j] * x[j];
832        }
833        x[i] = sum / mat[i][i];
834    }
835
836    Ok(x)
837}
838
839// ────────────────────────────────────────────────────────────────────────────
840// Symbolic Lagrangian and KKT conditions
841// ────────────────────────────────────────────────────────────────────────────
842
843/// KKT system for equality-constrained optimization: min f(x) s.t. gᵢ(x) = 0.
844///
845/// Variables Var(0..n_vars) are the primal variables x.
846/// Variables Var(n_vars..n_vars+n_constraints) are the Lagrange multipliers λᵢ.
847pub struct KktSystem {
848    /// Lagrangian L = f + Σ λᵢ·gᵢ.
849    pub lagrangian: LoweredOp,
850    /// Stationarity conditions: ∂L/∂xⱼ = 0 for j in 0..n_vars.
851    pub stationarity: Vec<LoweredOp>,
852    /// Constraint residuals: gᵢ(x) for i in 0..n_constraints.
853    pub constraint_residuals: Vec<LoweredOp>,
854    /// Number of primal variables.
855    pub n_vars: usize,
856    /// Number of equality constraints.
857    pub n_constraints: usize,
858}
859
860/// Errors from [`build_kkt`].
861#[derive(Debug)]
862pub enum LagrangianError {
863    /// Variable or constraint dimension was unexpected.
864    DimMismatch {
865        /// Expected number of variables.
866        expected: usize,
867        /// Actual number encountered.
868        got: usize,
869    },
870    /// Error while computing a symbolic gradient.
871    GradError(String),
872}
873
874/// Form the KKT system for equality-constrained optimization symbolically.
875///
876/// Builds the Lagrangian L = f + Σ λᵢ·gᵢ and derives stationarity conditions
877/// ∂L/∂xⱼ = 0 for each primal variable.
878///
879/// # Arguments
880/// - `objective`: scalar objective f(x)
881/// - `constraints`: slice of equality constraints gᵢ(x) (each = 0 at a solution)
882/// - `n_vars`: number of primal decision variables
883///
884/// # Errors
885/// Returns [`LagrangianError::DimMismatch`] when the objective or a constraint
886/// mentions more variables than `n_vars`.
887pub fn build_kkt(
888    objective: &Arc<LoweredOp>,
889    constraints: &[Arc<LoweredOp>],
890    n_vars: usize,
891) -> Result<KktSystem, LagrangianError> {
892    let m = constraints.len();
893
894    // Validate: no operand should reference a variable index >= n_vars
895    // (multiplier slots n_vars..n_vars+m are reserved by the KKT construction).
896    let obj_vars = objective.count_vars();
897    if obj_vars > n_vars {
898        return Err(LagrangianError::DimMismatch {
899            expected: n_vars,
900            got: obj_vars,
901        });
902    }
903
904    // Build lagrangian = f + Σ λᵢ·gᵢ
905    // λᵢ occupies Var(n_vars + i).
906    let mut lagrangian: LoweredOp = objective.as_ref().clone();
907    for (i, g) in constraints.iter().enumerate() {
908        let lam_i = LoweredOp::Var(n_vars + i);
909        let term = LoweredOp::Mul(Box::new(lam_i), Box::new(g.as_ref().clone()));
910        lagrangian = LoweredOp::Add(Box::new(lagrangian), Box::new(term));
911    }
912
913    // Stationarity: ∂L/∂xⱼ for j in 0..n_vars
914    let stationarity: Vec<LoweredOp> = (0..n_vars).map(|j| grad(&lagrangian, j)).collect();
915
916    // Constraint residuals: just the gᵢ themselves
917    let constraint_residuals: Vec<LoweredOp> =
918        constraints.iter().map(|g| g.as_ref().clone()).collect();
919
920    Ok(KktSystem {
921        lagrangian,
922        stationarity,
923        constraint_residuals,
924        n_vars,
925        n_constraints: m,
926    })
927}
928
929/// Solve an equality-constrained optimization via Newton on the KKT system.
930///
931/// Forms the Lagrangian symbolically, derives KKT stationarity conditions, then
932/// applies Newton's method on the full nonlinear KKT system
933/// `[∂L/∂x; g(x)] = 0`.
934///
935/// # Arguments
936/// - `objective`: scalar objective f(x)
937/// - `constraints`: equality constraints gᵢ(x) = 0
938/// - `x0`: initial primal point; length must equal `count_vars(objective)`
939/// - `lambda0`: initial multiplier guess; length must equal `constraints.len()`
940/// - `max_iter`: maximum Newton iterations
941/// - `tol`: convergence tolerance on the L₂ norm of the KKT residual
942///
943/// # Errors
944/// - [`SymbolicOptError::DimMismatch`] when `x0.len()` or `lambda0.len()` is wrong
945/// - [`SymbolicOptError::EvalError`] on symbolic evaluation failures
946/// - [`SymbolicOptError::NotConverged`] when `max_iter` is exhausted
947pub fn solve_lagrangian_symbolic(
948    objective: &Arc<LoweredOp>,
949    constraints: &[Arc<LoweredOp>],
950    x0: scirs2_core::ndarray::ArrayView1<f64>,
951    lambda0: scirs2_core::ndarray::ArrayView1<f64>,
952    max_iter: usize,
953    tol: f64,
954) -> Result<SymbolicOptResult, SymbolicOptError> {
955    use scirs2_core::ndarray::Array1;
956
957    let m = constraints.len();
958
959    // Infer n_vars as the maximum variable index referenced across objective
960    // and all constraints.
961    let n_vars = std::iter::once(objective.count_vars())
962        .chain(constraints.iter().map(|c| c.count_vars()))
963        .max()
964        .unwrap_or(0);
965
966    if x0.len() != n_vars {
967        return Err(SymbolicOptError::DimMismatch {
968            expected: n_vars,
969            got: x0.len(),
970        });
971    }
972    if lambda0.len() != m {
973        return Err(SymbolicOptError::DimMismatch {
974            expected: m,
975            got: lambda0.len(),
976        });
977    }
978
979    // Build KKT system — map error variants
980    let kkt = build_kkt(objective, constraints, n_vars).map_err(|e| match e {
981        LagrangianError::DimMismatch { expected, got } => {
982            SymbolicOptError::DimMismatch { expected, got }
983        }
984        LagrangianError::GradError(s) => SymbolicOptError::GradEvalError(s),
985    })?;
986
987    // Total unknowns: N = n_vars + m
988    let big_n = n_vars + m;
989
990    // Combined variable vector z = [x0..., lambda0...]
991    let mut z: Vec<f64> = x0.iter().copied().chain(lambda0.iter().copied()).collect();
992
993    // All KKT equations: stationarity (n_vars eqs) ++ constraint residuals (m eqs)
994    let eqs: Vec<&LoweredOp> = kkt
995        .stationarity
996        .iter()
997        .chain(kkt.constraint_residuals.iter())
998        .collect();
999
1000    // Precompute symbolic Jacobian J[i][j] = ∂eqs[i]/∂z[j] — once before the loop.
1001    let jac_ops: Vec<Vec<LoweredOp>> = eqs
1002        .iter()
1003        .map(|eq| (0..big_n).map(|j| grad(eq, j)).collect())
1004        .collect();
1005
1006    // Evaluate initial residual for max_iter==0 early exit.
1007    let initial_residual = {
1008        let ctx = EvalCtx::new(&z);
1009        let mut f_vec = Vec::with_capacity(big_n);
1010        for eq in &eqs {
1011            let v = eval_real(eq, &ctx).map_err(|e| SymbolicOptError::EvalError(e.to_string()))?;
1012            f_vec.push(v);
1013        }
1014        f_vec
1015    };
1016    let mut residual_norm = l2_norm(&initial_residual);
1017
1018    if max_iter == 0 {
1019        return Err(SymbolicOptError::NotConverged {
1020            iters: 0,
1021            grad_norm: residual_norm,
1022        });
1023    }
1024
1025    // Check if already converged at the initial point.
1026    if residual_norm < tol {
1027        let f_val = eval_objective(objective.as_ref(), &z[..n_vars])?;
1028        return Ok(SymbolicOptResult {
1029            x: Array1::from_vec(z[..n_vars].to_vec()),
1030            f_val,
1031            grad_norm: residual_norm,
1032            iters: 0,
1033            converged: true,
1034        });
1035    }
1036
1037    let mut converged = false;
1038    let mut iters = 0;
1039
1040    for k in 0..max_iter {
1041        iters = k + 1;
1042
1043        // Evaluate KKT residual vector F
1044        let ctx = EvalCtx::new(&z);
1045        let mut f_vec: Vec<f64> = Vec::with_capacity(big_n);
1046        for eq in &eqs {
1047            let v = eval_real(eq, &ctx).map_err(|e| SymbolicOptError::EvalError(e.to_string()))?;
1048            f_vec.push(v);
1049        }
1050
1051        residual_norm = l2_norm(&f_vec);
1052        if residual_norm < tol {
1053            converged = true;
1054            break;
1055        }
1056
1057        // Evaluate Jacobian matrix J
1058        let mut jac_mat: Vec<Vec<f64>> = Vec::with_capacity(big_n);
1059        for row_ops in &jac_ops {
1060            let mut row: Vec<f64> = Vec::with_capacity(big_n);
1061            for op in row_ops {
1062                let v =
1063                    eval_real(op, &ctx).map_err(|e| SymbolicOptError::EvalError(e.to_string()))?;
1064                row.push(v);
1065            }
1066            jac_mat.push(row);
1067        }
1068
1069        // Solve J · dz = -F
1070        let neg_f: Vec<f64> = f_vec.iter().map(|v| -v).collect();
1071        let dz = match solve_linear_opt(&jac_mat, &neg_f) {
1072            Some(d) => d,
1073            None => {
1074                // Singular Jacobian — fall back to negative residual step (gradient descent).
1075                neg_f
1076            }
1077        };
1078
1079        // Update z ← z + dz
1080        for (zi, dzi) in z.iter_mut().zip(dz.iter()) {
1081            *zi += dzi;
1082        }
1083    }
1084
1085    let f_val = eval_objective(objective.as_ref(), &z[..n_vars])?;
1086
1087    if converged {
1088        Ok(SymbolicOptResult {
1089            x: Array1::from_vec(z[..n_vars].to_vec()),
1090            f_val,
1091            grad_norm: residual_norm,
1092            iters,
1093            converged: true,
1094        })
1095    } else {
1096        Err(SymbolicOptError::NotConverged {
1097            iters,
1098            grad_norm: residual_norm,
1099        })
1100    }
1101}
1102
1103// ─────────────────────────────────────────────────────────────────────────────
1104// Closed-form quadratic line-search wrapper
1105// ─────────────────────────────────────────────────────────────────────────────
1106
1107pub mod line_search;
1108pub use line_search::{OptLineSearchError, SymbolicLineSearch};
1109
1110#[cfg(test)]
1111mod tests {
1112    use super::*;
1113
1114    fn var(i: usize) -> LoweredOp {
1115        LoweredOp::Var(i)
1116    }
1117    fn c(v: f64) -> LoweredOp {
1118        LoweredOp::Const(v)
1119    }
1120
1121    #[test]
1122    fn newton_x_squared_one_step() {
1123        // f(x) = x² → grad = 2x, hess = 2; Newton step: dx = 2x / 2 = x; x_new = x - x = 0
1124        let cost = LoweredOp::Mul(Box::new(var(0)), Box::new(var(0)));
1125        let result = newton(&cost, &[5.0], 10, 1e-8).expect("converge");
1126        assert!(result.converged);
1127        assert!(result.x[0].abs() < 1e-6, "x = {}", result.x[0]);
1128        assert!(result.iters <= 2);
1129    }
1130
1131    #[test]
1132    fn newton_quadratic_2d() {
1133        // f(x, y) = x² + y² → minimum at (0, 0)
1134        let cost = LoweredOp::Add(
1135            Box::new(LoweredOp::Mul(Box::new(var(0)), Box::new(var(0)))),
1136            Box::new(LoweredOp::Mul(Box::new(var(1)), Box::new(var(1)))),
1137        );
1138        let result = newton(&cost, &[3.0, -4.0], 10, 1e-8).expect("converge");
1139        assert!(result.converged);
1140        assert!(result.x[0].abs() < 1e-6);
1141        assert!(result.x[1].abs() < 1e-6);
1142    }
1143
1144    #[test]
1145    fn newton_shifted_quadratic() {
1146        // f(x) = (x - 3)² → minimum at x = 3
1147        let inner = LoweredOp::Sub(Box::new(var(0)), Box::new(c(3.0)));
1148        let cost = LoweredOp::Mul(Box::new(inner.clone()), Box::new(inner));
1149        let result = newton(&cost, &[0.0], 10, 1e-8).expect("converge");
1150        assert!(result.converged);
1151        assert!((result.x[0] - 3.0).abs() < 1e-6, "x = {}", result.x[0]);
1152    }
1153
1154    #[test]
1155    fn newton_dim_mismatch_returns_err() {
1156        let cost = LoweredOp::Mul(Box::new(var(0)), Box::new(var(0)));
1157        let result = newton(&cost, &[], 10, 1e-8);
1158        assert!(matches!(
1159            result,
1160            Err(SymbolicNewtonError::DimMismatch { .. })
1161        ));
1162    }
1163
1164    #[test]
1165    fn solve_linear_2x2() {
1166        // [2 1; 1 3] · x = [5; 10]  → x = [1; 3]
1167        let a = vec![vec![2.0, 1.0], vec![1.0, 3.0]];
1168        let b = vec![5.0, 10.0];
1169        let x = solve_linear(&a, &b).expect("solve");
1170        assert!((x[0] - 1.0).abs() < 1e-10);
1171        assert!((x[1] - 3.0).abs() < 1e-10);
1172    }
1173
1174    #[test]
1175    fn solve_linear_singular_returns_err() {
1176        let a = vec![vec![1.0, 2.0], vec![2.0, 4.0]]; // rank 1
1177        let b = vec![3.0, 6.0];
1178        assert!(matches!(
1179            solve_linear(&a, &b),
1180            Err(SymbolicNewtonError::SingularHessian)
1181        ));
1182    }
1183}