// ipopt_ad/lib.rs

1//! # IPOPT-AD - Blackbox NLP solver using IPOPT and automatic differentiation
2//! This crate provides a simple interface to the [IPOPT](https://coin-or.github.io/Ipopt/index.html)
3//! nonlinear program (NLP) solver. By evaluating gradients and Hessians using automatic differentiation
4//! provided by the [num-dual] crate, it "converts" IPOPT into a blackbox solver without introducing
5//! numerical errors through forward differences that could diminish the robustness and accuracy. The
6//! only limitation is that the evaluation of the objective and constraints has to be implemented
7//! generically for values implementing the [DualNum] trait.
8//!
9//! ## Example: problem 71 from the Hock-Schittkowsky test-suite
10//! This example demonstrates how to solve the problem that is also used as example in the
11//! [documentation](https://coin-or.github.io/Ipopt/INTERFACES.html) of IPOPT. Because the
12//! objective value and constraints are simple functions of x, the [SimpleADProblem] interface
13//! can be used.
14//! ```
15//! use ipopt_ad::{ADProblem, BasicADProblem, SimpleADProblem};
16//! use ipopt_ad::ipopt::Ipopt;
17//! use num_dual::DualNum;
18//! use approx::assert_relative_eq;
19//!
20//! struct HS071;
21//!
22//! impl BasicADProblem<4> for HS071 {
23//!     fn bounds(&self) -> ([f64; 4], [f64; 4]) {
24//!         ([1.0; 4], [5.0; 4])
25//!     }
26//!     fn initial_point(&self) -> [f64; 4] {
27//!         [1.0, 5.0, 5.0, 1.0]
28//!     }
29//!     fn constraint_bounds(&self) -> (Vec<f64>, Vec<f64>) {
30//!         (vec![25.0, 40.0], vec![f64::INFINITY, 40.0])
31//!     }
32//! }
33//!
34//! impl SimpleADProblem<4> for HS071 {
35//!     fn objective<D: DualNum<f64> + Copy>(&self, x: [D; 4]) -> D {
36//!         let [x1, x2, x3, x4] = x;
37//!         x1 * x4 * (x1 + x2 + x3) + x3
38//!     }
39//!     fn constraint_values<D: DualNum<f64> + Copy>(&self, x: [D; 4]) -> Vec<D> {
40//!         let [x1, x2, x3, x4] = x;
41//!         vec![x1 * x2 * x3 * x4, x1 * x1 + x2 * x2 + x3 * x3 + x4 * x4]
42//!     }
43//! }
44//!
45//! let problem = ADProblem::new(HS071);
46//! let mut ipopt = Ipopt::new(problem).unwrap();
47//! ipopt.set_option("print_level", 0);
48//! let res = ipopt.solve();
49//! let x = res.solver_data.solution.primal_variables;
50//! let x_lit = &[1f64, 4.74299963, 3.82114998, 1.37940829] as &[f64];
51//! assert_relative_eq!(x, x_lit, max_relative = 1e-8);
52//! ```
53//!
54//! For more complex NLPs, it can be advantageous to evaluate the objective function and constraints
55//! simultaneously, which is possible with the [CachedADProblem] interface. For the HS071 problem,
56//! this looks like this:
57//! ```
58//! # use ipopt_ad::{ADProblem, BasicADProblem, CachedADProblem};
59//! # use ipopt_ad::ipopt::Ipopt;
60//! # use num_dual::DualNum;
61//! # use approx::assert_relative_eq;
62//! # use std::convert::Infallible;
63//! # struct HS071;
64//! # impl BasicADProblem<4> for HS071 {
65//! #     fn bounds(&self) -> ([f64; 4], [f64; 4]) {
66//! #         ([1.0; 4], [5.0; 4])
67//! #     }
68//! #     fn initial_point(&self) -> [f64; 4] {
69//! #         [1.0, 5.0, 5.0, 1.0]
70//! #     }
71//! #     fn constraint_bounds(&self) -> (Vec<f64>, Vec<f64>) {
72//! #         (vec![25.0, 40.0], vec![f64::INFINITY, 40.0])
73//! #     }
74//! # }
75//! impl CachedADProblem<4> for HS071 {
76//!     type Error = Infallible;
77//!     fn evaluate<D: DualNum<f64> + Copy>(&self, x: [D; 4]) -> Result<(D, Vec<D>), Infallible> {
78//!         let [x1, x2, x3, x4] = x;
79//!         Ok((
80//!             x1 * x4 * (x1 + x2 + x3) + x3,
81//!             vec![x1 * x2 * x3 * x4, x1 * x1 + x2 * x2 + x3 * x3 + x4 * x4],
82//!         ))
83//!     }
84//! }
85//!
86//! let problem = ADProblem::new_cached(HS071).unwrap();
87//! let mut ipopt = Ipopt::new(problem).unwrap();
88//! ipopt.set_option("print_level", 0);
89//! let res = ipopt.solve();
90//! let x = res.solver_data.solution.primal_variables;
91//! let x_lit = &[1f64, 4.74299963, 3.82114998, 1.37940829] as &[f64];
92//! assert_relative_eq!(x, x_lit, max_relative = 1e-8);
93//! ```
94use ipopt::{BasicProblem, ConstrainedProblem};
95use nalgebra::{OVector, SVector, U1};
96use num_dual::{
97    gradient, hessian, jacobian, try_hessian, Derivative, Dual2Vec, DualNum, DualVec, DualVec64,
98    HyperDualVec, HyperDualVec64,
99};
100use std::cell::RefCell;
101use std::convert::Infallible;
102
/// Re-export of the underlying `ipopt-rs` bindings so that downstream crates can use the
/// solver (e.g. [`Ipopt`](crate::ipopt::Ipopt)) without adding a direct dependency on `ipopt`.
pub mod ipopt {
    //! Re-export of all functionalities in [ipopt-rs].
    pub use ipopt::*;
}
107
/// The basic information that needs to be provided for every optimization problem.
///
/// `X` is the number of optimization variables, fixed at compile time.
pub trait BasicADProblem<const X: usize> {
    /// Return lower and upper bounds for all variables.
    fn bounds(&self) -> ([f64; X], [f64; X]);

    /// Return an initial guess for all variables.
    fn initial_point(&self) -> [f64; X];

    /// Return the lower and upper bounds for all constraints.
    ///
    /// The length of the returned vectors determines the number of constraints.
    fn constraint_bounds(&self) -> (Vec<f64>, Vec<f64>);
}
119
/// The interface for a simple NLP in which the objective function and constraint values are
/// determined separate from each other.
///
/// Both methods must be implemented generically over [DualNum] so that gradients and Hessians
/// can be obtained by automatic differentiation.
pub trait SimpleADProblem<const X: usize>: BasicADProblem<X> {
    /// Return the objective value for the given x.
    fn objective<D: DualNum<f64> + Copy>(&self, x: [D; X]) -> D;

    /// Return the values of all constraints for the given x.
    ///
    /// The returned vector must have the same length as the bounds returned by
    /// [`BasicADProblem::constraint_bounds`].
    fn constraint_values<D: DualNum<f64> + Copy>(&self, x: [D; X]) -> Vec<D>;
}
129
/// The interface for an NLP in which it is more efficient to evaluate the objective and
/// constraint value in the same function.
///
/// Internally, the results are cached for repeated calls with the same x so that the number
/// of function evaluations is kept to a minimum.
pub trait CachedADProblem<const X: usize>: BasicADProblem<X> {
    /// The error type used in the evaluation.
    ///
    /// A failed evaluation is reported to IPOPT as an unsuccessful callback rather than a panic.
    type Error;

    /// Return the objective value and values for all constraints simultaneously.
    ///
    /// The constraint vector must have the same length as the bounds returned by
    /// [`BasicADProblem::constraint_bounds`].
    fn evaluate<D: DualNum<f64> + Copy>(&self, x: [D; X]) -> Result<(D, Vec<D>), Self::Error>;
}
142
/// Wrapper struct that handles caching and sparsity detection to interface between the blackbox
/// model and IPOPT.
#[expect(clippy::type_complexity)]
pub struct ADProblem<T, const X: usize, const CACHE: bool> {
    // The user-provided blackbox model.
    problem: T,
    // Row indices of the structurally nonzero constraint Jacobian entries (COO/triplet format).
    con_jac_row_vec: Vec<i32>,
    // Column indices matching `con_jac_row_vec`.
    con_jac_col_vec: Vec<i32>,
    // Row indices of the structurally nonzero entries of the lower triangle of the
    // Hessian of the Lagrangian (COO/triplet format).
    hess_row_vec: Vec<i32>,
    // Column indices matching `hess_row_vec`.
    hess_col_vec: Vec<i32>,
    // Cached objective value and constraint values for the current x; only touched by the
    // `CACHE = true` impls. The outer `Option` means "cache empty", the inner `Option` is
    // `None` when the user evaluation itself failed (so a failing point is not re-evaluated).
    cache: RefCell<Option<Option<(f64, Vec<f64>)>>>,
    // Cached objective gradient and dense constraint Jacobian rows (one `[f64; X]` per
    // constraint) for the current x; same double-`Option` convention as `cache`.
    grad_cache: RefCell<Option<Option<([f64; X], Vec<[f64; X]>)>>>,
}
155
impl<T: BasicADProblem<X>, const X: usize, const CACHE: bool> ADProblem<T, X, CACHE> {
    /// Shared constructor that detects the sparsity structure of the constraint Jacobian and of
    /// the Hessian of the Lagrangian by probing the model with (hyper-)dual numbers at the
    /// initial point.
    ///
    /// `constraints` evaluates the constraints only (used for the Jacobian structure), while
    /// `evaluate` returns objective and constraints together (used for the Hessian structure).
    /// Errors from the model evaluation are propagated to the caller.
    ///
    /// NOTE(review): the structure is detected *numerically* at the initial point. An entry
    /// whose derivative happens to evaluate to exactly zero there (but is nonzero elsewhere)
    /// would be missed — confirm this is acceptable for the intended models.
    fn new_impl<
        Err,
        G: Fn(&T, [DualVec64<U1>; X]) -> Result<Vec<DualVec64<U1>>, Err>,
        E: Fn(
            &T,
            [HyperDualVec64<U1, U1>; X],
        ) -> Result<(HyperDualVec64<U1, U1>, Vec<HyperDualVec64<U1, U1>>), Err>,
    >(
        problem: T,
        constraints: G,
        evaluate: E,
    ) -> Result<Self, Err> {
        let x = problem.initial_point();
        // Jacobian structure: perturb one variable at a time and record every constraint with a
        // nonzero first derivative as a structural (row, col) entry.
        let mut con_jac_row_vec = Vec::new();
        let mut con_jac_col_vec = Vec::new();
        for i in 0..x.len() {
            let mut x_dual: [DualVec64<U1>; X] = x.map(DualVec::from_re);
            // Seed a unit dual perturbation on variable i only.
            x_dual[i].eps = Derivative::derivative_generic(U1, U1, 0);
            let con = constraints(&problem, x_dual)?;
            for (j, c) in con.into_iter().enumerate() {
                if c.eps != Derivative::none() {
                    con_jac_row_vec.push(j as i32);
                    con_jac_col_vec.push(i as i32);
                }
            }
        }

        // Hessian structure: perturb pairs of variables (lower triangle only, col <= row) and
        // record entries where the mixed second derivative is nonzero.
        let mut hess_row_vec = Vec::new();
        let mut hess_col_vec = Vec::new();
        for row in 0..x.len() {
            for col in 0..=row {
                let mut x_dual: [HyperDualVec64<U1, U1>; X] = x.map(HyperDualVec::from_re);
                x_dual[row].eps1 = Derivative::derivative_generic(U1, U1, 0);
                x_dual[col].eps2 = Derivative::derivative_generic(U1, U1, 0);
                let (mut f, con) = evaluate(&problem, x_dual)?;
                // Sum objective and all constraints so that a single probe covers the structure
                // of the whole Lagrangian (the multipliers only scale values, not sparsity).
                for g in con {
                    f += g;
                }
                if f.eps1eps2 != Derivative::none() {
                    hess_row_vec.push(row as i32);
                    hess_col_vec.push(col as i32);
                }
            }
        }

        Ok(Self {
            problem,
            con_jac_row_vec,
            con_jac_col_vec,
            hess_row_vec,
            hess_col_vec,
            cache: RefCell::new(None),
            grad_cache: RefCell::new(None),
        })
    }

    /// Invalidate both caches whenever IPOPT signals that the evaluation point has changed.
    fn update_cache(&self, new_x: bool) {
        if new_x {
            *self.cache.borrow_mut() = None;
            *self.grad_cache.borrow_mut() = None;
        }
    }
}
220
221impl<T: SimpleADProblem<X>, const X: usize> ADProblem<T, X, false> {
222    /// Initialize an NLP using the `SimpleADProblem` interface.
223    pub fn new(problem: T) -> Self {
224        Self::new_impl(
225            problem,
226            |problem, x| Ok::<_, Infallible>(problem.constraint_values(x)),
227            |problem, x| Ok((problem.objective(x), problem.constraint_values(x))),
228        )
229        .unwrap()
230    }
231}
232
impl<T: CachedADProblem<X>, const X: usize> ADProblem<T, X, true> {
    /// Initialize an NLP using the `CachedADProblem` interface.
    ///
    /// Returns an error if the model evaluation fails during sparsity detection at the
    /// initial point.
    pub fn new_cached(problem: T) -> Result<Self, T::Error> {
        Self::new_impl(
            problem,
            // Jacobian sparsity only needs the constraint part of the combined evaluation.
            |problem, x| problem.evaluate(x).map(|(_, g)| g),
            |problem, x| problem.evaluate(x),
        )
    }

    /// Evaluate the objective gradient and the dense constraint Jacobian (one `[f64; X]` row
    /// per constraint) with a single model evaluation using vector dual numbers.
    #[expect(clippy::type_complexity)]
    fn evaluate_gradients(&self, x: [f64; X]) -> Result<([f64; X], Vec<[f64; X]>), T::Error> {
        // Seed one dual perturbation per variable so that a single evaluation yields all
        // first derivatives simultaneously.
        let mut x = SVector::from(x).map(DualVec::from_re);
        let (r, c) = x.shape_generic();
        for (i, xi) in x.iter_mut().enumerate() {
            xi.eps = Derivative::derivative_generic(r, c, i);
        }
        let (f, g) = self.problem.evaluate(x.data.0[0])?;
        // `.data.0[0]` extracts the plain `[f64; X]` array from the static nalgebra vector;
        // `unwrap_generic` materializes a derivative that may be stored as an implicit zero.
        Ok((
            f.eps.unwrap_generic(r, c).data.0[0],
            g.into_iter()
                .map(|g| g.eps.unwrap_generic(r, c).data.0[0])
                .collect(),
        ))
    }
}
259
260impl<T: SimpleADProblem<X>, const X: usize> BasicProblem for ADProblem<T, X, false> {
261    fn num_variables(&self) -> usize {
262        X
263    }
264
265    fn bounds(&self, x_l: &mut [f64], x_u: &mut [f64]) -> bool {
266        let (lbnd, ubnd) = self.problem.bounds();
267        x_l.copy_from_slice(&lbnd);
268        x_u.copy_from_slice(&ubnd);
269        true
270    }
271
272    fn initial_point(&self, x: &mut [f64]) -> bool {
273        let init = self.problem.initial_point();
274        x.copy_from_slice(&init);
275        true
276    }
277
278    fn objective(&self, x: &[f64], _: bool, obj: &mut f64) -> bool {
279        *obj = self.problem.objective(x.try_into().unwrap());
280        true
281    }
282
283    fn objective_grad(&self, x: &[f64], _: bool, grad_f: &mut [f64]) -> bool {
284        let x = SVector::from_column_slice(x);
285        let (_, grad) = gradient(|x| self.problem.objective(x.data.0[0]), x);
286        grad_f.copy_from_slice(&grad.data.0[0]);
287        true
288    }
289}
290
impl<T: SimpleADProblem<X>, const X: usize> ConstrainedProblem for ADProblem<T, X, false> {
    /// The number of constraints is given by the length of the constraint bounds.
    fn num_constraints(&self) -> usize {
        self.problem.constraint_bounds().0.len()
    }

    /// Number of structurally nonzero Jacobian entries detected in `new_impl`.
    fn num_constraint_jacobian_non_zeros(&self) -> usize {
        self.con_jac_col_vec.len()
    }

    /// Evaluate all constraints with plain `f64` arithmetic.
    fn constraint(&self, x: &[f64], _: bool, g: &mut [f64]) -> bool {
        g.copy_from_slice(&self.problem.constraint_values(x.try_into().unwrap()));
        true
    }

    fn constraint_bounds(&self, g_l: &mut [f64], g_u: &mut [f64]) -> bool {
        let (lbnd, ubnd) = self.problem.constraint_bounds();
        g_l.copy_from_slice(&lbnd);
        g_u.copy_from_slice(&ubnd);
        true
    }

    /// Report the (row, col) pattern of the sparse constraint Jacobian in triplet format.
    fn constraint_jacobian_indices(&self, rows: &mut [i32], cols: &mut [i32]) -> bool {
        rows.copy_from_slice(&self.con_jac_row_vec);
        cols.copy_from_slice(&self.con_jac_col_vec);
        true
    }

    fn constraint_jacobian_values(&self, x: &[f64], _: bool, vals: &mut [f64]) -> bool {
        let x = SVector::from_column_slice(x);
        // Compute the dense Jacobian with forward-mode AD ...
        let (_, jac) = jacobian(
            |x| OVector::from(self.problem.constraint_values(x.data.0[0])),
            x,
        );
        // ... and scatter only the structurally nonzero entries into IPOPT's sparse buffer,
        // in the same order as the index arrays reported above.
        for ((v, &r), &c) in vals
            .iter_mut()
            .zip(&self.con_jac_row_vec)
            .zip(&self.con_jac_col_vec)
        {
            *v = jac[(r as usize, c as usize)];
        }
        true
    }

    fn num_hessian_non_zeros(&self) -> usize {
        self.hess_col_vec.len()
    }

    /// Report the (row, col) pattern of the lower triangle of the Lagrangian Hessian.
    fn hessian_indices(&self, rows: &mut [i32], cols: &mut [i32]) -> bool {
        rows.copy_from_slice(&self.hess_row_vec);
        cols.copy_from_slice(&self.hess_col_vec);
        true
    }

    /// Evaluate the Hessian of the Lagrangian
    /// `obj_factor * f(x) + sum_j lambda_j * g_j(x)` with second-order dual numbers.
    fn hessian_values(
        &self,
        x: &[f64],
        _new_x: bool,
        obj_factor: f64,
        lambda: &[f64],
        vals: &mut [f64],
    ) -> bool {
        let (_, _, hess) = hessian(
            |x| {
                let f = self.problem.objective(x.data.0[0]);
                let g = self.problem.constraint_values(x.data.0[0]);
                // Weigh constraints with their multipliers so a single AD pass yields the
                // full Lagrangian Hessian.
                f * obj_factor
                    + g.into_iter()
                        .zip(lambda)
                        .map(|(g, &l)| g * l)
                        .sum::<Dual2Vec<_, _, _>>()
            },
            SVector::from_column_slice(x),
        );
        // Scatter only the detected lower-triangular entries into IPOPT's sparse buffer.
        for ((v, &r), &c) in vals
            .iter_mut()
            .zip(&self.hess_row_vec)
            .zip(&self.hess_col_vec)
        {
            *v = hess[(r as usize, c as usize)];
        }
        true
    }
}
374
impl<T: CachedADProblem<X>, const X: usize> BasicProblem for ADProblem<T, X, true> {
    fn num_variables(&self) -> usize {
        X
    }

    fn bounds(&self, x_l: &mut [f64], x_u: &mut [f64]) -> bool {
        let (lbnd, ubnd) = self.problem.bounds();
        x_l.copy_from_slice(&lbnd);
        x_u.copy_from_slice(&ubnd);
        true
    }

    fn initial_point(&self, x: &mut [f64]) -> bool {
        let init = self.problem.initial_point();
        x.copy_from_slice(&init);
        true
    }

    /// Return the objective value, reusing the cached evaluation (shared with `constraint`)
    /// when x has not changed. Returns `false` to IPOPT if the model evaluation failed.
    fn objective(&self, x: &[f64], new_x: bool, obj: &mut f64) -> bool {
        self.update_cache(new_x);
        let mut cache = self.cache.borrow_mut();
        // A failed evaluation is cached as the inner `None` so that repeated callbacks at the
        // same x do not re-run the failing model.
        let Some((f, _)) = cache.get_or_insert_with(|| {
            let x = SVector::from_column_slice(x);
            self.problem.evaluate(x.data.0[0]).ok()
        }) else {
            return false;
        };
        *obj = *f;
        true
    }

    /// Return the objective gradient, reusing the cached derivative evaluation (shared with
    /// `constraint_jacobian_values`) when x has not changed.
    fn objective_grad(&self, x: &[f64], new_x: bool, grad_f: &mut [f64]) -> bool {
        self.update_cache(new_x);
        let mut cache = self.grad_cache.borrow_mut();
        let Some((grad, _)) = cache.get_or_insert_with(|| {
            let x = SVector::from_column_slice(x);
            self.evaluate_gradients(x.data.0[0]).ok()
        }) else {
            return false;
        };
        grad_f.copy_from_slice(&*grad);
        true
    }
}
419
impl<T: CachedADProblem<X>, const X: usize> ConstrainedProblem for ADProblem<T, X, true> {
    /// The number of constraints is given by the length of the constraint bounds.
    fn num_constraints(&self) -> usize {
        self.problem.constraint_bounds().0.len()
    }

    /// Number of structurally nonzero Jacobian entries detected in `new_impl`.
    fn num_constraint_jacobian_non_zeros(&self) -> usize {
        self.con_jac_col_vec.len()
    }

    /// Return the constraint values, reusing the cached evaluation (shared with `objective`)
    /// when x has not changed. Returns `false` to IPOPT if the model evaluation failed.
    fn constraint(&self, x: &[f64], new_x: bool, g: &mut [f64]) -> bool {
        self.update_cache(new_x);
        let mut cache = self.cache.borrow_mut();
        let Some((_, con)) = cache.get_or_insert_with(|| {
            let x = SVector::from_column_slice(x);
            self.problem.evaluate(x.data.0[0]).ok()
        }) else {
            return false;
        };
        g.copy_from_slice(&*con);
        true
    }

    fn constraint_bounds(&self, g_l: &mut [f64], g_u: &mut [f64]) -> bool {
        let (lbnd, ubnd) = self.problem.constraint_bounds();
        g_l.copy_from_slice(&lbnd);
        g_u.copy_from_slice(&ubnd);
        true
    }

    /// Report the (row, col) pattern of the sparse constraint Jacobian in triplet format.
    fn constraint_jacobian_indices(&self, rows: &mut [i32], cols: &mut [i32]) -> bool {
        rows.copy_from_slice(&self.con_jac_row_vec);
        cols.copy_from_slice(&self.con_jac_col_vec);
        true
    }

    /// Scatter the cached dense Jacobian (shared with `objective_grad`) into IPOPT's sparse
    /// value buffer. Returns `false` to IPOPT if the model evaluation failed.
    fn constraint_jacobian_values(&self, x: &[f64], new_x: bool, vals: &mut [f64]) -> bool {
        self.update_cache(new_x);
        let mut cache = self.grad_cache.borrow_mut();
        let Some((_, jac)) = cache.get_or_insert_with(|| {
            let x = SVector::from_column_slice(x);
            self.evaluate_gradients(x.data.0[0]).ok()
        }) else {
            return false;
        };
        for ((v, &r), &c) in vals
            .iter_mut()
            .zip(&self.con_jac_row_vec)
            .zip(&self.con_jac_col_vec)
        {
            // `jac` is a Vec of dense gradient rows, one `[f64; X]` per constraint.
            *v = jac[r as usize][c as usize];
        }
        true
    }

    fn num_hessian_non_zeros(&self) -> usize {
        self.hess_col_vec.len()
    }

    /// Report the (row, col) pattern of the lower triangle of the Lagrangian Hessian.
    fn hessian_indices(&self, rows: &mut [i32], cols: &mut [i32]) -> bool {
        rows.copy_from_slice(&self.hess_row_vec);
        cols.copy_from_slice(&self.hess_col_vec);
        true
    }

    /// Evaluate the Hessian of the Lagrangian
    /// `obj_factor * f(x) + sum_j lambda_j * g_j(x)` with second-order dual numbers.
    /// Returns `false` to IPOPT if the model evaluation failed.
    fn hessian_values(
        &self,
        x: &[f64],
        _new_x: bool,
        obj_factor: f64,
        lambda: &[f64],
        vals: &mut [f64],
    ) -> bool {
        let Ok((_, _, hess)) = try_hessian(
            |x| {
                self.problem.evaluate(x.data.0[0]).map(|(f, g)| {
                    f * obj_factor
                        + g.into_iter()
                            .zip(lambda)
                            .map(|(g, &l)| g * l)
                            .sum::<Dual2Vec<_, _, _>>()
                })
            },
            SVector::from_column_slice(x),
        ) else {
            return false;
        };
        // Scatter only the detected lower-triangular entries into IPOPT's sparse buffer.
        for ((v, &r), &c) in vals
            .iter_mut()
            .zip(&self.hess_row_vec)
            .zip(&self.hess_col_vec)
        {
            *v = hess[(r as usize, c as usize)];
        }
        true
    }
}
516
#[cfg(test)]
mod test {
    use super::*;
    use crate::ipopt::Ipopt;
    use approx::assert_relative_eq;
    use std::convert::Infallible;

    /// Problem 71 from the Hock-Schittkowsky test suite (same example as the IPOPT docs).
    struct HS071;

    impl BasicADProblem<4> for HS071 {
        fn bounds(&self) -> ([f64; 4], [f64; 4]) {
            ([1.0; 4], [5.0; 4])
        }

        fn initial_point(&self) -> [f64; 4] {
            [1.0, 5.0, 5.0, 1.0]
        }

        fn constraint_bounds(&self) -> (Vec<f64>, Vec<f64>) {
            (vec![25.0, 40.0], vec![f64::INFINITY, 40.0])
        }
    }

    impl SimpleADProblem<4> for HS071 {
        fn objective<D: DualNum<f64> + Copy>(&self, x: [D; 4]) -> D {
            let [x1, x2, x3, x4] = x;
            x1 * x4 * (x1 + x2 + x3) + x3
        }

        fn constraint_values<D: DualNum<f64> + Copy>(&self, x: [D; 4]) -> Vec<D> {
            let [x1, x2, x3, x4] = x;
            vec![x1 * x2 * x3 * x4, x1 * x1 + x2 * x2 + x3 * x3 + x4 * x4]
        }
    }

    impl CachedADProblem<4> for HS071 {
        type Error = Infallible;

        // The cached interface delegates to the simple one: identical formulas, combined
        // into a single (infallible) evaluation.
        fn evaluate<D: DualNum<f64> + Copy>(&self, x: [D; 4]) -> Result<(D, Vec<D>), Infallible> {
            Ok((self.objective(x), self.constraint_values(x)))
        }
    }

    /// Compare a solver result against the solution reported in the literature.
    fn assert_hs071_solution(x: &[f64]) {
        println!("{x:?}");
        let x_lit = &[1f64, 4.74299963, 3.82114998, 1.37940829] as &[f64];
        assert_relative_eq!(x, x_lit, max_relative = 1e-8);
    }

    #[test]
    fn test_problem() {
        let nlp = ADProblem::new(HS071);
        let mut solver = Ipopt::new(nlp).unwrap();
        solver.set_option("print_level", 0);
        let result = solver.solve();
        assert_hs071_solution(result.solver_data.solution.primal_variables);
    }

    #[test]
    fn test_problem_cached() {
        let nlp = ADProblem::new_cached(HS071).unwrap();
        let mut solver = Ipopt::new(nlp).unwrap();
        solver.set_option("print_level", 0);
        let result = solver.solve();
        assert_hs071_solution(result.solver_data.solution.primal_variables);
    }
}