Skip to main content

ipopt_ad/
lib.rs

1//! # IPOPT-AD - Blackbox NLP solver using IPOPT and automatic differentiation
2//! This crate provides a simple interface to the [IPOPT](https://coin-or.github.io/Ipopt/index.html)
3//! nonlinear program (NLP) solver. By evaluating gradients and Hessians using automatic differentiation
4//! provided by the [num-dual] crate, it "converts" IPOPT into a blackbox solver without introducing
5//! numerical errors through forward differences that could diminish the robustness and accuracy. The
6//! only limitation is that the evaluation of the objective and constraints has to be implemented
7//! generically for values implementing the [DualNum] trait.
8//!
9//! ## Example: problem 71 from the Hock-Schittkowsky test-suite
10//! This example demonstrates how to solve the problem that is also used as example in the
11//! [documentation](https://coin-or.github.io/Ipopt/INTERFACES.html) of IPOPT. Because the
12//! objective value and constraints are simple functions of x, the [SimpleADProblem] interface
13//! can be used.
14//! ```
15//! # #[cfg(feature = "ripopt")]
16//! # {
17//! use ipopt_ad::{ADProblem, BasicADProblem, SimpleADProblem};
18//! use ripopt::solve;
19//! use num_dual::DualNum;
20//! use approx::assert_relative_eq;
21//!
22//! struct HS071;
23//!
24//! impl BasicADProblem<4> for HS071 {
25//!     fn bounds(&self) -> ([f64; 4], [f64; 4]) {
26//!         ([1.0; 4], [5.0; 4])
27//!     }
28//!     fn initial_point(&self) -> [f64; 4] {
29//!         [1.0, 5.0, 5.0, 1.0]
30//!     }
31//!     fn constraint_bounds(&self) -> (Vec<f64>, Vec<f64>) {
32//!         (vec![25.0, 40.0], vec![f64::INFINITY, 40.0])
33//!     }
34//! }
35//!
36//! impl SimpleADProblem<4> for HS071 {
37//!     fn objective<D: DualNum<f64> + Copy>(&self, x: [D; 4]) -> D {
38//!         let [x1, x2, x3, x4] = x;
39//!         x1 * x4 * (x1 + x2 + x3) + x3
40//!     }
41//!     fn constraints<D: DualNum<f64> + Copy>(&self, x: [D; 4]) -> Vec<D> {
42//!         let [x1, x2, x3, x4] = x;
43//!         vec![x1 * x2 * x3 * x4, x1 * x1 + x2 * x2 + x3 * x3 + x4 * x4]
44//!     }
45//! }
46//!
47//! let problem = ADProblem::new(HS071);
48//! let res = solve(&problem, &Default::default());
49//! let x_lit = &[1f64, 4.74299963, 3.82114998, 1.37940829] as &[f64];
50//! assert_relative_eq!(&res.x as &[f64], x_lit, max_relative = 1e-7);
51//! # }
52//! ```
53//!
54//! For more complex NLPs, it can be advantageous to evaluate the objective function and constraints
55//! simultaneously, which is possible with the [CachedADProblem] interface. For the HS071 problem,
56//! this looks like this:
57//! ```
58//! # #[cfg(feature = "ripopt")]
59//! # {
60//! # use ipopt_ad::{ADProblem, BasicADProblem, CachedADProblem};
61//! # use ripopt::solve;
62//! # use num_dual::DualNum;
63//! # use approx::assert_relative_eq;
64//! # use std::convert::Infallible;
65//! # struct HS071;
66//! # impl BasicADProblem<4> for HS071 {
67//! #     fn bounds(&self) -> ([f64; 4], [f64; 4]) {
68//! #         ([1.0; 4], [5.0; 4])
69//! #     }
70//! #     fn initial_point(&self) -> [f64; 4] {
71//! #         [1.0, 5.0, 5.0, 1.0]
72//! #     }
73//! #     fn constraint_bounds(&self) -> (Vec<f64>, Vec<f64>) {
74//! #         (vec![25.0, 40.0], vec![f64::INFINITY, 40.0])
75//! #     }
76//! # }
77//! impl CachedADProblem<4> for HS071 {
78//!     type Error = Infallible;
79//!     fn evaluate<D: DualNum<f64> + Copy>(&self, x: [D; 4]) -> Result<(D, Vec<D>), Infallible> {
80//!         let [x1, x2, x3, x4] = x;
81//!         Ok((
82//!             x1 * x4 * (x1 + x2 + x3) + x3,
83//!             vec![x1 * x2 * x3 * x4, x1 * x1 + x2 * x2 + x3 * x3 + x4 * x4],
84//!         ))
85//!     }
86//! }
87//!
88//! let problem = ADProblem::new_cached(HS071).unwrap();
89//! let res = solve(&problem, &Default::default());
90//! let x_lit = &[1f64, 4.74299963, 3.82114998, 1.37940829] as &[f64];
91//! assert_relative_eq!(&res.x as &[f64], x_lit, max_relative = 1e-7);
92//! # }
93//! ```
94use nalgebra::{SVector, U1};
95use num::FromPrimitive;
96use num_dual::{Derivative, DualNum, DualVec, DualVec64, HyperDualVec, HyperDualVec64};
97use std::cell::RefCell;
98use std::convert::Infallible;
99
100#[cfg(feature = "ipopt")]
101pub mod ipopt;
102#[cfg(feature = "ripopt")]
103pub mod ripopt;
104
105/// The basic information that needs to be provided for every optimization problem.
106pub trait BasicADProblem<const X: usize> {
107    /// Return lower and upper bounds for all variables.
108    fn bounds(&self) -> ([f64; X], [f64; X]);
109
110    /// Return an initial guess for all variables.
111    fn initial_point(&self) -> [f64; X];
112
113    /// Return the lower and upper bounds for all constraints.
114    fn constraint_bounds(&self) -> (Vec<f64>, Vec<f64>);
115}
116
117/// The interface for a simple NLP in which the objective function and constraint values are
118/// determined separate from each other.
119pub trait SimpleADProblem<const X: usize>: BasicADProblem<X> {
120    /// Return the objective value for the given x.
121    fn objective<D: DualNum<f64> + Copy>(&self, x: [D; X]) -> D;
122
123    /// Return the values of all constraints for the given x.
124    fn constraints<D: DualNum<f64> + Copy>(&self, x: [D; X]) -> Vec<D>;
125}
126
127/// The interface for an NLP in which it is more efficient to evaluate the objective and
128/// constraint value in the same function.
129///
130/// Internally, the results are cached for repeated calls with the same x so that the number
131/// of function evaluations is kept to a minimum.
132pub trait CachedADProblem<const X: usize>: BasicADProblem<X> {
133    /// The error type used in the evaluation.
134    type Error;
135
136    /// Return the objective value and values for all constraints simultaneously.
137    fn evaluate<D: DualNum<f64> + Copy>(&self, x: [D; X]) -> Result<(D, Vec<D>), Self::Error>;
138}
139
140/// Wrapper struct that handles caching and sparsity detection to interface between the blackbox
141/// model and IPOPT.
142#[expect(clippy::type_complexity)]
143#[allow(unused)]
144pub struct ADProblem<T, I, const X: usize, const CACHE: bool> {
145    problem: T,
146    con_jac_row_vec: Vec<I>,
147    con_jac_col_vec: Vec<I>,
148    hess_row_vec: Vec<I>,
149    hess_col_vec: Vec<I>,
150    cache: RefCell<Option<Option<(f64, Vec<f64>)>>>,
151    grad_cache: RefCell<Option<Option<([f64; X], Vec<[f64; X]>)>>>,
152}
153
154impl<T: BasicADProblem<X>, I: FromPrimitive, const X: usize, const CACHE: bool>
155    ADProblem<T, I, X, CACHE>
156{
157    fn new_impl<
158        Err,
159        G: Fn(&T, [DualVec64<U1>; X]) -> Result<Vec<DualVec64<U1>>, Err>,
160        E: Fn(
161            &T,
162            [HyperDualVec64<U1, U1>; X],
163        ) -> Result<(HyperDualVec64<U1, U1>, Vec<HyperDualVec64<U1, U1>>), Err>,
164    >(
165        problem: T,
166        constraints: G,
167        evaluate: E,
168    ) -> Result<Self, Err> {
169        let x = problem.initial_point();
170        let mut con_jac_row_vec = Vec::new();
171        let mut con_jac_col_vec = Vec::new();
172        for i in 0..x.len() {
173            let mut x_dual: [DualVec64<U1>; X] = x.map(DualVec::from_re);
174            x_dual[i].eps = Derivative::derivative_generic(U1, U1, 0);
175            let con = constraints(&problem, x_dual)?;
176            for (j, c) in con.into_iter().enumerate() {
177                if c.eps != Derivative::none() {
178                    con_jac_row_vec.push(I::from_usize(j).unwrap());
179                    con_jac_col_vec.push(I::from_usize(i).unwrap());
180                }
181            }
182        }
183
184        let mut hess_row_vec = Vec::new();
185        let mut hess_col_vec = Vec::new();
186        for row in 0..x.len() {
187            for col in 0..=row {
188                let mut x_dual: [HyperDualVec64<U1, U1>; X] = x.map(HyperDualVec::from_re);
189                x_dual[row].eps1 = Derivative::derivative_generic(U1, U1, 0);
190                x_dual[col].eps2 = Derivative::derivative_generic(U1, U1, 0);
191                let (mut f, con) = evaluate(&problem, x_dual)?;
192                for g in con {
193                    f += g;
194                }
195                if f.eps1eps2 != Derivative::none() {
196                    hess_row_vec.push(I::from_usize(row).unwrap());
197                    hess_col_vec.push(I::from_usize(col).unwrap());
198                }
199            }
200        }
201
202        Ok(Self {
203            problem,
204            con_jac_row_vec,
205            con_jac_col_vec,
206            hess_row_vec,
207            hess_col_vec,
208            cache: RefCell::new(None),
209            grad_cache: RefCell::new(None),
210        })
211    }
212
213    #[allow(unused)]
214    fn update_cache(&self, new_x: bool) {
215        if new_x {
216            *self.cache.borrow_mut() = None;
217            *self.grad_cache.borrow_mut() = None;
218        }
219    }
220}
221
222impl<T: SimpleADProblem<X>, I: FromPrimitive, const X: usize> ADProblem<T, I, X, false> {
223    /// Initialize an NLP using the `SimpleADProblem` interface.
224    pub fn new(problem: T) -> Self {
225        Self::new_impl(
226            problem,
227            |problem, x| Ok::<_, Infallible>(problem.constraints(x)),
228            |problem, x| Ok((problem.objective(x), problem.constraints(x))),
229        )
230        .unwrap()
231    }
232}
233
234impl<T: CachedADProblem<X>, I: FromPrimitive, const X: usize> ADProblem<T, I, X, true> {
235    /// Initialize an NLP using the `CachedADProblem` interface.
236    pub fn new_cached(problem: T) -> Result<Self, T::Error> {
237        Self::new_impl(
238            problem,
239            |problem, x| problem.evaluate(x).map(|(_, g)| g),
240            |problem, x| problem.evaluate(x),
241        )
242    }
243
244    #[expect(clippy::type_complexity)]
245    #[allow(unused)]
246    fn evaluate_gradients(&self, x: [f64; X]) -> Result<([f64; X], Vec<[f64; X]>), T::Error> {
247        let mut x = SVector::from(x).map(DualVec::from_re);
248        let (r, c) = x.shape_generic();
249        for (i, xi) in x.iter_mut().enumerate() {
250            xi.eps = Derivative::derivative_generic(r, c, i);
251        }
252        let (f, g) = self.problem.evaluate(x.data.0[0])?;
253        Ok((
254            f.eps.unwrap_generic(r, c).data.0[0],
255            g.into_iter()
256                .map(|g| g.eps.unwrap_generic(r, c).data.0[0])
257                .collect(),
258        ))
259    }
260}