use crate::{DMatrix, DVector, Float};
use std::convert::Infallible;
pub trait GenericCostFunction<U = (), E = Infallible> {
type Input;
fn evaluate_generic(&self, x: &Self::Input, args: &U) -> Result<Float, E>;
}
pub trait GenericGradient<U = (), E = Infallible>: GenericCostFunction<U, E> {
fn gradient_generic(&self, x: &Self::Input, args: &U) -> Result<DVector<Float>, E>;
fn evaluate_with_gradient_generic(
&self,
x: &Self::Input,
args: &U,
) -> Result<(Float, DVector<Float>), E> {
Ok((
self.evaluate_generic(x, args)?,
self.gradient_generic(x, args)?,
))
}
fn hessian_generic(&self, x: &Self::Input, args: &U) -> Result<DMatrix<Float>, E>;
fn evaluate_with_hessian_generic(
&self,
x: &Self::Input,
args: &U,
) -> Result<(Float, DMatrix<Float>), E> {
Ok((
self.evaluate_generic(x, args)?,
self.hessian_generic(x, args)?,
))
}
fn gradient_with_hessian_generic(
&self,
x: &Self::Input,
args: &U,
) -> Result<(DVector<Float>, DMatrix<Float>), E> {
Ok((
self.gradient_generic(x, args)?,
self.hessian_generic(x, args)?,
))
}
fn evaluate_with_gradient_and_hessian_generic(
&self,
x: &Self::Input,
args: &U,
) -> Result<(Float, DVector<Float>, DMatrix<Float>), E> {
let (v, g) = self.evaluate_with_gradient_generic(x, args)?;
Ok((v, g, self.hessian_generic(x, args)?))
}
}
pub trait CostFunction<U = (), E = Infallible> {
fn evaluate(&self, x: &DVector<Float>, args: &U) -> Result<Float, E>;
}
pub trait Gradient<U = (), E = Infallible>: CostFunction<U, E> {
fn gradient(&self, x: &DVector<Float>, args: &U) -> Result<DVector<Float>, E> {
let n = x.len();
let mut grad = DVector::zeros(n);
let mut xw = x.clone();
for i in 0..n {
let xi = xw[i];
let hi = Float::cbrt(Float::EPSILON) * (xi.abs() + 1.0);
xw[i] = xi + hi;
let f_plus = self.evaluate(&xw, args)?;
xw[i] = xi - hi;
let f_minus = self.evaluate(&xw, args)?;
xw[i] = xi;
grad[i] = (f_plus - f_minus) / (2.0 * hi);
}
Ok(grad)
}
fn evaluate_with_gradient(
&self,
x: &DVector<Float>,
args: &U,
) -> Result<(Float, DVector<Float>), E> {
Ok((self.evaluate(x, args)?, self.gradient(x, args)?))
}
fn hessian(&self, x: &DVector<Float>, args: &U) -> Result<DMatrix<Float>, E> {
let n = x.len();
let mut xw = x.clone();
let h: DVector<Float> = xw
.iter()
.map(|&xi| Float::cbrt(Float::EPSILON) * (xi.abs() + 1.0))
.collect::<Vec<_>>()
.into();
let mut hess = DMatrix::zeros(n, n);
let mut g_plus = DMatrix::zeros(n, n);
let mut g_minus = DMatrix::zeros(n, n);
for i in 0..x.len() {
let xi = xw[i];
xw[i] = xi + h[i];
g_plus.set_column(i, &self.gradient(&xw, args)?);
xw[i] = xi - h[i];
g_minus.set_column(i, &self.gradient(&xw, args)?);
xw[i] = xi;
hess[(i, i)] = (g_plus[(i, i)] - g_minus[(i, i)]) / (2.0 * h[i]);
for j in 0..i {
hess[(i, j)] = ((g_plus[(i, j)] - g_minus[(i, j)]) / (4.0 * h[j]))
+ ((g_plus[(j, i)] - g_minus[(j, i)]) / (4.0 * h[i]));
hess[(j, i)] = hess[(i, j)];
}
}
Ok(hess)
}
fn gradient_with_hessian(
&self,
x: &DVector<Float>,
args: &U,
) -> Result<(DVector<Float>, DMatrix<Float>), E> {
Ok((self.gradient(x, args)?, self.hessian(x, args)?))
}
fn evaluate_with_gradient_and_hessian(
&self,
x: &DVector<Float>,
args: &U,
) -> Result<(Float, DVector<Float>, DMatrix<Float>), E> {
let (v, g) = self.evaluate_with_gradient(x, args)?;
Ok((v, g, self.hessian(x, args)?))
}
}
pub trait LogDensity<U = (), E = Infallible> {
fn log_density(&self, x: &DVector<Float>, args: &U) -> Result<Float, E>;
}
#[cfg(test)]
mod tests {
use crate::{
traits::{CostFunction, Gradient},
DVector, Float,
};
use approx::assert_relative_eq;
use std::convert::Infallible;
struct TestFunction;
impl CostFunction for TestFunction {
fn evaluate(&self, x: &DVector<Float>, _: &()) -> Result<Float, Infallible> {
#[allow(clippy::suboptimal_flops)]
Ok(x[0].powi(2) + x[1].powi(2) + 1.0)
}
}
impl Gradient for TestFunction {}
#[test]
fn test_cost_function() {
let x: DVector<Float> = DVector::from_vec(vec![1.0, 2.0]);
let y = TestFunction.evaluate(&x, &()).unwrap();
assert_eq!(y, 6.0);
}
#[test]
fn test_cost_function_gradient() {
let x: DVector<Float> = DVector::from_vec(vec![1.0, 2.0]);
let dy = TestFunction.gradient(&x, &()).unwrap();
assert_relative_eq!(dy[0], 2.0, epsilon = Float::EPSILON.sqrt());
assert_relative_eq!(dy[1], 4.0, epsilon = Float::EPSILON.sqrt());
}
#[test]
fn test_cost_function_hessian() {
let x: DVector<Float> = DVector::from_vec(vec![1.0, 2.0]);
let hessian = TestFunction.hessian(&x, &()).unwrap();
assert_relative_eq!(hessian[(0, 0)], 2.0, epsilon = Float::EPSILON.cbrt());
assert_relative_eq!(hessian[(1, 1)], 2.0, epsilon = Float::EPSILON.cbrt());
assert_relative_eq!(hessian[(0, 1)], 0.0, epsilon = Float::EPSILON.cbrt());
assert_relative_eq!(hessian[(1, 0)], 0.0, epsilon = Float::EPSILON.cbrt());
}
#[test]
fn test_cost_function_covariance_and_std() {
use crate::core::utils::hessian_to_covariance;
let x: DVector<Float> = DVector::from_vec(vec![1.0, 2.0]);
let hessian = TestFunction.hessian(&x, &()).unwrap();
let cov = hessian_to_covariance(&hessian).unwrap();
assert_relative_eq!(cov[(0, 0)], 0.5, epsilon = Float::EPSILON.cbrt());
assert_relative_eq!(cov[(1, 1)], 0.5, epsilon = Float::EPSILON.cbrt());
}
}