use super::gaussian_mixture::GaussianMixture;
use crate::clustering::{find_best_number_of_clusters, sort_by_cluster};
use crate::errors::MoeError;
use crate::errors::Result;
use crate::parameters::{GpMixtureParams, GpMixtureValidParams};
use crate::surrogates::*;
use crate::types::*;
use crate::{expertise_macros::*, GpType};
use egobox_gp::metrics::CrossValScore;
use egobox_gp::{correlation_models::*, mean_models::*, GaussianProcess, SparseGaussianProcess};
use linfa::dataset::Records;
use linfa::traits::{Fit, Predict, PredictInplace};
use linfa::{Dataset, DatasetBase, Float, ParamGuard};
use linfa_clustering::GaussianMixtureModel;
use log::{debug, info, trace};
use paste::paste;
use std::cmp::Ordering;
use std::ops::Sub;
#[cfg(not(feature = "blas"))]
use linfa_linalg::norm::*;
use ndarray::{
concatenate, s, Array1, Array2, Array3, ArrayBase, ArrayView2, Axis, Data, Ix2, Zip,
};
#[cfg(feature = "blas")]
use ndarray_linalg::Norm;
use ndarray_rand::rand::Rng;
use ndarray_stats::QuantileExt;
#[cfg(feature = "serializable")]
use serde::{Deserialize, Serialize};
#[cfg(feature = "persistent")]
use std::fs;
#[cfg(feature = "persistent")]
use std::io::Write;
/// Push the stringified model name into `$list` when the corresponding flag is
/// set in the bitflags spec `$spec`.
///
/// `paste!` builds both the spec type name (e.g. `RegressionSpec`,
/// `CorrelationSpec`) and the upper-cased flag constant from the model
/// identifier, so `check_allowed!(spec, Regression, Linear, names)` expands to
/// `if spec.contains(RegressionSpec::LINEAR) { names.push("Linear") }`.
macro_rules! check_allowed {
    ($spec:ident, $model_kind:ident, $model:ident, $list:ident) => {
        paste! {
            if $spec.contains([< $model_kind Spec>]::[< $model:upper >]) {
                $list.push(stringify!($model));
            }
        }
    };
}
/// linfa `Fit` entry point: delegates to [`GpMixtureValidParams::train`] using
/// the dataset records as inputs and its targets as outputs.
impl<D: Data<Elem = f64>> Fit<ArrayBase<D, Ix2>, ArrayBase<D, Ix2>, MoeError>
    for GpMixtureValidParams<f64>
{
    type Object = GpMixture;

    fn fit(
        &self,
        dataset: &DatasetBase<ArrayBase<D, Ix2>, ArrayBase<D, Ix2>>,
    ) -> Result<Self::Object> {
        self.train(dataset.records(), dataset.targets())
    }
}
impl GpMixtureValidParams<f64> {
    /// Train a mixture of GP experts on inputs `xt` and outputs `yt`.
    ///
    /// Steps:
    /// 1. determine the number of clusters and the recombination mode
    ///    (automatically when `n_clusters() == 0`),
    /// 2. fit (or reuse) a Gaussian mixture model in the joint (x, y) space,
    /// 3. delegate the per-cluster expert training to [`Self::train_on_clusters`].
    pub fn train(
        &self,
        xt: &ArrayBase<impl Data<Elem = f64>, Ix2>,
        yt: &ArrayBase<impl Data<Elem = f64>, Ix2>,
    ) -> Result<GpMixture> {
        trace!("Moe training...");
        let nx = xt.ncols();
        // Clustering is performed in the joint input/output space.
        let data = concatenate(Axis(1), &[xt.view(), yt.view()]).unwrap();
        let (n_clusters, recomb) = if self.n_clusters() == 0 {
            // Automatic mode: heuristic upper bound of ~10% of the sample count.
            let max_nb_clusters = xt.nrows() / 10 + 1;
            find_best_number_of_clusters(
                xt,
                yt,
                max_nb_clusters,
                self.kpls_dim(),
                self.regression_spec(),
                self.correlation_spec(),
                self.rng(),
            )
        } else {
            (self.n_clusters(), self.recombination())
        };
        if self.n_clusters() == 0 {
            debug!("Automatic settings {} {:?}", n_clusters, recomb);
        }
        // When the heaviside factor will be optimized later (Smooth(None)),
        // hold out every 5th point so the GMM is fitted on the remainder only.
        // NOTE(review): the condition tests the *configured* cluster number, so in
        // automatic mode (n_clusters() == 0) no hold-out split is made even when
        // several clusters are found — confirm intended.
        let training = if recomb == Recombination::Smooth(None) && self.n_clusters() > 1 {
            let (_, training_data) = extract_part(&data, 5);
            training_data
        } else {
            data.to_owned()
        };
        let dataset = Dataset::from(training);
        // Reuse a user-provided Gaussian mixture when available, otherwise fit one.
        let gmx = if self.gmx().is_some() {
            self.gmx().unwrap().clone()
        } else {
            trace!("GMM training...");
            let gmm = GaussianMixtureModel::params(n_clusters)
                .n_runs(20)
                .with_rng(self.rng())
                .fit(&dataset)?;
            // Keep only the input-space (first `nx` columns) marginals of the
            // mixture: cluster membership at prediction time depends on x only.
            let weights = gmm.weights().to_owned();
            let means = gmm.means().slice(s![.., ..nx]).to_owned();
            let covariances = gmm.covariances().slice(s![.., ..nx, ..nx]).to_owned();
            // Heaviside factor defaults to 1 unless explicitly given.
            let factor = match recomb {
                Recombination::Smooth(Some(f)) => f,
                Recombination::Smooth(None) => 1.,
                Recombination::Hard => 1.,
            };
            GaussianMixture::new(weights, means, covariances)?.heaviside_factor(factor)
        };
        trace!("Train on clusters...");
        let clustering = Clustering::new(gmx, recomb);
        self.train_on_clusters(&xt.view(), &yt.view(), &clustering)
    }

    /// Train one GP expert per cluster given an already-computed `clustering`.
    ///
    /// With `Smooth(None)` recombination and several clusters, the heaviside
    /// factor is first optimized on a hold-out part and the whole mixture is
    /// then retrained with the optimized factor.
    pub fn train_on_clusters(
        &self,
        xt: &ArrayBase<impl Data<Elem = f64>, Ix2>,
        yt: &ArrayBase<impl Data<Elem = f64>, Ix2>,
        clustering: &Clustering,
    ) -> Result<GpMixture> {
        let gmx = clustering.gmx();
        let recomb = clustering.recombination();
        let nx = xt.ncols();
        let data = concatenate(Axis(1), &[xt.view(), yt.view()]).unwrap();
        // Assign each training point to its most likely cluster.
        let dataset_clustering = gmx.predict(xt);
        let clusters = sort_by_cluster(gmx.n_clusters(), &data, &dataset_clustering);
        check_number_of_points(&clusters, xt.ncols())?;
        // Fit one expert per cluster.
        let mut experts = Vec::new();
        let nb_clusters = clusters.len();
        for (nc, cluster) in clusters.iter().enumerate() {
            // A GP expert cannot be trained on fewer than 3 points.
            if nb_clusters > 1 && cluster.nrows() < 3 {
                return Err(MoeError::ClusteringError(format!(
                    "Not enough points in cluster, requires at least 3, got {}",
                    cluster.nrows()
                )));
            }
            debug!("nc={} theta_tuning={:?}", nc, self.theta_tunings());
            let expert = self.find_best_expert(nc, nx, cluster)?;
            experts.push(expert);
        }
        if recomb == Recombination::Smooth(None) && self.n_clusters() > 1 {
            // Optimize the heaviside factor on the held-out part, then retrain
            // the mixture with the resulting explicit smooth recombination.
            let (test, _) = extract_part(&data, 5);
            let xtest = test.slice(s![.., ..nx]).to_owned();
            let ytest = test.slice(s![.., nx..]).to_owned();
            let factor = self.optimize_heaviside_factor(&experts, gmx, &xtest, &ytest);
            info!("Retrain mixture with optimized heaviside factor={}", factor);
            let moe = GpMixtureParams::from(self.clone())
                .n_clusters(gmx.n_clusters())
                .recombination(Recombination::Smooth(Some(factor)))
                .check()?
                .train(xt, yt)?;
            Ok(moe)
        } else {
            Ok(GpMixture {
                gp_type: self.gp_type().clone(),
                recombination: recomb,
                experts,
                gmx: gmx.clone(),
                training_data: (xt.to_owned(), yt.to_owned()),
                params: self.clone(),
            })
        }
    }

    /// Select and train the best expert for cluster `nc` on `data`
    /// (x columns first, then y columns).
    ///
    /// Every allowed (mean, correlation) combination is scored and the most
    /// accurate one wins; when a single combination is allowed the comparison
    /// is skipped entirely.
    fn find_best_expert(
        &self,
        nc: usize,
        nx: usize,
        data: &ArrayBase<impl Data<Elem = f64>, Ix2>,
    ) -> Result<Box<dyn FullGpSurrogate>> {
        let xtrain = data.slice(s![.., ..nx]).to_owned();
        let ytrain = data.slice(s![.., nx..]).to_owned();
        let mut dataset = Dataset::from((xtrain.clone(), ytrain.clone()));
        // Collect the allowed regression (mean) models...
        let regression_spec = self.regression_spec();
        let mut allowed_means = vec![];
        check_allowed!(regression_spec, Regression, Constant, allowed_means);
        check_allowed!(regression_spec, Regression, Linear, allowed_means);
        check_allowed!(regression_spec, Regression, Quadratic, allowed_means);
        // ...and the allowed correlation kernels.
        let correlation_spec = self.correlation_spec();
        let mut allowed_corrs = vec![];
        check_allowed!(
            correlation_spec,
            Correlation,
            SquaredExponential,
            allowed_corrs
        );
        check_allowed!(
            correlation_spec,
            Correlation,
            AbsoluteExponential,
            allowed_corrs
        );
        check_allowed!(correlation_spec, Correlation, Matern32, allowed_corrs);
        check_allowed!(correlation_spec, Correlation, Matern52, allowed_corrs);
        debug!("Find best expert");
        let best = if allowed_means.len() == 1 && allowed_corrs.len() == 1 {
            // Only one combination allowed: no error estimation needed.
            (format!("{}_{}", allowed_means[0], allowed_corrs[0]), None)
        } else {
            // Score each combination and keep the one with the lowest error.
            let mut map_error = Vec::new();
            compute_errors!(self, allowed_means, allowed_corrs, dataset, map_error);
            let errs: Vec<f64> = map_error.iter().map(|(_, err)| *err).collect();
            debug!("Accuracies {:?}", map_error);
            // `partial_cmp` may fail on NaN errors; such entries compare Equal.
            let argmin = errs
                .iter()
                .enumerate()
                .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(Ordering::Equal))
                .map(|(index, _)| index)
                .unwrap();
            (map_error[argmin].0.clone(), Some(map_error[argmin].1))
        };
        debug!("after Find best expert");
        let expert = match self.gp_type() {
            GpType::FullGp { .. } => {
                // Map the winning "<Mean>_<Corr>" name back to concrete GP parameters.
                let best_expert_params: std::result::Result<Box<dyn GpSurrogateParams>, MoeError> =
                    match best.0.as_str() {
                        "Constant_SquaredExponential" => {
                            Ok(make_surrogate_params!(Constant, SquaredExponential))
                        }
                        "Constant_AbsoluteExponential" => {
                            Ok(make_surrogate_params!(Constant, AbsoluteExponential))
                        }
                        "Constant_Matern32" => Ok(make_surrogate_params!(Constant, Matern32)),
                        "Constant_Matern52" => Ok(make_surrogate_params!(Constant, Matern52)),
                        "Linear_SquaredExponential" => {
                            Ok(make_surrogate_params!(Linear, SquaredExponential))
                        }
                        "Linear_AbsoluteExponential" => {
                            Ok(make_surrogate_params!(Linear, AbsoluteExponential))
                        }
                        "Linear_Matern32" => Ok(make_surrogate_params!(Linear, Matern32)),
                        "Linear_Matern52" => Ok(make_surrogate_params!(Linear, Matern52)),
                        "Quadratic_SquaredExponential" => {
                            Ok(make_surrogate_params!(Quadratic, SquaredExponential))
                        }
                        "Quadratic_AbsoluteExponential" => {
                            Ok(make_surrogate_params!(Quadratic, AbsoluteExponential))
                        }
                        "Quadratic_Matern32" => Ok(make_surrogate_params!(Quadratic, Matern32)),
                        "Quadratic_Matern52" => Ok(make_surrogate_params!(Quadratic, Matern52)),
                        _ => {
                            return Err(MoeError::ExpertError(format!("Unknown expert {}", best.0)))
                        }
                    };
                let mut expert_params = best_expert_params?;
                expert_params.n_start(self.n_start());
                expert_params.kpls_dim(self.kpls_dim());
                // A single theta tuning is shared by all clusters; otherwise use
                // the cluster-specific entry.
                if nc > 0 && self.theta_tunings().len() == 1 {
                    expert_params.theta_tuning(self.theta_tunings()[0].clone());
                } else {
                    expert_params.theta_tuning(self.theta_tunings()[nc].clone());
                }
                debug!("Train best expert...");
                expert_params.train(&xtrain.view(), &ytrain.view())
            }
            GpType::SparseGp {
                inducings,
                sparse_method,
                ..
            } => {
                let inducings = inducings.to_owned();
                // Sparse GPs only support the constant mean here.
                let best_expert_params: std::result::Result<Box<dyn SgpSurrogateParams>, MoeError> =
                    match best.0.as_str() {
                        "Constant_SquaredExponential" => {
                            Ok(make_sgp_surrogate_params!(SquaredExponential, inducings))
                        }
                        "Constant_AbsoluteExponential" => {
                            Ok(make_sgp_surrogate_params!(AbsoluteExponential, inducings))
                        }
                        "Constant_Matern32" => Ok(make_sgp_surrogate_params!(Matern32, inducings)),
                        "Constant_Matern52" => Ok(make_sgp_surrogate_params!(Matern52, inducings)),
                        _ => {
                            return Err(MoeError::ExpertError(format!("Unknown expert {}", best.0)))
                        }
                    };
                let mut expert_params = best_expert_params?;
                let seed = self.rng().gen();
                debug!("Theta tuning = {:?}", self.theta_tunings());
                expert_params.sparse_method(*sparse_method);
                expert_params.seed(seed);
                expert_params.n_start(self.n_start());
                expert_params.kpls_dim(self.kpls_dim());
                // NOTE(review): sparse experts always use the first theta tuning,
                // unlike the full-GP branch above — confirm intended.
                expert_params.theta_tuning(self.theta_tunings()[0].clone());
                debug!("Train best expert...");
                expert_params.train(&xtrain.view(), &ytrain.view())
            }
        };
        debug!("...after best expert training");
        if let Some(v) = best.1 {
            info!("Best expert {} accuracy={}", best.0, v);
        }
        expert.map_err(MoeError::from)
    }

    /// Grid-search the heaviside (smoothness) factor minimizing the relative
    /// prediction error on `(xtest, ytest)`.
    ///
    /// Returns 1 when not applicable (hard recombination or single cluster) or
    /// when all candidate errors are already negligible.
    fn optimize_heaviside_factor(
        &self,
        experts: &[Box<dyn FullGpSurrogate>],
        gmx: &GaussianMixture<f64>,
        xtest: &ArrayBase<impl Data<Elem = f64>, Ix2>,
        ytest: &ArrayBase<impl Data<Elem = f64>, Ix2>,
    ) -> f64 {
        if self.recombination() == Recombination::Hard || self.n_clusters() == 1 {
            1.
        } else {
            // 20 candidate factors evenly spread over [0.1, 2.1].
            let scale_factors = Array1::linspace(0.1, 2.1, 20);
            let errors = scale_factors.map(move |&factor| {
                let gmx2 = gmx.clone();
                let gmx2 = gmx2.heaviside_factor(factor);
                let pred = predict_smooth(experts, &gmx2, xtest).unwrap();
                // L2 prediction error, normalized.
                // NOTE(review): normalization uses the norm of xtest, not ytest —
                // confirm intended.
                pred.sub(ytest).mapv(|x| x * x).sum().sqrt() / xtest.mapv(|x| x * x).sum().sqrt()
            });
            let min_error_index = errors.argmin().unwrap();
            if *errors.max().unwrap() < 1e-6 {
                1.
            } else {
                scale_factors[min_error_index]
            }
        }
    }
}
/// Check that each cluster contains enough points to train an expert.
///
/// With `dim` input dimensions, an expert (up to quadratic mean) needs at
/// least `(dim + 1) * (dim + 2) / 2` training points. The check only matters
/// when the data is actually split into several clusters.
///
/// # Errors
/// Returns `MoeError::ClusteringError` when some cluster is too small.
fn check_number_of_points<F>(
    clusters: &[ArrayBase<impl Data<Elem = F>, Ix2>],
    dim: usize,
) -> Result<()> {
    if clusters.len() > 1 {
        let min_number_point = (dim + 1) * (dim + 2) / 2;
        for cluster in clusters {
            // `nrows()` is the number of points; the previous `len()` counted all
            // matrix elements (rows * cols) and made the check overly permissive.
            if cluster.nrows() < min_number_point {
                return Err(MoeError::ClusteringError(format!(
                    "Not enough points in training set. Need {} points, got {}",
                    min_number_point,
                    cluster.nrows()
                )));
            }
        }
    }
    Ok(())
}
/// Smooth (probability-weighted) prediction: for each point, the output is the
/// sum over all experts of `p_k(x) * f_k(x)`, where `p_k` are the mixture
/// membership probabilities.
fn predict_smooth(
    experts: &[Box<dyn FullGpSurrogate>],
    gmx: &GaussianMixture<f64>,
    points: &ArrayBase<impl Data<Elem = f64>, Ix2>,
) -> Result<Array2<f64>> {
    let probas = gmx.predict_probas(points);
    let mut out = Array1::<f64>::zeros(points.nrows());
    Zip::from(&mut out)
        .and(points.rows())
        .and(probas.rows())
        .for_each(|yi, row, weights| {
            // Each expert predicts on the single-row view of the current point.
            let row = row.insert_axis(Axis(0));
            *yi = experts
                .iter()
                .zip(weights)
                .map(|(gp, w)| w * gp.predict(&row).unwrap()[[0, 0]])
                .sum();
        });
    // Return as a single-column matrix.
    Ok(out.insert_axis(Axis(1)))
}
/// Mixture of Gaussian-process experts.
///
/// Predictions are delegated to per-cluster experts and recombined either
/// "hard" (winner-takes-all on cluster membership) or "smooth"
/// (probability-weighted).
#[cfg_attr(feature = "serializable", derive(Serialize, Deserialize))]
pub struct GpMixture {
    // How expert predictions are combined (hard, or smooth with a heaviside factor).
    recombination: Recombination<f64>,
    // One trained GP expert per cluster.
    experts: Vec<Box<dyn FullGpSurrogate>>,
    // Gaussian mixture used to assign points to clusters.
    gmx: GaussianMixture<f64>,
    // Full or sparse GP configuration used for the experts.
    gp_type: GpType<f64>,
    // (xt, yt) kept for cross-validation scoring.
    training_data: (Array2<f64>, Array2<f64>),
    // Validated parameters used to build this mixture (needed for retraining).
    params: GpMixtureValidParams<f64>,
}
// Human-readable form: `Mixture[<recombination>](<expert>, <expert>, ...)`.
impl std::fmt::Display for GpMixture {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let recomb = match self.recombination() {
            Recombination::Hard => "Hard".to_string(),
            Recombination::Smooth(Some(f)) => format!("Smooth({f})"),
            Recombination::Smooth(None) => "Smooth".to_string(),
        };
        // `join` handles the degenerate empty-experts case gracefully, where the
        // previous `reduce(...).unwrap()` would panic.
        let experts = self
            .experts
            .iter()
            .map(|expert| expert.to_string())
            .collect::<Vec<_>>()
            .join(", ");
        write!(f, "Mixture[{}]({})", &recomb, &experts)
    }
}
impl Clustered for GpMixture {
fn n_clusters(&self) -> usize {
self.gmx.n_clusters()
}
fn recombination(&self) -> Recombination<f64> {
self.recombination()
}
fn to_clustering(&self) -> Clustering {
Clustering {
recombination: self.recombination(),
gmx: self.gmx.clone(),
}
}
}
#[cfg_attr(feature = "serializable", typetag::serde)]
impl GpSurrogate for GpMixture {
    /// (input, output) dimensions, taken from the first expert.
    fn dims(&self) -> (usize, usize) {
        self.experts[0].dims()
    }

    /// Prediction, dispatched on the recombination mode.
    fn predict(&self, x: &ArrayView2<f64>) -> Result<Array2<f64>> {
        match self.recombination {
            Recombination::Hard => self.predict_hard(x),
            Recombination::Smooth(_) => self.predict_smooth(x),
        }
    }

    /// Variance prediction, dispatched on the recombination mode.
    fn predict_var(&self, x: &ArrayView2<f64>) -> Result<Array2<f64>> {
        match self.recombination {
            Recombination::Hard => self.predict_var_hard(x),
            Recombination::Smooth(_) => self.predict_var_smooth(x),
        }
    }

    /// Save the mixture as JSON at `path`.
    ///
    /// # Errors
    /// Returns an error when the file cannot be created or written, or when
    /// serialization fails (previously the create failure panicked via `unwrap`).
    #[cfg(feature = "persistent")]
    fn save(&self, path: &str) -> Result<()> {
        let mut file = fs::File::create(path)?;
        let bytes = serde_json::to_string(self).map_err(MoeError::SaveError)?;
        file.write_all(bytes.as_bytes())?;
        Ok(())
    }
}
#[cfg_attr(feature = "serializable", typetag::serde)]
impl GpSurrogateExt for GpMixture {
    /// Prediction gradient, dispatched on the recombination mode.
    fn predict_gradients(&self, x: &ArrayView2<f64>) -> Result<Array2<f64>> {
        if let Recombination::Hard = self.recombination {
            self.predict_gradients_hard(x)
        } else {
            self.predict_gradients_smooth(x)
        }
    }

    /// Variance gradient, dispatched on the recombination mode.
    fn predict_var_gradients(&self, x: &ArrayView2<f64>) -> Result<Array2<f64>> {
        if let Recombination::Hard = self.recombination {
            self.predict_var_gradients_hard(x)
        } else {
            self.predict_var_gradients_smooth(x)
        }
    }

    /// Sample trajectories; only supported for a single-cluster mixture.
    fn sample(&self, x: &ArrayView2<f64>, n_traj: usize) -> Result<Array2<f64>> {
        let n = self.n_clusters();
        if n == 1 {
            self.sample_expert(0, x, n_traj)
        } else {
            Err(MoeError::SampleError(format!(
                "Can not sample when several clusters {}",
                n
            )))
        }
    }
}
// Enable cross-validation scoring (e.g. LOOCV) using the stored training data.
impl CrossValScore<f64, MoeError, GpMixtureParams<f64>, Self> for GpMixture {
    // Training data the mixture was fitted on.
    fn training_data(&self) -> &(Array2<f64>, Array2<f64>) {
        &self.training_data
    }

    // Rebuild the (unchecked) parameters used to fit this mixture.
    fn params(&self) -> GpMixtureParams<f64> {
        GpMixtureParams::<f64>::from(self.params.clone())
    }
}
// Expose the trained experts to generic mixture-surrogate consumers.
impl MixtureGpSurrogate for GpMixture {
    fn experts(&self) -> &Vec<Box<dyn FullGpSurrogate>> {
        &self.experts
    }
}
impl GpMixture {
pub fn params() -> GpMixtureParams<f64> {
GpMixtureParams::new()
}
pub fn gp_type(&self) -> &GpType<f64> {
&self.gp_type
}
pub fn recombination(&self) -> Recombination<f64> {
self.recombination
}
pub fn gmx(&self) -> &GaussianMixture<f64> {
&self.gmx
}
pub fn output_dim(&self) -> usize {
let (_, res) = self.experts[0].dims();
res
}
pub fn set_recombination(mut self, recombination: Recombination<f64>) -> Self {
self.recombination = match recombination {
Recombination::Hard => recombination,
Recombination::Smooth(None) => Recombination::Smooth(Some(1.)),
Recombination::Smooth(Some(_)) => recombination,
};
self
}
pub fn set_gmx(
mut self,
weights: Array1<f64>,
means: Array2<f64>,
covariances: Array3<f64>,
) -> Self {
self.gmx = GaussianMixture::new(weights, means, covariances).unwrap();
self
}
pub fn set_experts(mut self, experts: Vec<Box<dyn FullGpSurrogate>>) -> Self {
self.experts = experts;
self
}
pub fn predict_smooth(&self, x: &ArrayBase<impl Data<Elem = f64>, Ix2>) -> Result<Array2<f64>> {
predict_smooth(&self.experts, &self.gmx, x)
}
pub fn predict_var_smooth(
&self,
x: &ArrayBase<impl Data<Elem = f64>, Ix2>,
) -> Result<Array2<f64>> {
let probas = self.gmx.predict_probas(x);
let mut preds = Array1::<f64>::zeros(x.nrows());
Zip::from(&mut preds)
.and(x.rows())
.and(probas.rows())
.for_each(|y, x, p| {
let x = x.insert_axis(Axis(0));
let preds: Array1<f64> = self
.experts
.iter()
.map(|gp| gp.predict_var(&x).unwrap()[[0, 0]])
.collect();
*y = (preds * p * p).sum();
});
Ok(preds.insert_axis(Axis(1)))
}
pub fn predict_gradients_smooth(
&self,
x: &ArrayBase<impl Data<Elem = f64>, Ix2>,
) -> Result<Array2<f64>> {
let probas = self.gmx.predict_probas(x);
let probas_drv = self.gmx.predict_probas_derivatives(x);
let mut drv = Array2::<f64>::zeros((x.nrows(), x.ncols()));
Zip::from(drv.rows_mut())
.and(x.rows())
.and(probas.rows())
.and(probas_drv.outer_iter())
.for_each(|mut y, x, p, pprime| {
let x = x.insert_axis(Axis(0));
let preds: Array1<f64> = self
.experts
.iter()
.map(|gp| gp.predict(&x).unwrap()[[0, 0]])
.collect();
let drvs: Vec<Array1<f64>> = self
.experts
.iter()
.map(|gp| gp.predict_gradients(&x).unwrap().row(0).to_owned())
.collect();
let preds = preds.insert_axis(Axis(1));
let mut preds_drv = Array2::zeros((self.experts.len(), x.len()));
Zip::indexed(preds_drv.rows_mut()).for_each(|i, mut jc| jc.assign(&drvs[i]));
let mut term1 = Array2::zeros((self.experts.len(), x.len()));
Zip::from(term1.rows_mut())
.and(&p)
.and(preds_drv.rows())
.for_each(|mut t, p, der| t.assign(&(der.to_owned().mapv(|v| v * p))));
let term1 = term1.sum_axis(Axis(0));
let term2 = pprime.to_owned() * preds;
let term2 = term2.sum_axis(Axis(0));
y.assign(&(term1 + term2));
});
Ok(drv)
}
pub fn predict_var_gradients_smooth(
&self,
x: &ArrayBase<impl Data<Elem = f64>, Ix2>,
) -> Result<Array2<f64>> {
let probas = self.gmx.predict_probas(x);
let probas_drv = self.gmx.predict_probas_derivatives(x);
let mut drv = Array2::<f64>::zeros((x.nrows(), x.ncols()));
Zip::from(drv.rows_mut())
.and(x.rows())
.and(probas.rows())
.and(probas_drv.outer_iter())
.for_each(|mut y, xi, p, pprime| {
let xii = xi.insert_axis(Axis(0));
let preds: Array1<f64> = self
.experts
.iter()
.map(|gp| gp.predict_var(&xii).unwrap()[[0, 0]])
.collect();
let drvs: Vec<Array1<f64>> = self
.experts
.iter()
.map(|gp| gp.predict_var_gradients(&xii).unwrap().row(0).to_owned())
.collect();
let preds = preds.insert_axis(Axis(1));
let mut preds_drv = Array2::zeros((self.experts.len(), xi.len()));
Zip::indexed(preds_drv.rows_mut()).for_each(|i, mut jc| jc.assign(&drvs[i]));
let mut term1 = Array2::zeros((self.experts.len(), xi.len()));
Zip::from(term1.rows_mut())
.and(&p)
.and(preds_drv.rows())
.for_each(|mut t, p, der| t.assign(&(der.to_owned().mapv(|v| v * p * p))));
let term1 = term1.sum_axis(Axis(0));
let term2 = (p.to_owned() * pprime * preds).mapv(|v| 2. * v);
let term2 = term2.sum_axis(Axis(0));
y.assign(&(term1 + term2));
});
Ok(drv)
}
pub fn predict_hard(&self, x: &ArrayBase<impl Data<Elem = f64>, Ix2>) -> Result<Array2<f64>> {
let clustering = self.gmx.predict(x);
trace!("Clustering {:?}", clustering);
let mut preds = Array2::zeros((x.nrows(), 1));
Zip::from(preds.rows_mut())
.and(x.rows())
.and(&clustering)
.for_each(|mut y, x, &c| {
y.assign(
&self.experts[c]
.predict(&x.insert_axis(Axis(0)))
.unwrap()
.row(0),
);
});
Ok(preds)
}
pub fn predict_var_hard(
&self,
x: &ArrayBase<impl Data<Elem = f64>, Ix2>,
) -> Result<Array2<f64>> {
let clustering = self.gmx.predict(x);
trace!("Clustering {:?}", clustering);
let mut variances = Array2::zeros((x.nrows(), 1));
Zip::from(variances.rows_mut())
.and(x.rows())
.and(&clustering)
.for_each(|mut y, x, &c| {
y.assign(
&self.experts[c]
.predict_var(&x.insert_axis(Axis(0)))
.unwrap()
.row(0),
);
});
Ok(variances)
}
pub fn predict_gradients_hard(
&self,
x: &ArrayBase<impl Data<Elem = f64>, Ix2>,
) -> Result<Array2<f64>> {
let mut drv = Array2::<f64>::zeros((x.nrows(), x.ncols()));
let clustering = self.gmx.predict(x);
Zip::from(drv.rows_mut())
.and(x.rows())
.and(&clustering)
.for_each(|mut drv_i, xi, &c| {
let x = xi.to_owned().insert_axis(Axis(0));
let x_drv: ArrayBase<ndarray::OwnedRepr<f64>, ndarray::Dim<[usize; 2]>> =
self.experts[c].predict_gradients(&x.view()).unwrap();
drv_i.assign(&x_drv.column(0))
});
Ok(drv)
}
pub fn predict_var_gradients_hard(
&self,
x: &ArrayBase<impl Data<Elem = f64>, Ix2>,
) -> Result<Array2<f64>> {
let mut vardrv = Array2::<f64>::zeros((x.nrows(), x.ncols()));
let clustering = self.gmx.predict(x);
Zip::from(vardrv.rows_mut())
.and(x.rows())
.and(&clustering)
.for_each(|mut vardrv_i, xi, &c| {
let x = xi.to_owned().insert_axis(Axis(0));
let x_vardrv: ArrayBase<ndarray::OwnedRepr<f64>, ndarray::Dim<[usize; 2]>> =
self.experts[c].predict_var_gradients(&x.view()).unwrap();
vardrv_i.assign(&x_vardrv.row(0))
});
Ok(vardrv)
}
pub fn sample_expert(
&self,
ith: usize,
x: &ArrayBase<impl Data<Elem = f64>, Ix2>,
n_traj: usize,
) -> Result<Array2<f64>> {
self.experts[ith].sample(&x.view(), n_traj)
}
pub fn predict(&self, x: &ArrayBase<impl Data<Elem = f64>, Ix2>) -> Result<Array2<f64>> {
<GpMixture as GpSurrogate>::predict(self, &x.view())
}
pub fn predict_var(&self, x: &ArrayBase<impl Data<Elem = f64>, Ix2>) -> Result<Array2<f64>> {
<GpMixture as GpSurrogate>::predict_var(self, &x.view())
}
pub fn predict_gradients(
&self,
x: &ArrayBase<impl Data<Elem = f64>, Ix2>,
) -> Result<Array2<f64>> {
<GpMixture as GpSurrogateExt>::predict_gradients(self, &x.view())
}
pub fn predict_var_gradients(
&self,
x: &ArrayBase<impl Data<Elem = f64>, Ix2>,
) -> Result<Array2<f64>> {
<GpMixture as GpSurrogateExt>::predict_var_gradients(self, &x.view())
}
pub fn sample(
&self,
x: &ArrayBase<impl Data<Elem = f64>, Ix2>,
n_traj: usize,
) -> Result<Array2<f64>> {
<GpMixture as GpSurrogateExt>::sample(self, &x.view(), n_traj)
}
#[cfg(feature = "persistent")]
pub fn load(path: &str) -> Result<Box<GpMixture>> {
let data = fs::read_to_string(path)?;
let moe: GpMixture = serde_json::from_str(&data).unwrap();
Ok(Box::new(moe))
}
}
/// Split the rows of `data` into (test, train) parts: every `quantile`-th row
/// (indices 0, quantile, 2*quantile, ...) goes to the test part, all other
/// rows to the train part.
fn extract_part<F: Float>(
    data: &ArrayBase<impl Data<Elem = F>, Ix2>,
    quantile: usize,
) -> (Array2<F>, Array2<F>) {
    let nsamples = data.nrows();
    // Integer `step_by` replaces the former `Array1::range` f32 round-trip,
    // which loses index exactness beyond 2^24 samples.
    let indices: Vec<usize> = (0..nsamples).step_by(quantile).collect();
    let data_test = data.select(Axis(0), &indices);
    let indices2: Vec<usize> = (0..nsamples).filter(|i| i % quantile != 0).collect();
    let data_train = data.select(Axis(0), &indices2);
    (data_test, data_train)
}
impl<D: Data<Elem = f64>> PredictInplace<ArrayBase<D, Ix2>, Array2<f64>> for GpMixture {
    // linfa in-place prediction: writes mixture predictions for `x` into `y`.
    fn predict_inplace(&self, x: &ArrayBase<D, Ix2>, y: &mut Array2<f64>) {
        assert_eq!(
            x.nrows(),
            y.nrows(),
            "The number of data points must match the number of output targets."
        );
        // The linfa API is infallible, so a prediction failure panics here.
        let values = self.predict(x).expect("MoE prediction");
        *y = values;
    }

    // Allocate a correctly-shaped zeroed target buffer for `predict`.
    fn default_target(&self, x: &ArrayBase<D, Ix2>) -> Array2<f64> {
        Array2::zeros((x.nrows(), self.dims().1))
    }
}
// Variance-predicting adapter over a borrowed `GpMixture`, for linfa's
// `PredictInplace` API (see the impl below).
#[allow(dead_code)]
pub struct MoeVariancePredictor<'a>(&'a GpMixture);
impl<'a, D: Data<Elem = f64>> PredictInplace<ArrayBase<D, Ix2>, Array2<f64>>
    for MoeVariancePredictor<'a>
{
    // linfa in-place prediction: writes mixture *variances* for `x` into `y`.
    fn predict_inplace(&self, x: &ArrayBase<D, Ix2>, y: &mut Array2<f64>) {
        assert_eq!(
            x.nrows(),
            y.nrows(),
            "The number of data points must match the number of output targets."
        );
        // The linfa API is infallible, so a prediction failure panics here.
        let values = self.0.predict_var(x).expect("MoE variances prediction");
        *y = values;
    }

    // Allocate a correctly-shaped zeroed target buffer for `predict`.
    fn default_target(&self, x: &ArrayBase<D, Ix2>) -> Array2<f64> {
        Array2::zeros((x.nrows(), self.0.dims().1))
    }
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_abs_diff_eq;
use argmin_testfunctions::rosenbrock;
use egobox_doe::{Lhs, SamplingMethod};
use ndarray::{array, Array, Array2, Zip};
use ndarray_npy::write_npy;
use ndarray_rand::rand::SeedableRng;
use ndarray_rand::rand_distr::Uniform;
use ndarray_rand::RandomExt;
use rand_xoshiro::Xoshiro256Plus;
fn f_test_1d(x: &Array2<f64>) -> Array2<f64> {
let mut y = Array2::zeros(x.dim());
Zip::from(&mut y).and(x).for_each(|yi, &xi| {
if xi < 0.4 {
*yi = xi * xi;
} else if (0.4..0.8).contains(&xi) {
*yi = 3. * xi + 1.;
} else {
*yi = f64::sin(10. * xi);
}
});
y
}
fn df_test_1d(x: &Array2<f64>) -> Array2<f64> {
let mut y = Array2::zeros(x.dim());
Zip::from(&mut y).and(x).for_each(|yi, &xi| {
if xi < 0.4 {
*yi = 2. * xi;
} else if (0.4..0.8).contains(&xi) {
*yi = 3.;
} else {
*yi = 10. * f64::cos(10. * xi);
}
});
y
}
#[test]
fn test_moe_hard() {
let mut rng = Xoshiro256Plus::seed_from_u64(0);
let xt = Array2::random_using((50, 1), Uniform::new(0., 1.), &mut rng);
let yt = f_test_1d(&xt);
let moe = GpMixture::params()
.n_clusters(3)
.regression_spec(RegressionSpec::CONSTANT)
.correlation_spec(CorrelationSpec::SQUAREDEXPONENTIAL)
.recombination(Recombination::Hard)
.with_rng(rng)
.fit(&Dataset::new(xt, yt))
.expect("MOE fitted");
let x = Array1::linspace(0., 1., 30).insert_axis(Axis(1));
let preds = moe.predict(&x).expect("MOE prediction");
let dpreds = moe.predict_gradients(&x).expect("MOE drv prediction");
println!("dpred = {dpreds}");
let test_dir = "target/tests";
std::fs::create_dir_all(test_dir).ok();
write_npy(format!("{test_dir}/x_hard.npy"), &x).expect("x saved");
write_npy(format!("{test_dir}/preds_hard.npy"), &preds).expect("preds saved");
write_npy(format!("{test_dir}/dpreds_hard.npy"), &dpreds).expect("dpreds saved");
assert_abs_diff_eq!(
0.39 * 0.39,
moe.predict(&array![[0.39]]).unwrap()[[0, 0]],
epsilon = 1e-4
);
assert_abs_diff_eq!(
f64::sin(10. * 0.82),
moe.predict(&array![[0.82]]).unwrap()[[0, 0]],
epsilon = 1e-4
);
println!("LOOCV = {}", moe.loocv_score());
}
#[test]
fn test_moe_smooth() {
let test_dir = "target/tests";
let mut rng = Xoshiro256Plus::seed_from_u64(42);
let xt = Array2::random_using((60, 1), Uniform::new(0., 1.), &mut rng);
let yt = f_test_1d(&xt);
let ds = Dataset::new(xt.to_owned(), yt.to_owned());
let moe = GpMixture::params()
.n_clusters(3)
.recombination(Recombination::Smooth(Some(0.5)))
.with_rng(rng.clone())
.fit(&ds)
.expect("MOE fitted");
let x = Array1::linspace(0., 1., 100).insert_axis(Axis(1));
let preds = moe.predict(&x).expect("MOE prediction");
write_npy(format!("{test_dir}/xt.npy"), &xt).expect("x saved");
write_npy(format!("{test_dir}/yt.npy"), &yt).expect("preds saved");
write_npy(format!("{test_dir}/x_smooth.npy"), &x).expect("x saved");
write_npy(format!("{test_dir}/preds_smooth.npy"), &preds).expect("preds saved");
println!("Smooth moe {moe}");
assert_abs_diff_eq!(
0.2623, moe.predict(&array![[0.37]]).unwrap()[[0, 0]],
epsilon = 1e-3
);
let moe = GpMixture::params()
.n_clusters(3)
.recombination(Recombination::Smooth(None))
.with_rng(rng.clone())
.fit(&ds)
.expect("MOE fitted");
println!("Smooth moe {moe}");
std::fs::create_dir_all(test_dir).ok();
let x = Array1::linspace(0., 1., 100).insert_axis(Axis(1));
let preds = moe.predict(&x).expect("MOE prediction");
write_npy(format!("{test_dir}/x_smooth2.npy"), &x).expect("x saved");
write_npy(format!("{test_dir}/preds_smooth2.npy"), &preds).expect("preds saved");
assert_abs_diff_eq!(
0.37 * 0.37, moe.predict(&array![[0.37]]).unwrap()[[0, 0]],
epsilon = 1e-3
);
}
#[test]
fn test_moe_auto() {
let mut rng = Xoshiro256Plus::seed_from_u64(0);
let xt = Array2::random_using((100, 1), Uniform::new(0., 1.), &mut rng);
let yt = f_test_1d(&xt);
let ds = Dataset::new(xt, yt);
let moe = GpMixture::params()
.n_clusters(0)
.with_rng(rng.clone())
.fit(&ds)
.expect("MOE fitted");
println!(
"Moe auto: nb clusters={}, recomb={:?}",
moe.n_clusters(),
moe.recombination()
);
assert_abs_diff_eq!(
0.37 * 0.37, moe.predict(&array![[0.37]]).unwrap()[[0, 0]],
epsilon = 1e-3
);
}
#[test]
fn test_moe_variances_smooth() {
let mut rng = Xoshiro256Plus::seed_from_u64(42);
let xt = Array2::random_using((100, 1), Uniform::new(0., 1.), &mut rng);
let yt = f_test_1d(&xt);
let moe = GpMixture::params()
.n_clusters(3)
.recombination(Recombination::Smooth(None))
.regression_spec(RegressionSpec::CONSTANT)
.correlation_spec(CorrelationSpec::SQUAREDEXPONENTIAL)
.with_rng(rng.clone())
.fit(&Dataset::new(xt, yt))
.expect("MOE fitted");
let x = Array1::linspace(0., 1., 20).insert_axis(Axis(1));
let variances = moe.predict_var(&x).expect("MOE variances prediction");
assert_abs_diff_eq!(*variances.max().unwrap(), 0., epsilon = 1e-10);
}
fn xsinx(x: &[f64]) -> f64 {
(x[0] - 3.5) * f64::sin((x[0] - 3.5) / std::f64::consts::PI)
}
#[test]
fn test_find_best_expert() {
let mut rng = Xoshiro256Plus::seed_from_u64(0);
let xt = Array2::random_using((10, 1), Uniform::new(0., 1.), &mut rng);
let yt = xt.mapv(|x| xsinx(&[x]));
let data = concatenate(Axis(1), &[xt.view(), yt.view()]).unwrap();
let moe = GpMixture::params().with_rng(rng).check_unwrap();
let best_expert = &moe.find_best_expert(0, 1, &data).unwrap();
println!("Best expert {best_expert}");
}
#[test]
fn test_find_best_heaviside_factor() {
let mut rng = Xoshiro256Plus::seed_from_u64(0);
let xt = Array2::random_using((50, 1), Uniform::new(0., 1.), &mut rng);
let yt = f_test_1d(&xt);
let _moe = GpMixture::params()
.n_clusters(3)
.with_rng(rng)
.fit(&Dataset::new(xt, yt))
.expect("MOE fitted");
}
#[cfg(feature = "persistent")]
#[test]
fn test_save_load_moe() {
let test_dir = "target/tests";
std::fs::create_dir_all(test_dir).ok();
let mut rng = Xoshiro256Plus::seed_from_u64(0);
let xt = Array2::random_using((50, 1), Uniform::new(0., 1.), &mut rng);
let yt = f_test_1d(&xt);
let ds = Dataset::new(xt, yt);
let moe = GpMixture::params()
.n_clusters(3)
.with_rng(rng)
.fit(&ds)
.expect("MOE fitted");
let xtest = array![[0.6]];
let y_expected = moe.predict(&xtest).unwrap();
let filename = format!("{test_dir}/saved_moe.json");
moe.save(&filename).expect("MoE saving");
let new_moe = GpMixture::load(&filename).expect("MoE loading");
assert_abs_diff_eq!(y_expected, new_moe.predict(&xtest).unwrap(), epsilon = 1e-6);
}
#[test]
fn test_moe_drv_smooth() {
let rng = Xoshiro256Plus::seed_from_u64(0);
let xt = Lhs::new(&array![[0., 1.]]).sample(100);
let yt = f_test_1d(&xt);
let moe = GpMixture::params()
.n_clusters(3)
.regression_spec(RegressionSpec::CONSTANT)
.correlation_spec(CorrelationSpec::SQUAREDEXPONENTIAL)
.recombination(Recombination::Smooth(Some(0.5)))
.with_rng(rng)
.fit(&Dataset::new(xt, yt))
.expect("MOE fitted");
let x = Array1::linspace(0., 1., 50).insert_axis(Axis(1));
let preds = moe.predict(&x).expect("MOE prediction");
let dpreds = moe.predict_gradients(&x).expect("MOE drv prediction");
let test_dir = "target/tests";
std::fs::create_dir_all(test_dir).ok();
write_npy(format!("{test_dir}/x_moe_smooth.npy"), &x).expect("x saved");
write_npy(format!("{test_dir}/preds_moe_smooth.npy"), &preds).expect("preds saved");
write_npy(format!("{test_dir}/dpreds_moe_smooth.npy"), &dpreds).expect("dpreds saved");
let mut rng = Xoshiro256Plus::seed_from_u64(42);
for _ in 0..20 {
let x1: f64 = rng.gen_range(0.0..1.0);
let h = 1e-8;
let xtest = array![[x1]];
let x = array![[x1], [x1 + h], [x1 - h]];
let preds = moe.predict(&x).unwrap();
let fdiff = (preds[[1, 0]] - preds[[2, 0]]) / (2. * h);
let drv = moe.predict_gradients(&xtest).unwrap();
let df = df_test_1d(&xtest);
let err = if drv[[0, 0]] < 0.2 {
(drv[[0, 0]] - fdiff).abs()
} else {
(drv[[0, 0]] - fdiff).abs() / drv[[0, 0]]
};
println!(
"Test predicted derivatives at {xtest}: drv {drv}, true df {df}, fdiff {fdiff}"
);
println!("preds(x, x+h, x-h)={}", preds);
assert_abs_diff_eq!(err, 0.0, epsilon = 2.5e-1);
}
}
fn norm1(x: &Array2<f64>) -> Array2<f64> {
x.mapv(|v| v.abs())
.sum_axis(Axis(1))
.insert_axis(Axis(1))
.to_owned()
}
fn rosenb(x: &Array2<f64>) -> Array2<f64> {
let mut y: Array2<f64> = Array2::zeros((x.nrows(), 1));
Zip::from(y.rows_mut())
.and(x.rows())
.par_for_each(|mut yi, xi| yi.assign(&array![rosenbrock(&xi.to_vec())]));
y
}
#[allow(clippy::excessive_precision)]
fn test_variance_derivatives(f: fn(&Array2<f64>) -> Array2<f64>) {
let rng = Xoshiro256Plus::seed_from_u64(0);
let xt = egobox_doe::FullFactorial::new(&array![[-1., 1.], [-1., 1.]]).sample(100);
let yt = f(&xt);
let moe = GpMixture::params()
.n_clusters(2)
.regression_spec(RegressionSpec::CONSTANT)
.correlation_spec(CorrelationSpec::SQUAREDEXPONENTIAL)
.recombination(Recombination::Smooth(Some(1.)))
.with_rng(rng)
.fit(&Dataset::new(xt, yt))
.expect("MOE fitted");
for _ in 0..20 {
let mut rng = Xoshiro256Plus::seed_from_u64(42);
let x = Array::random_using((2,), Uniform::new(0., 1.), &mut rng);
let xa: f64 = x[0];
let xb: f64 = x[1];
let e = 1e-4;
println!("Test derivatives at [{xa}, {xb}]");
let x = array![
[xa, xb],
[xa + e, xb],
[xa - e, xb],
[xa, xb + e],
[xa, xb - e]
];
let y_pred = moe.predict(&x).unwrap();
let y_deriv = moe.predict_gradients(&x).unwrap();
let diff_g = (y_pred[[1, 0]] - y_pred[[2, 0]]) / (2. * e);
let diff_d = (y_pred[[3, 0]] - y_pred[[4, 0]]) / (2. * e);
assert_rel_or_abs_error(y_deriv[[0, 0]], diff_g);
assert_rel_or_abs_error(y_deriv[[0, 1]], diff_d);
let y_pred = moe.predict_var(&x).unwrap();
let y_deriv = moe.predict_var_gradients(&x).unwrap();
let diff_g = (y_pred[[1, 0]] - y_pred[[2, 0]]) / (2. * e);
let diff_d = (y_pred[[3, 0]] - y_pred[[4, 0]]) / (2. * e);
assert_rel_or_abs_error(y_deriv[[0, 0]], diff_g);
assert_rel_or_abs_error(y_deriv[[0, 1]], diff_d);
}
}
fn assert_rel_or_abs_error(y_deriv: f64, fdiff: f64) {
println!("analytic deriv = {y_deriv}, fdiff = {fdiff}");
if fdiff.abs() < 1e-2 {
assert_abs_diff_eq!(y_deriv, 0.0, epsilon = 1e-1); } else {
let drv_rel_error1 = (y_deriv - fdiff).abs() / fdiff; assert_abs_diff_eq!(drv_rel_error1, 0.0, epsilon = 1e-1);
}
}
#[test]
fn test_moe_var_deriv_norm1() {
test_variance_derivatives(norm1);
}
#[test]
fn test_moe_var_deriv_rosenb() {
test_variance_derivatives(rosenb);
}
#[test]
fn test_moe_display() {
let rng = Xoshiro256Plus::seed_from_u64(0);
let xt = Lhs::new(&array![[0., 1.]])
.with_rng(rng.clone())
.sample(100);
let yt = f_test_1d(&xt);
let moe = GpMixture::params()
.n_clusters(3)
.regression_spec(RegressionSpec::CONSTANT)
.correlation_spec(CorrelationSpec::SQUAREDEXPONENTIAL)
.recombination(Recombination::Hard)
.with_rng(rng)
.fit(&Dataset::new(xt, yt))
.expect("MOE fitted");
println!("Display moe: {}", moe);
}
fn griewank(x: &Array2<f64>) -> Array2<f64> {
let dim = x.ncols();
let d = Array1::linspace(1., dim as f64, dim).mapv(|v| v.sqrt());
let mut y = Array2::zeros((x.nrows(), 1));
Zip::from(y.rows_mut()).and(x.rows()).for_each(|mut y, x| {
let s = x.mapv(|v| v * v).sum() / 4000.;
let p = (x.to_owned() / &d)
.mapv(|v| v.cos())
.fold(1., |acc, x| acc * x);
y[0] = s - p + 1.;
});
y
}
#[test]
fn test_kpls_griewank() {
let dims = [100];
let nts = [100];
let lim = array![[-600., 600.]];
let test_dir = "target/tests";
std::fs::create_dir_all(test_dir).ok();
(0..1).for_each(|i| {
let dim = dims[i];
let nt = nts[i];
let xlimits = lim.broadcast((dim, 2)).unwrap();
let prefix = "griewank";
let xfilename = format!("{test_dir}/{prefix}_xt_{nt}x{dim}.npy");
let yfilename = format!("{test_dir}/{prefix}_yt_{nt}x1.npy");
let rng = Xoshiro256Plus::seed_from_u64(42);
let xt = Lhs::new(&xlimits).with_rng(rng).sample(nt);
write_npy(xfilename, &xt).expect("cannot save xt");
let yt = griewank(&xt);
write_npy(yfilename, &yt).expect("cannot save yt");
let gp = GpMixture::params()
.n_clusters(1)
.regression_spec(RegressionSpec::CONSTANT)
.correlation_spec(CorrelationSpec::SQUAREDEXPONENTIAL)
.kpls_dim(Some(3))
.fit(&Dataset::new(xt, yt))
.expect("GP fit error");
let rng = Xoshiro256Plus::seed_from_u64(0);
let xtest = Lhs::new(&xlimits).with_rng(rng).sample(100);
let ytest = gp.predict(&xtest).expect("prediction error");
let ytrue = griewank(&xtest);
let nrmse = (ytrue.to_owned() - &ytest).norm_l2() / ytrue.norm_l2();
println!(
"diff={} ytrue={} nrsme={}",
(ytrue.to_owned() - &ytest).norm_l2(),
ytrue.norm_l2(),
nrmse
);
assert_abs_diff_eq!(nrmse, 0., epsilon = 1e-2);
});
}
}