#[cfg(not(feature = "std"))]
use alloc::vec::Vec;
#[cfg(feature = "std")]
use std::vec::Vec;
use core::cmp::Ordering::Equal;
use core::fmt::Debug;
use num_traits::Float;
use crate::primitives::buffer::CVBuffer;
/// Minimal deterministic pseudo-random generator (64-bit linear
/// congruential generator) used to shuffle fold indices reproducibly.
///
/// Not cryptographically secure; it only needs to be fast and to produce
/// the same sequence for the same seed.
#[derive(Debug, Clone)]
struct SimpleRng {
    // Current 64-bit LCG state; the whole generator is this one word.
    state: u64,
}

impl SimpleRng {
    /// Creates a generator whose output sequence is fully determined by `seed`.
    fn new(seed: u64) -> Self {
        SimpleRng { state: seed }
    }

    /// Advances the LCG one step and returns the upper 32 bits of the new
    /// state (the high bits of an LCG are better distributed than the low ones).
    fn next_u32(&mut self) -> u32 {
        // Widely used 64-bit LCG multiplier (Knuth's MMIX / PCG constant),
        // with increment 1; wrapping arithmetic is the intended modular step.
        const MULTIPLIER: u64 = 6364136223846793005;
        self.state = self.state.wrapping_mul(MULTIPLIER).wrapping_add(1);
        (self.state >> 32) as u32
    }
}
/// Cross-validation strategy used to score candidate smoothing fractions.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CVKind {
/// k-fold cross-validation with the given number of folds.
KFold(usize),
/// Leave-one-out cross-validation: one fold per observation.
#[allow(clippy::upper_case_acronyms)]
LOOCV,
}
/// Borrowed configuration for a cross-validation run: the strategy, the
/// candidate smoothing fractions to score, and an optional shuffle seed.
#[derive(Debug, Clone)]
pub struct CVConfig<'a, T> {
// Strategy to run (k-fold or LOOCV).
pub(crate) kind: CVKind,
// Candidate smoothing fractions to evaluate.
pub(crate) fractions: &'a [T],
// Optional seed for deterministic index shuffling (used by k-fold only).
pub(crate) seed: Option<u64>,
}
/// Accessors and builder-style configuration for [`CVConfig`].
impl<T> CVConfig<'_, T> {
    /// Returns the configured cross-validation strategy.
    pub fn kind(&self) -> CVKind {
        self.kind
    }

    /// Returns the candidate smoothing fractions under evaluation.
    pub fn fractions(&self) -> &[T] {
        self.fractions
    }

    /// Returns the shuffle seed, if one was set via [`CVConfig::seed`].
    pub fn get_seed(&self) -> Option<u64> {
        self.seed
    }

    /// Sets the shuffle seed and returns the updated config (builder style).
    pub fn seed(mut self, seed: u64) -> Self {
        self.seed = Some(seed);
        self
    }
}
/// Builds a [`CVConfig`] that scores `fractions` with `k`-fold
/// cross-validation. No seed is set; call `.seed(..)` to enable shuffling.
#[allow(non_snake_case)]
pub fn KFold<T>(k: usize, fractions: &[T]) -> CVConfig<'_, T> {
    let kind = CVKind::KFold(k);
    CVConfig {
        kind,
        fractions,
        seed: None,
    }
}
/// Builds a [`CVConfig`] that scores `fractions` with leave-one-out
/// cross-validation. No seed is set (LOOCV does not shuffle).
#[allow(non_snake_case)]
pub fn LOOCV<T>(fractions: &[T]) -> CVConfig<'_, T> {
    let kind = CVKind::LOOCV;
    CVConfig {
        kind,
        fractions,
        seed: None,
    }
}
impl CVKind {
/// Dispatches to the cross-validation routine selected by `self`.
///
/// * `x`/`y` — flattened predictors (`n * dimensions` values, row-major)
///   and the `n` target values.
/// * `fractions` — candidate smoothing fractions to score.
/// * `seed` — optional shuffle seed (consumed by the k-fold path only).
/// * `smoother` — `(sorted_x, sorted_y, frac) -> smoothed_y`, used when no
///   `predictor` is supplied.
/// * `predictor` — optional `(train_x, train_y, test_x, frac) -> predictions`
///   callback that bypasses the smoother/interpolation fallback.
/// * `cv_buffer` — scratch buffers reused across folds to avoid reallocation.
///
/// Returns `(best_fraction, cv_scores)` where `cv_scores[i]` is the score
/// computed for `fractions[i]` (lower is better).
#[allow(clippy::too_many_arguments)]
pub fn run<T, F, P>(
    self,
    x: &[T],
    y: &[T],
    dimensions: usize,
    fractions: &[T],
    seed: Option<u64>,
    mut smoother: F,
    mut predictor: Option<P>,
    cv_buffer: &mut CVBuffer<T>,
) -> (T, Vec<T>)
where
    T: Float + Debug + Send + Sync + 'static,
    F: FnMut(&[T], &[T], T) -> Vec<T>,
    P: FnMut(&[T], &[T], &[T], T) -> Vec<T>,
{
    // Reborrow the optional predictor once; only one match arm runs, so the
    // single `Option<&mut P>` can be moved into whichever call is taken.
    let predictor = predictor.as_mut();
    match self {
        CVKind::KFold(k) => Self::kfold_cross_validation(
            x, y, dimensions, fractions, k, seed, &mut smoother, predictor, cv_buffer,
        ),
        CVKind::LOOCV => Self::leave_one_out_cross_validation(
            x, y, dimensions, fractions, &mut smoother, predictor, cv_buffer,
        ),
    }
}
/// Copies the rows named by `indices` out of the flattened, row-major
/// predictor matrix `x` (`dims` values per row) and target vector `y`,
/// filling `tx`/`ty` after clearing both buffers.
///
/// Panics if an index addresses a row beyond the end of `x` or `y`.
pub fn build_subset_inplace<T: Float>(
    x: &[T],
    y: &[T],
    dims: usize,
    indices: &[usize],
    tx: &mut Vec<T>,
    ty: &mut Vec<T>,
) {
    tx.clear();
    ty.clear();
    for &row in indices {
        let start = row * dims;
        tx.extend_from_slice(&x[start..start + dims]);
    }
    ty.extend(indices.iter().map(|&row| y[row]));
}
/// Allocating variant of [`CVKind::build_subset_inplace`]: returns fresh
/// `(subset_x, subset_y)` vectors pre-sized exactly for `indices`.
pub fn build_subset_from_indices<T: Float>(
    x: &[T],
    y: &[T],
    dims: usize,
    indices: &[usize],
) -> (Vec<T>, Vec<T>) {
    let mut subset_x = Vec::with_capacity(indices.len() * dims);
    let mut subset_y = Vec::with_capacity(indices.len());
    Self::build_subset_inplace(x, y, dims, indices, &mut subset_x, &mut subset_y);
    (subset_x, subset_y)
}
/// Predicts a single value at `x_new` by piecewise-linear interpolation of
/// the curve (`x_train`, `y_train`); `x_train` is expected sorted ascending.
///
/// Degenerate inputs: an empty training set yields zero, a single point
/// yields its value, and queries outside the training range are clamped to
/// the nearest endpoint.
pub fn interpolate_prediction<T: Float>(x_train: &[T], y_train: &[T], x_new: T) -> T {
    let n = x_train.len();
    match n {
        0 => return T::zero(),
        1 => return y_train[0],
        _ => {}
    }
    // Clamp queries outside the training range to the endpoint values.
    if x_new <= x_train[0] {
        return y_train[0];
    }
    if x_new >= x_train[n - 1] {
        return y_train[n - 1];
    }
    // Locate the bracketing segment: `left` is the last knot whose abscissa
    // is <= x_new (partition_point returns the first one that is greater).
    let right = x_train.partition_point(|&v| v <= x_new);
    let left = right - 1;
    let (x0, x1) = (x_train[left], x_train[right]);
    let (y0, y1) = (y_train[left], y_train[right]);
    let span = x1 - x0;
    if span <= T::zero() {
        // Degenerate segment (duplicate abscissa): average the candidates.
        return (y0 + y1) / T::from(2.0).unwrap();
    }
    let alpha = (x_new - x0) / span;
    y0 + alpha * (y1 - y0)
}
/// Piecewise-linear interpolation of each query in `x_new` against the
/// curve (`x_train`, `y_train`), writing results into `y_pred`.
///
/// `x_train` is expected sorted ascending, and `y_train`/`y_pred` must be
/// at least as long as `x_train`/`x_new` (indexing panics otherwise).
/// Queries outside the training range are clamped to the endpoint values;
/// an empty training set fills `y_pred` with zeros, a single point fills
/// it with that point's value.
///
/// A moving cursor makes the scan O(n_new + n_train) when `x_new` is also
/// sorted ascending; out-of-order queries are still answered correctly by
/// rewinding the cursor.
pub fn interpolate_prediction_batch<T: Float>(
    x_train: &[T],
    y_train: &[T],
    x_new: &[T],
    y_pred: &mut [T],
) {
    let n_train = x_train.len();
    let n_new = x_new.len();
    if n_new == 0 {
        return;
    }
    // Degenerate training sets: no data -> zeros, one point -> constant.
    if n_train == 0 {
        y_pred.fill(T::zero());
        return;
    }
    if n_train == 1 {
        y_pred.fill(y_train[0]);
        return;
    }
    let mut left = 0;
    for i in 0..n_new {
        let xi = x_new[i];
        // Clamp queries outside the training range.
        if xi <= x_train[0] {
            y_pred[i] = y_train[0];
            continue;
        }
        if xi >= x_train[n_train - 1] {
            y_pred[i] = y_train[n_train - 1];
            continue;
        }
        // BUGFIX: rewind the cursor for non-monotone query sequences. The
        // previous code only ever advanced `left`, so a query smaller than
        // its predecessor was interpolated from a stale (wrong) segment.
        // For sorted queries this branch never fires, so the fast path is
        // unchanged.
        if x_train[left] > xi {
            left = 0;
        }
        // Advance to the last knot with abscissa <= xi.
        while left + 1 < n_train && x_train[left + 1] <= xi {
            left += 1;
        }
        let right = left + 1;
        let (x0, x1) = (x_train[left], x_train[right]);
        let (y0, y1) = (y_train[left], y_train[right]);
        let span = x1 - x0;
        if span <= T::zero() {
            // Degenerate segment (duplicate abscissa): average the candidates.
            y_pred[i] = (y0 + y1) / T::from(2.0).unwrap();
        } else {
            let alpha = (xi - x0) / span;
            y_pred[i] = y0 + alpha * (y1 - y0);
        }
    }
}
/// Picks the fraction with the smallest CV score and returns it alongside
/// a copy of all scores.
///
/// Incomparable scores (NaN) compare as equal, and ties resolve the way
/// `Iterator::min_by` does (the last of several equal minima wins).
/// An empty `fractions` slice yields `(0, [])`.
fn select_best_fraction<T: Float>(fractions: &[T], scores: &[T]) -> (T, Vec<T>) {
    if fractions.is_empty() {
        return (T::zero(), Vec::new());
    }
    let best_idx = scores
        .iter()
        .enumerate()
        .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(Equal))
        .map_or(0, |(i, _)| i);
    (fractions[best_idx], scores.to_vec())
}
/// Scores every candidate fraction with k-fold cross-validation and
/// returns the best fraction plus the per-fraction mean-RMSE scores.
///
/// Rows are split into `k` contiguous folds over the index order; with a
/// `seed` the indices are first shuffled by a deterministic Fisher-Yates
/// pass, otherwise folds follow natural row order. For each fold, the
/// remaining rows form the training set.
///
/// When no `predictor` is supplied, the fallback path sorts training rows
/// by their FIRST coordinate, fits `smoother` on that 1-D view, and
/// linearly interpolates each test row's first coordinate (multi-dim
/// inputs are thus reduced to their first coordinate on this path).
#[allow(clippy::too_many_arguments)]
fn kfold_cross_validation<T, F, P>(
    x: &[T],
    y: &[T],
    dims: usize,
    fractions: &[T],
    k: usize,
    seed: Option<u64>,
    smoother: &mut F,
    mut predictor: Option<&mut P>,
    cv_buffer: &mut CVBuffer<T>,
) -> (T, Vec<T>)
where
    T: Float + Debug + Send + Sync + 'static,
    F: FnMut(&[T], &[T], T) -> Vec<T>,
    P: FnMut(&[T], &[T], &[T], T) -> Vec<T>,
{
    let n = x.len() / dims;
    // Degenerate setups cannot be cross-validated: report zero scores and
    // fall back to the first candidate fraction.
    if n < k || k < 2 {
        return (
            fractions.first().copied().unwrap_or(T::zero()),
            vec![T::zero(); fractions.len()],
        );
    }
    let fold_size = n / k;
    let mut cv_scores = vec![T::zero(); fractions.len()];
    let mut indices: Vec<usize> = (0..n).collect();
    if let Some(s) = seed {
        // Seeded Fisher-Yates shuffle for reproducible fold assignment.
        let mut rng = SimpleRng::new(s);
        for i in (1..n).rev() {
            let j = (rng.next_u32() as usize) % (i + 1);
            indices.swap(i, j);
        }
    }
    cv_buffer.ensure_capacity(n, dims);
    for (frac_idx, &frac) in fractions.iter().enumerate() {
        let mut fold_rmses = Vec::with_capacity(k);
        for fold in 0..k {
            let test_start = fold * fold_size;
            // The last fold absorbs the remainder rows.
            let test_end = if fold == k - 1 {
                n
            } else {
                (fold + 1) * fold_size
            };
            let (tx, ty, tex, tey) = (
                &mut cv_buffer.train_x,
                &mut cv_buffer.train_y,
                &mut cv_buffer.test_x,
                &mut cv_buffer.test_y,
            );
            // Training set = every row outside [test_start, test_end).
            tx.clear();
            ty.clear();
            for &idx in indices[..test_start].iter().chain(&indices[test_end..n]) {
                let offset = idx * dims;
                tx.extend_from_slice(&x[offset..offset + dims]);
                ty.push(y[idx]);
            }
            // Test set = the rows inside the fold.
            tex.clear();
            tey.clear();
            for &idx in &indices[test_start..test_end] {
                let offset = idx * dims;
                tex.extend_from_slice(&x[offset..offset + dims]);
                tey.push(y[idx]);
            }
            let predictions = if let Some(ref mut p_fn) = predictor {
                p_fn(tx, ty, tex, frac)
            } else {
                // Fallback: 1-D smoothing keyed on each row's FIRST
                // coordinate. BUGFIX: the old code zipped the flattened
                // `tx` with `ty` (mispairing rows for dims > 1) and
                // iterated every element of `tex`, producing `dims` times
                // too many predictions and then panicking at `tey[i]`.
                // Pairing per-row via chunks is identical for dims == 1
                // and correct for dims > 1.
                let mut train_data: Vec<(T, T)> = tx
                    .chunks(dims)
                    .map(|row| row[0])
                    .zip(ty.iter().copied())
                    .collect();
                train_data.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(Equal));
                let (sorted_tx, sorted_ty): (Vec<T>, Vec<T>) = train_data.into_iter().unzip();
                let train_smooth = smoother(&sorted_tx, &sorted_ty, frac);
                tex.chunks(dims)
                    .map(|row| Self::interpolate_prediction(&sorted_tx, &train_smooth, row[0]))
                    .collect()
            };
            // Accumulate the squared error over the fold's test targets.
            debug_assert_eq!(predictions.len(), tey.len());
            let mut fold_error = T::zero();
            for (&predicted, &actual) in predictions.iter().zip(tey.iter()) {
                let error = actual - predicted;
                fold_error = fold_error + error * error;
            }
            if !tey.is_empty() {
                fold_rmses.push((fold_error / T::from(tey.len()).unwrap()).sqrt());
            }
        }
        // Mean RMSE across folds; no valid fold disqualifies the fraction.
        cv_scores[frac_idx] = if fold_rmses.is_empty() {
            T::infinity()
        } else {
            let sum: T = fold_rmses.iter().copied().fold(T::zero(), |a, b| a + b);
            sum / T::from(fold_rmses.len()).unwrap()
        };
    }
    Self::select_best_fraction(fractions, &cv_scores)
}
/// Scores every candidate fraction with leave-one-out cross-validation
/// (one fold per observation) and returns the best fraction plus the
/// per-fraction RMSE scores.
///
/// The fallback path (no `predictor`) fits `smoother` on the remaining
/// rows in their original (unsorted) order and interpolates using only
/// the FIRST coordinate of the held-out row — i.e. it assumes 1-D data;
/// supply a `predictor` for multi-dimensional inputs.
fn leave_one_out_cross_validation<T, F, P>(
    x: &[T],
    y: &[T],
    dims: usize,
    fractions: &[T],
    smoother: &mut F,
    mut predictor: Option<&mut P>,
    cv_buffer: &mut CVBuffer<T>,
) -> (T, Vec<T>)
where
    T: Float + Debug + Send + Sync + 'static,
    F: FnMut(&[T], &[T], T) -> Vec<T>,
    P: FnMut(&[T], &[T], &[T], T) -> Vec<T>,
{
    let n = x.len() / dims;
    // BUGFIX: with zero observations the old code computed 0/0 and filled
    // the scores with NaN (and NaN scores then made the tie-break pick an
    // arbitrary fraction). Bail out like the k-fold path instead.
    if n == 0 {
        return (
            fractions.first().copied().unwrap_or(T::zero()),
            vec![T::zero(); fractions.len()],
        );
    }
    let mut cv_scores = vec![T::zero(); fractions.len()];
    cv_buffer.ensure_capacity(n, dims);
    let mut test_point = vec![T::zero(); dims];
    for (frac_idx, &frac) in fractions.iter().enumerate() {
        let mut total_error = T::zero();
        for i in 0..n {
            // Training set = every row except the held-out row i.
            let (tx, ty) = (&mut cv_buffer.train_x, &mut cv_buffer.train_y);
            tx.clear();
            ty.clear();
            for (j, &target) in y.iter().enumerate().take(n) {
                if j == i {
                    continue;
                }
                let offset = j * dims;
                tx.extend_from_slice(&x[offset..offset + dims]);
                ty.push(target);
            }
            let test_offset = i * dims;
            test_point.copy_from_slice(&x[test_offset..test_offset + dims]);
            let predicted = if let Some(ref mut p_fn) = predictor {
                // Panics if the predictor returns no predictions.
                p_fn(tx, ty, &test_point, frac)[0]
            } else {
                // NOTE(review): unlike the k-fold fallback, the smoother
                // sees unsorted training data here, and interpolation keys
                // on the first coordinate only.
                let train_smooth = smoother(tx, ty, frac);
                Self::interpolate_prediction(tx, &train_smooth, test_point[0])
            };
            let error = y[i] - predicted;
            total_error = total_error + error * error;
        }
        cv_scores[frac_idx] = (total_error / T::from(n).unwrap()).sqrt();
    }
    Self::select_best_fraction(fractions, &cv_scores)
}
}