use crate::TimeSeries;
/// Configuration for forward-chaining cross-validation over a time series.
///
/// Every fold trains on observations that precede the fold's test window, so
/// the test data is always later in the series than the training data.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TimeSeriesCV {
    /// Number of folds `split` attempts to produce.
    n_splits: usize,
    /// Upper bound on the training window length; `None` lets the training
    /// window start at index 0 for every fold.
    max_train_size: Option<usize>,
    /// Number of observations skipped between the end of the training window
    /// and the start of the test window.
    gap: usize,
}
impl TimeSeriesCV {
    /// Creates a splitter that attempts `n_splits` folds, with no gap and an
    /// uncapped training window.
    pub fn new(n_splits: usize) -> Self {
        Self {
            n_splits,
            max_train_size: None,
            gap: 0,
        }
    }

    /// Limits every training window to at most `max_train_size` observations,
    /// counted back from the end of the training window.
    pub fn with_max_train_size(mut self, max_train_size: usize) -> Self {
        self.max_train_size = Some(max_train_size);
        self
    }

    /// Sets how many observations are skipped between the end of the training
    /// window and the start of the test window.
    pub fn with_gap(mut self, gap: usize) -> Self {
        self.gap = gap;
        self
    }

    /// Produces `(train, test)` pairs in chronological order.
    ///
    /// The fold length is `series.len() / (n_splits + 1)`. For fold `i`
    /// (1-based) the training window ends at `fold_len * i`, the test window
    /// starts `gap` observations later and spans `fold_len` observations.
    /// Folds whose test window would run past the end of the series are not
    /// emitted, so fewer than `n_splits` pairs may be returned.
    ///
    /// When the series is too short to give every fold at least one
    /// observation (fold length of zero), an empty vector is returned instead
    /// of zero-length folds.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `TimeSeries::slice`.
    pub fn split(
        &self,
        series: &TimeSeries,
    ) -> Result<Vec<(TimeSeries, TimeSeries)>, torsh_core::error::TorshError> {
        let n = series.len();
        // `saturating_add` keeps the divisor non-zero even for `usize::MAX`.
        let test_size = n / self.n_splits.saturating_add(1);
        if test_size == 0 {
            // Not enough data for a single non-empty fold.
            return Ok(Vec::new());
        }

        // `test_size >= 1` implies `n_splits < n`, so this capacity is bounded
        // by the series length.
        let mut splits = Vec::with_capacity(self.n_splits);
        for i in 1..=self.n_splits {
            let train_end = test_size * i;
            let test_start = train_end + self.gap;
            let test_end = test_start + test_size;
            if test_end > n {
                // `test_end` grows with `i`, so no later fold can fit either.
                break;
            }

            let train_start = match self.max_train_size {
                Some(max_size) => train_end.saturating_sub(max_size),
                None => 0,
            };
            let train = series.slice(train_start, train_end)?;
            let test = series.slice(test_start, test_end)?;
            splits.push((train, test));
        }
        Ok(splits)
    }
}
/// Walk-forward validation with a fixed-length training window and a
/// one-step-ahead target.
///
/// For every position `i` in `window_size..series.len()`, advancing by
/// `step_size`, the model is given `series.slice(i - window_size, i)` and its
/// prediction is compared with the value read at flat index `i`.
///
/// Returns the absolute errors `|prediction - actual|`, one per evaluated
/// position. The result is empty when `window_size >= series.len()` or when
/// `step_size` is zero.
///
/// # Errors
///
/// Propagates errors from `TimeSeries::slice` and from `get_item_flat`.
pub fn walk_forward_validation<F>(
    series: &TimeSeries,
    window_size: usize,
    step_size: usize,
    predict_fn: F,
) -> Result<Vec<f64>, torsh_core::error::TorshError>
where
    F: Fn(&TimeSeries) -> f64,
{
    // `Iterator::step_by` panics on a zero step; a zero step can never make
    // progress, so report "nothing evaluated" instead of aborting.
    if step_size == 0 {
        return Ok(Vec::new());
    }

    let n = series.len();
    let mut errors = Vec::new();
    for i in (window_size..n).step_by(step_size) {
        let train = series.slice(i - window_size, i)?;
        let pred = predict_fn(&train);
        let actual = series.values.get_item_flat(i)? as f64;
        errors.push((pred - actual).abs());
    }
    Ok(errors)
}
/// Expanding-window validation: the training window always starts at index 0
/// and grows by `test_size` observations per fold.
///
/// For each fold origin `i` in `min_train_size..=series.len() - test_size`,
/// advancing by `test_size`, the model is given `series.slice(0, i)` and its
/// single prediction is compared with each of the next `test_size` values.
///
/// Returns the absolute errors `|prediction - actual|` in fold order. The
/// result is empty when `test_size` is zero or when
/// `min_train_size + test_size` exceeds the series length.
///
/// # Errors
///
/// Propagates errors from `TimeSeries::slice` and from `get_item_flat`.
pub fn expanding_window_validation<F>(
    series: &TimeSeries,
    min_train_size: usize,
    test_size: usize,
    predict_fn: F,
) -> Result<Vec<f64>, torsh_core::error::TorshError>
where
    F: Fn(&TimeSeries) -> f64,
{
    let n = series.len();
    // `step_by(0)` panics, and an empty test window has nothing to score.
    if test_size == 0 {
        return Ok(Vec::new());
    }
    // Overflow-safe form of `min_train_size + test_size > n`.
    match min_train_size.checked_add(test_size) {
        Some(required) if required <= n => {}
        _ => return Ok(Vec::new()),
    }

    let mut errors = Vec::new();
    for i in (min_train_size..=(n - test_size)).step_by(test_size) {
        let train = series.slice(0, i)?;
        // The training window is the same for every point of this fold, so
        // the model is queried once per fold rather than once per test point.
        let pred = predict_fn(&train);
        // `i <= n - test_size`, hence `i + test_size` never exceeds `n`.
        for j in i..(i + test_size) {
            let actual = series.values.get_item_flat(j)? as f64;
            errors.push((pred - actual).abs());
        }
    }
    Ok(errors)
}
/// Rolling-window validation: a fixed-length training window slides forward
/// one observation at a time.
///
/// For each fold origin `i` in `window_size..=series.len() - test_size`, the
/// model is given `series.slice(i - window_size, i)` and its single
/// prediction is compared with each of the next `test_size` values.
///
/// Returns the absolute errors `|prediction - actual|` in fold order. The
/// result is empty when `test_size` is zero or when
/// `window_size + test_size` exceeds the series length.
///
/// # Errors
///
/// Propagates errors from `TimeSeries::slice` and from `get_item_flat`.
pub fn rolling_window_validation<F>(
    series: &TimeSeries,
    window_size: usize,
    test_size: usize,
    predict_fn: F,
) -> Result<Vec<f64>, torsh_core::error::TorshError>
where
    F: Fn(&TimeSeries) -> f64,
{
    let n = series.len();
    // An empty test window yields no errors; skip the slicing work entirely.
    if test_size == 0 {
        return Ok(Vec::new());
    }
    // Overflow-safe form of `window_size + test_size > n`.
    match window_size.checked_add(test_size) {
        Some(required) if required <= n => {}
        _ => return Ok(Vec::new()),
    }

    let mut errors = Vec::new();
    for i in window_size..=(n - test_size) {
        let train = series.slice(i - window_size, i)?;
        // The training window is the same for every point of this fold, so
        // the model is queried once per fold rather than once per test point.
        let pred = predict_fn(&train);
        // `i <= n - test_size`, hence `i + test_size` never exceeds `n`.
        for j in i..(i + test_size) {
            let actual = series.values.get_item_flat(j)? as f64;
            errors.push((pred - actual).abs());
        }
    }
    Ok(errors)
}
/// Configuration for cross-validation that keeps a purge margin before each
/// test window and an embargo margin after it out of the training data.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PurgedTimeSeriesCV {
    /// Number of folds `split` attempts; test windows start at multiples of
    /// `series.len() / n_splits`.
    n_splits: usize,
    /// Length of each test window.
    test_size: usize,
    /// Observations immediately before the test window that are excluded
    /// from training.
    purge_window: usize,
    /// Observations immediately after the test window that are excluded
    /// from training.
    embargo_window: usize,
}
impl PurgedTimeSeriesCV {
    /// Creates a purged splitter from the fold count, test window length,
    /// purge margin and embargo margin.
    pub fn new(
        n_splits: usize,
        test_size: usize,
        purge_window: usize,
        embargo_window: usize,
    ) -> Self {
        Self {
            n_splits,
            test_size,
            purge_window,
            embargo_window,
        }
    }

    /// Produces `(train, test)` pairs with purge and embargo margins applied.
    ///
    /// Test window `i` starts at `i * (series.len() / n_splits)` and spans
    /// `test_size` observations. Iteration stops at the first test window
    /// that reaches the end of the series. Training data is a single
    /// contiguous slice: everything before the purge margin when that is
    /// non-empty, otherwise everything after the embargo margin; a fold with
    /// neither is skipped.
    ///
    /// Returns an empty vector when `n_splits` is zero.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `TimeSeries::slice`.
    pub fn split(
        &self,
        series: &TimeSeries,
    ) -> Result<Vec<(TimeSeries, TimeSeries)>, torsh_core::error::TorshError> {
        // Dividing by a zero fold count would panic; zero folds requested
        // simply means zero folds returned.
        if self.n_splits == 0 {
            return Ok(Vec::new());
        }

        let n = series.len();
        let split_size = n / self.n_splits;
        let mut splits = Vec::new();
        for i in 0..self.n_splits {
            let test_start = i * split_size;
            let test_end = (test_start + self.test_size).min(n);
            if test_end >= n {
                break;
            }

            // Training may end no later than the purge margin before the test
            // window, and may resume no earlier than the embargo margin after.
            let train_before_end = test_start.saturating_sub(self.purge_window);
            let train_after_start = (test_end + self.embargo_window).min(n);

            let train = if train_before_end > 0 {
                series.slice(0, train_before_end)?
            } else if train_after_start < n {
                series.slice(train_after_start, n)?
            } else {
                // Margins consume the whole series: no training data.
                continue;
            };
            let test = series.slice(test_start, test_end)?;
            splits.push((train, test));
        }
        Ok(splits)
    }
}
/// Configuration for purged cross-validation that generates several
/// backtest paths, each starting at a different offset into the series.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CombinatorialPurgedCV {
    /// Number of paths `split` attempts to generate.
    n_paths: usize,
    /// Length of each test window.
    test_size: usize,
    /// Observations immediately before a test window that are excluded from
    /// training.
    purge_window: usize,
    /// Observations skipped after a test window before the next test window
    /// of the same path begins.
    embargo_window: usize,
}
impl CombinatorialPurgedCV {
    /// Creates a combinatorial purged splitter from the path count, test
    /// window length, purge margin and embargo margin.
    pub fn new(
        n_paths: usize,
        test_size: usize,
        purge_window: usize,
        embargo_window: usize,
    ) -> Self {
        Self {
            n_paths,
            test_size,
            purge_window,
            embargo_window,
        }
    }

    /// Produces one list of `(train, test)` pairs per path.
    ///
    /// Path `p` starts at offset `p * series.len() / (n_paths * 3)`. Within a
    /// path, test windows of `test_size` observations are laid out one after
    /// another, each followed by the embargo margin. A pair is emitted only
    /// when the training slice `[offset, test_start - purge_window)` is
    /// non-empty. Paths that yield no pair are omitted from the result.
    ///
    /// Returns an empty vector when `test_size` is zero.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `TimeSeries::slice`.
    pub fn split(
        &self,
        series: &TimeSeries,
    ) -> Result<Vec<Vec<(TimeSeries, TimeSeries)>>, torsh_core::error::TorshError> {
        // With an empty test window and no embargo the cursor below would
        // never advance, so the loop would not terminate.
        if self.test_size == 0 {
            return Ok(Vec::new());
        }

        let n = series.len();
        let mut all_paths = Vec::new();
        for path_idx in 0..self.n_paths {
            let offset = (path_idx * n) / (self.n_paths * 3);
            let mut path_splits = Vec::new();
            let mut current_pos = offset;
            while current_pos + self.test_size < n {
                let test_start = current_pos;
                let test_end = current_pos + self.test_size;
                let purge_start = test_start.saturating_sub(self.purge_window);
                if purge_start > offset {
                    let train = series.slice(offset, purge_start)?;
                    let test = series.slice(test_start, test_end)?;
                    path_splits.push((train, test));
                }
                // The next test window begins after the embargo margin;
                // `test_size >= 1` guarantees forward progress.
                current_pos = (test_end + self.embargo_window).min(n);
            }
            if !path_splits.is_empty() {
                all_paths.push(path_splits);
            }
        }
        Ok(all_paths)
    }
}
/// Configuration for nested cross-validation: an outer forward-chaining
/// split, with an inner forward-chaining split applied to each outer
/// training window.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NestedTimeSeriesCV {
    /// Number of outer folds `split` attempts to produce.
    n_outer_splits: usize,
    /// Number of inner folds attempted on each outer training window.
    n_inner_splits: usize,
    /// Observations skipped between training and test windows, applied to
    /// both the outer and the inner splits.
    gap: usize,
}
impl NestedTimeSeriesCV {
    /// Creates a nested splitter with the given outer and inner fold counts
    /// and no gap.
    pub fn new(n_outer_splits: usize, n_inner_splits: usize) -> Self {
        Self {
            n_outer_splits,
            n_inner_splits,
            gap: 0,
        }
    }

    /// Sets how many observations are skipped between training and test
    /// windows, for both the outer and the inner splits.
    pub fn with_gap(mut self, gap: usize) -> Self {
        self.gap = gap;
        self
    }

    /// Produces `(outer_train, outer_test, inner_splits)` triples.
    ///
    /// The outer fold length is `series.len() / (n_outer_splits + 1)`. Outer
    /// fold `i` (1-based) trains on `[0, fold_len * i)`, skips `gap`
    /// observations and tests on the next `fold_len` observations; folds
    /// whose test window would run past the end of the series are not
    /// emitted. `inner_splits` is the result of running a `TimeSeriesCV`
    /// with `n_inner_splits` folds and the same gap on `outer_train`.
    ///
    /// When the series is too short for a non-empty outer fold, an empty
    /// vector is returned instead of zero-length folds.
    ///
    /// # Errors
    ///
    /// Propagates errors from `TimeSeries::slice` and from the inner split.
    pub fn split(
        &self,
        series: &TimeSeries,
    ) -> Result<
        Vec<(TimeSeries, TimeSeries, Vec<(TimeSeries, TimeSeries)>)>,
        torsh_core::error::TorshError,
    > {
        let n = series.len();
        // `saturating_add` keeps the divisor non-zero even for `usize::MAX`.
        let outer_test_size = n / self.n_outer_splits.saturating_add(1);
        if outer_test_size == 0 {
            // Not enough data for a single non-empty outer fold.
            return Ok(Vec::new());
        }

        let inner_cv = TimeSeriesCV::new(self.n_inner_splits).with_gap(self.gap);
        let mut nested_splits = Vec::new();
        for i in 1..=self.n_outer_splits {
            let outer_train_end = outer_test_size * i;
            let outer_test_start = outer_train_end + self.gap;
            let outer_test_end = outer_test_start + outer_test_size;
            if outer_test_end > n {
                // `outer_test_end` grows with `i`; later folds cannot fit.
                break;
            }

            let outer_train = series.slice(0, outer_train_end)?;
            let outer_test = series.slice(outer_test_start, outer_test_end)?;
            let inner_splits = inner_cv.split(&outer_train)?;
            nested_splits.push((outer_train, outer_test, inner_splits));
        }
        Ok(nested_splits)
    }
}
/// Pairs a [`TimeSeriesCV`] splitter with a scoring function so that each
/// fold can be reduced to a single score.
///
/// `F` receives two slices of `f64` and returns the fold's score.
pub struct ScoredTimeSeriesCV<F: Fn(&[f64], &[f64]) -> f64> {
    /// Splitter that generates the `(train, test)` folds.
    cv: TimeSeriesCV,
    /// Scoring function applied to each fold's two value slices.
    scorer: F,
}
impl<F> ScoredTimeSeriesCV<F>
where
F: Fn(&[f64], &[f64]) -> f64,
{
pub fn new(n_splits: usize, scorer: F) -> Self {
Self {
cv: TimeSeriesCV::new(n_splits),
scorer,
}
}
pub fn evaluate<M>(
&self,
series: &TimeSeries,
mut model: M,
) -> Result<Vec<f64>, torsh_core::error::TorshError>
where
M: FnMut(
&TimeSeries,
&TimeSeries,
) -> Result<(Vec<f64>, Vec<f64>), torsh_core::error::TorshError>,
{
let splits = self.cv.split(series)?;
let mut scores = Vec::with_capacity(splits.len());
for (train, test) in splits {
let (predictions, actuals) = model(&train, &test)?;
let score = (self.scorer)(&predictions, &actuals);
scores.push(score);
}
Ok(scores)
}
}
/// Configuration for blocked cross-validation, where each fold tests on one
/// contiguous block of observations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BlockedTimeSeriesCV {
    /// Number of blocks `split` attempts to use as test windows.
    n_splits: usize,
    /// Length of each test block.
    block_size: usize,
    /// Observations kept out of training on either side of a test block;
    /// also added to the spacing between consecutive block starts.
    gap: usize,
}
impl BlockedTimeSeriesCV {
    /// Creates a blocked splitter with `n_splits` test blocks of
    /// `block_size` observations and no gap.
    pub fn new(n_splits: usize, block_size: usize) -> Self {
        Self {
            n_splits,
            block_size,
            gap: 0,
        }
    }

    /// Sets the number of observations kept out of training on either side
    /// of each test block.
    pub fn with_gap(mut self, gap: usize) -> Self {
        self.gap = gap;
        self
    }

    /// Produces `(train, test)` pairs, one per test block that starts inside
    /// the series.
    ///
    /// Block `i` starts at `i * (block_size + gap)` and is truncated at the
    /// end of the series. Training data is a single contiguous slice:
    /// everything up to `gap` observations before the block when that is
    /// non-empty, otherwise everything from `gap` observations after the
    /// block; a block with neither is skipped. Blocks that would start at or
    /// beyond the end of the series are not generated.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `TimeSeries::slice`.
    pub fn split(
        &self,
        series: &TimeSeries,
    ) -> Result<Vec<(TimeSeries, TimeSeries)>, torsh_core::error::TorshError> {
        let n = series.len();
        let stride = self.block_size + self.gap;
        let mut splits = Vec::new();
        for i in 0..self.n_splits {
            let test_start = i * stride;
            // A block starting at or past the end has no test data, and
            // slicing it would use an out-of-range or inverted range. Block
            // starts never decrease with `i`, so stop here.
            if test_start >= n {
                break;
            }
            let test_end = (test_start + self.block_size).min(n);

            let train_end_before = test_start.saturating_sub(self.gap);
            let train_start_after = (test_end + self.gap).min(n);

            let train = if train_end_before > 0 {
                series.slice(0, train_end_before)?
            } else if train_start_after < n {
                series.slice(train_start_after, n)?
            } else {
                // The block and its gaps cover the whole series.
                continue;
            };
            let test = series.slice(test_start, test_end)?;
            splits.push((train, test));
        }
        Ok(splits)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use torsh_tensor::Tensor;

    /// Builds a ten-point series holding the values 1.0 through 10.0.
    fn create_test_series() -> TimeSeries {
        let data = vec![1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        let tensor = Tensor::from_vec(data, &[10]).expect("Tensor should succeed");
        TimeSeries::new(tensor)
    }

    #[test]
    fn test_timeseries_cv() {
        let series = create_test_series();
        let cv = TimeSeriesCV::new(3);
        let splits = cv.split(&series).expect("split operation should succeed");
        assert!(!splits.is_empty());
        for (train, test) in splits {
            assert!(train.len() > 0);
            assert!(test.len() > 0);
        }
    }

    #[test]
    fn test_timeseries_cv_with_gap() {
        let series = create_test_series();
        let cv = TimeSeriesCV::new(2).with_gap(1);
        let splits = cv.split(&series).expect("split operation should succeed");
        assert!(!splits.is_empty());
    }

    #[test]
    fn test_walk_forward_validation() {
        let series = create_test_series();
        let errors = walk_forward_validation(&series, 3, 1, |_| 5.0)
            .expect("walk forward validation should succeed");
        assert!(!errors.is_empty());
    }

    #[test]
    fn test_blocked_timeseries_cv() {
        let series = create_test_series();
        let cv = BlockedTimeSeriesCV::new(2, 3);
        let splits = cv.split(&series).expect("split operation should succeed");
        assert!(!splits.is_empty());
    }

    #[test]
    fn test_expanding_window_validation() {
        let series = create_test_series();
        let errors = expanding_window_validation(&series, 3, 2, |_| 5.0)
            .expect("expanding window validation should succeed");
        assert!(!errors.is_empty());
    }

    #[test]
    fn test_rolling_window_validation() {
        let series = create_test_series();
        let errors = rolling_window_validation(&series, 3, 2, |_| 5.0)
            .expect("rolling window validation should succeed");
        // Ten points, window 3, horizon 2: fold origins 3..=8, two errors each.
        assert_eq!(errors.len(), 12);
        assert!(errors.iter().all(|&e| e >= 0.0));
    }

    #[test]
    fn test_purged_cv() {
        let series = create_test_series();
        let cv = PurgedTimeSeriesCV::new(2, 2, 1, 1);
        let splits = cv.split(&series).expect("split operation should succeed");
        assert!(!splits.is_empty());
        for (train, test) in splits {
            assert!(train.len() > 0);
            assert_eq!(test.len(), 2);
        }
    }

    #[test]
    fn test_combinatorial_purged_cv() {
        let series = create_test_series();
        let cv = CombinatorialPurgedCV::new(2, 2, 1, 1);
        let all_paths = cv.split(&series).expect("split operation should succeed");
        assert!(!all_paths.is_empty());
        for path in all_paths {
            assert!(!path.is_empty());
        }
    }

    #[test]
    fn test_nested_cv() {
        let series = create_test_series();
        let cv = NestedTimeSeriesCV::new(2, 2);
        let nested_splits = cv.split(&series).expect("split operation should succeed");
        assert!(!nested_splits.is_empty());
        for (outer_train, outer_test, inner_splits) in nested_splits {
            assert!(outer_train.len() > 0);
            assert!(outer_test.len() > 0);
            assert!(!inner_splits.is_empty());
            for (inner_train, inner_val) in inner_splits {
                assert!(inner_train.len() > 0);
                assert!(inner_val.len() > 0);
            }
        }
    }

    #[test]
    fn test_scored_cv() {
        let series = create_test_series();
        // Mean squared error between predictions and actual values.
        let scorer = |predictions: &[f64], actuals: &[f64]| {
            predictions
                .iter()
                .zip(actuals.iter())
                .map(|(p, a)| (p - a).powi(2))
                .sum::<f64>()
                / predictions.len() as f64
        };
        let cv = ScoredTimeSeriesCV::new(2, scorer);
        // Constant predictor that also reads the actual values out of the
        // test fold.
        let model = |_train: &TimeSeries, test: &TimeSeries| {
            let predictions = vec![5.0; test.len()];
            let mut actuals = Vec::new();
            for i in 0..test.len() {
                actuals.push(
                    test.values
                        .get_item_flat(i)
                        .expect("get_item_flat should succeed for an in-range index")
                        as f64,
                );
            }
            Ok((predictions, actuals))
        };
        let scores = cv
            .evaluate(&series, model)
            .expect("evaluation should succeed");
        assert_eq!(scores.len(), 2);
        assert!(scores.iter().all(|&s| s >= 0.0));
    }
}