#![allow(dead_code)]
use crate::TimeSeries;
use torsh_tensor::Tensor;
/// Moment-based summary statistics of a single time series.
#[derive(Debug, Clone)]
pub struct StatisticalFeatures {
    /// Arithmetic mean of the values.
    pub mean: f64,
    /// Population standard deviation (divides by n, not n-1).
    pub std: f64,
    /// Smallest value in the series.
    pub min: f64,
    /// Largest value in the series.
    pub max: f64,
    /// Third standardized moment; 0 for symmetric data.
    pub skewness: f64,
    /// Excess kurtosis (fourth standardized moment minus 3).
    pub kurtosis: f64,
}
/// Frequency-domain summary of a time series.
#[derive(Debug, Clone)]
pub struct SpectralFeatures {
    /// Frequency with the highest periodogram power (0.0 when unavailable).
    pub dominant_frequency: f64,
    /// Shannon entropy (natural log) of the normalized power spectrum.
    pub spectral_entropy: f64,
    /// Power-weighted mean frequency of the spectrum.
    pub spectral_centroid: f64,
}
/// Computes moment-based summary statistics of the series.
///
/// Returns an all-zero [`StatisticalFeatures`] for an empty series. Uses
/// population (divide-by-n) estimators throughout; skewness and kurtosis are
/// 0.0 when the series is (near-)constant (std below 1e-10), where those
/// statistics are undefined.
///
/// Previously this made three full passes over the data (the skewness and
/// kurtosis helpers each recomputed the mean and variance); all central
/// moments are now accumulated locally.
pub fn statistical_features(series: &TimeSeries) -> StatisticalFeatures {
    let data = series.values.to_vec().unwrap_or_default();
    if data.is_empty() {
        return StatisticalFeatures {
            mean: 0.0,
            std: 0.0,
            min: 0.0,
            max: 0.0,
            skewness: 0.0,
            kurtosis: 0.0,
        };
    }
    let n = data.len() as f64;
    // First pass: sum, min, max.
    let mut sum = 0.0;
    let mut min = f64::INFINITY;
    let mut max = f64::NEG_INFINITY;
    for &x in &data {
        let v = x as f64;
        sum += v;
        min = min.min(v);
        max = max.max(v);
    }
    let mean = sum / n;
    // Second pass: central moments m2..m4 about the mean.
    let (mut m2, mut m3, mut m4) = (0.0, 0.0, 0.0);
    for &x in &data {
        let d = (x as f64) - mean;
        let d2 = d * d;
        m2 += d2;
        m3 += d2 * d;
        m4 += d2 * d2;
    }
    let variance = m2 / n;
    let std = variance.sqrt();
    // Same guard as the standalone skewness/kurtosis helpers: a near-constant
    // series yields 0.0 for both higher moments.
    let (skewness, kurtosis) = if std < 1e-10 {
        (0.0, 0.0)
    } else {
        (
            (m3 / n) / (std * std * std),
            (m4 / n) / (variance * variance) - 3.0,
        )
    };
    StatisticalFeatures {
        mean,
        std,
        min,
        max,
        skewness,
        kurtosis,
    }
}
/// Population skewness (third standardized moment) of the tensor's values.
///
/// Returns 0.0 for an empty tensor or a (near-)constant series (std below
/// 1e-10), where the statistic is undefined.
fn calculate_skewness(tensor: &Tensor) -> f64 {
    let values = tensor.to_vec().unwrap_or_default();
    if values.is_empty() {
        return 0.0;
    }
    let count = values.len() as f64;
    let xs: Vec<f64> = values.iter().map(|&v| v as f64).collect();
    let mean = xs.iter().sum::<f64>() / count;
    let variance = xs.iter().map(|x| (x - mean) * (x - mean)).sum::<f64>() / count;
    let std = variance.sqrt();
    if std < 1e-10 {
        return 0.0;
    }
    // Mean of z^3 over all standardized values.
    xs.iter().map(|x| ((x - mean) / std).powi(3)).sum::<f64>() / count
}
/// Excess kurtosis (fourth standardized moment minus 3) of the tensor's values.
///
/// Returns 0.0 for an empty tensor or a (near-)constant series (std below
/// 1e-10), where the statistic is undefined.
fn calculate_kurtosis(tensor: &Tensor) -> f64 {
    let values = tensor.to_vec().unwrap_or_default();
    if values.is_empty() {
        return 0.0;
    }
    let count = values.len() as f64;
    let xs: Vec<f64> = values.iter().map(|&v| v as f64).collect();
    let mean = xs.iter().sum::<f64>() / count;
    let variance = xs.iter().map(|x| (x - mean) * (x - mean)).sum::<f64>() / count;
    let std = variance.sqrt();
    if std < 1e-10 {
        return 0.0;
    }
    // Mean of z^4 over all standardized values, then subtract 3 so a
    // normal distribution maps to 0.
    let fourth_moment = xs
        .iter()
        .map(|x| {
            let z = (x - mean) / std;
            z * z * z * z
        })
        .sum::<f64>()
        / count;
    fourth_moment - 3.0
}
/// Sample autocorrelation function for lags 1..=`max_lag`.
///
/// Returns an empty vector for an empty series or `max_lag == 0`, and all
/// zeros when `max_lag` exceeds the series length or the series has
/// negligible variance. Autocovariances are normalized by n (biased
/// estimator) and divided by the lag-0 variance.
pub fn autocorrelation(series: &TimeSeries, max_lag: usize) -> Vec<f64> {
    let raw = series.values.to_vec().unwrap_or_default();
    if raw.is_empty() || max_lag == 0 {
        return Vec::new();
    }
    let n = raw.len();
    if max_lag >= n {
        return vec![0.0; max_lag];
    }
    let xs: Vec<f64> = raw.iter().map(|&v| v as f64).collect();
    let n_f = n as f64;
    let mean = xs.iter().sum::<f64>() / n_f;
    let variance = xs.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n_f;
    if variance < 1e-10 {
        return vec![0.0; max_lag];
    }
    (1..=max_lag)
        .map(|lag| {
            // Each window pairs x[t] (last element) with x[t - lag] (first).
            let autocovariance: f64 = xs
                .windows(lag + 1)
                .map(|w| (w[lag] - mean) * (w[0] - mean))
                .sum::<f64>()
                / n_f;
            autocovariance / variance
        })
        .collect()
}
/// Partial autocorrelation function (PACF) for lags 1..=`max_lag`.
///
/// Runs the Durbin–Levinson recursion on the sample ACF: `phi[k]` holds the
/// AR(k) coefficients, and the k-th PACF value is the final coefficient
/// `phi_kk` of that order-k fit. Returns all zeros when the ACF is empty or
/// numerically negligible everywhere.
pub fn partial_autocorrelation(series: &TimeSeries, max_lag: usize) -> Vec<f64> {
    if max_lag == 0 {
        return vec![];
    }
    let acf = autocorrelation(series, max_lag);
    // Degenerate series (empty data or ~zero autocorrelation at every lag):
    // the recursion would divide noise by noise, so short-circuit to zeros.
    if acf.is_empty() || acf.iter().all(|&x| x.abs() < 1e-10) {
        return vec![0.0; max_lag];
    }
    let mut pacf = Vec::with_capacity(max_lag);
    // By definition, PACF at lag 1 equals the ACF at lag 1.
    if !acf.is_empty() {
        pacf.push(acf[0]);
    }
    // phi[k][j-1] stores phi_{k,j}: the j-th AR coefficient of the order-k fit.
    let mut phi = vec![vec![0.0]; max_lag + 1];
    phi[1] = vec![acf[0]];
    for k in 2..=max_lag {
        if k - 1 >= acf.len() {
            pacf.push(0.0);
            continue;
        }
        // Durbin–Levinson step:
        //   phi_kk = (rho(k) - sum_j phi_{k-1,j} * rho(k-j))
        //          / (1   - sum_j phi_{k-1,j} * rho(j))
        // Note acf[i] is rho(i+1), hence the index shifts below. The bounds
        // checks guard against short phi rows from earlier iterations.
        let mut numerator = acf[k - 1];
        let mut denominator = 1.0;
        for j in 1..k {
            if k - j - 1 < acf.len() && j - 1 < phi[k - 1].len() {
                numerator -= phi[k - 1][j - 1] * acf[k - j - 1];
            }
            if j - 1 < acf.len() && j - 1 < phi[k - 1].len() {
                denominator -= phi[k - 1][j - 1] * acf[j - 1];
            }
        }
        // Guard against a (near-)singular denominator.
        let phi_kk = if denominator.abs() > 1e-10 {
            numerator / denominator
        } else {
            0.0
        };
        pacf.push(phi_kk);
        // Coefficient update: phi_{k,j} = phi_{k-1,j} - phi_kk * phi_{k-1,k-j}.
        let mut new_phi = vec![0.0; k];
        new_phi[k - 1] = phi_kk;
        for j in 1..k {
            if j - 1 < phi[k - 1].len() && k - j - 1 < phi[k - 1].len() {
                new_phi[j - 1] = phi[k - 1][j - 1] - phi_kk * phi[k - 1][k - j - 1];
            }
        }
        phi[k] = new_phi;
    }
    pacf
}
/// Frequency-domain features: dominant frequency, spectral entropy, and
/// spectral centroid.
///
/// The sampling rate defaults to 1.0 when the series carries none. When the
/// FFT fails, a degenerate single-bin spectrum is used, and when total power
/// is not positive, entropy and centroid are both 0.0.
pub fn spectral_features(series: &TimeSeries) -> SpectralFeatures {
    use crate::frequency::{FFTAnalyzer, PeriodogramAnalyzer};
    let sampling_rate = series.frequency.unwrap_or(1.0);
    // Dominant frequency from the periodogram; 0.0 when unavailable.
    let dominant_frequency = PeriodogramAnalyzer::new(sampling_rate)
        .dominant_frequency(series)
        .unwrap_or(0.0);
    // Power spectrum via FFT, falling back to a degenerate one-bin result.
    let fft_result = FFTAnalyzer::new(sampling_rate)
        .fft(series)
        .unwrap_or_else(|_| crate::frequency::FFTResult {
            real: vec![0.0],
            imag: vec![0.0],
            frequencies: vec![0.0],
            sampling_rate,
        });
    let power = fft_result.power();
    let total_power: f64 = power.iter().sum();
    // Negated comparison (rather than `<= 0.0`) so a NaN total also takes
    // the zeroed branch, exactly as before.
    if !(total_power > 0.0) {
        return SpectralFeatures {
            dominant_frequency,
            spectral_entropy: 0.0,
            spectral_centroid: 0.0,
        };
    }
    // Power-weighted mean frequency.
    let weighted_sum: f64 = fft_result
        .frequencies
        .iter()
        .zip(power.iter())
        .map(|(f, p)| f * p)
        .sum();
    let spectral_centroid = weighted_sum / total_power;
    // Shannon entropy (natural log) of the normalized power distribution;
    // negligible bins are skipped to avoid ln(0).
    let mut spectral_entropy = 0.0;
    for &p in power.iter() {
        let prob = p / total_power;
        if prob > 1e-10 {
            spectral_entropy -= prob * prob.ln();
        }
    }
    SpectralFeatures {
        dominant_frequency,
        spectral_entropy,
        spectral_centroid,
    }
}
/// Linear-trend descriptors: OLS slope over the index, R² of that fit as
/// trend strength, and the count of strict local extrema (turning points).
///
/// Returns an all-zero [`TrendFeatures`] for an empty series.
pub fn trend_features(series: &TimeSeries) -> TrendFeatures {
    let data = series.values.to_vec().unwrap_or_default();
    if data.is_empty() {
        return TrendFeatures {
            linear_trend: 0.0,
            trend_strength: 0.0,
            turning_points: 0,
        };
    }
    let n = data.len();
    let n_f64 = n as f64;
    // Accumulate the sums needed for the OLS slope in a single pass.
    let mut sum_t = 0.0;
    let mut sum_y = 0.0;
    let mut sum_t_squared = 0.0;
    let mut sum_t_y = 0.0;
    for (i, &v) in data.iter().enumerate() {
        let ti = i as f64;
        let yi = v as f64;
        sum_t += ti;
        sum_y += yi;
        sum_t_squared += ti * ti;
        sum_t_y += ti * yi;
    }
    let mean_y = sum_y / n_f64;
    let denominator = n_f64 * sum_t_squared - sum_t * sum_t;
    // Slope of y on t; 0.0 when the normal equations are singular.
    let linear_trend = if denominator.abs() > 1e-10 {
        (n_f64 * sum_t_y - sum_t * sum_y) / denominator
    } else {
        0.0
    };
    let intercept = (sum_y - linear_trend * sum_t) / n_f64;
    // R² of the fitted line: 1 - SS_res / SS_tot.
    let mut ss_tot = 0.0;
    let mut ss_res = 0.0;
    for (i, &v) in data.iter().enumerate() {
        let yi = v as f64;
        ss_tot += (yi - mean_y).powi(2);
        ss_res += (yi - (linear_trend * (i as f64) + intercept)).powi(2);
    }
    let trend_strength = if ss_tot > 1e-10 {
        1.0 - (ss_res / ss_tot)
    } else {
        0.0
    };
    // A turning point is a strict local maximum or minimum.
    let turning_points = if n >= 3 {
        data.windows(3)
            .filter(|w| (w[1] > w[0] && w[1] > w[2]) || (w[1] < w[0] && w[1] < w[2]))
            .count()
    } else {
        0
    };
    TrendFeatures {
        linear_trend,
        trend_strength,
        turning_points,
    }
}
/// Trend descriptors computed by `trend_features`.
#[derive(Debug, Clone)]
pub struct TrendFeatures {
    /// OLS slope of the values regressed on their index.
    pub linear_trend: f64,
    /// R² of the linear fit (1.0 means perfectly linear).
    pub trend_strength: f64,
    /// Number of strict local extrema in the series.
    pub turning_points: usize,
}
/// Seasonality descriptors derived from the sample ACF.
///
/// A positive `period` hint bounds the scanned lags to `2 * period`
/// (capped at half the series length); otherwise up to `min(n/2, 50)` lags
/// are scanned. The detected period is the lag with the strongest positive
/// autocorrelation, and peaks are all lags whose ACF exceeds 0.2. Returns an
/// empty result for series shorter than 3 points.
pub fn seasonality_features(series: &TimeSeries, period: usize) -> SeasonalityFeatures {
    let empty_result = || SeasonalityFeatures {
        seasonal_strength: 0.0,
        seasonal_period: 0,
        seasonal_peaks: vec![],
    };
    let data = series.values.to_vec().unwrap_or_default();
    if data.len() < 3 {
        return empty_result();
    }
    let half = data.len() / 2;
    let max_lag = if period > 0 && period < half {
        (period * 2).min(half)
    } else {
        half.min(50)
    };
    if max_lag == 0 {
        return empty_result();
    }
    let acf = autocorrelation(series, max_lag);
    if acf.is_empty() {
        return empty_result();
    }
    // The strongest positive autocorrelation marks the detected period.
    let mut max_acf = 0.0;
    let mut detected_period = 0;
    for (lag_idx, &acf_val) in acf.iter().enumerate() {
        if acf_val > max_acf {
            max_acf = acf_val;
            detected_period = lag_idx + 1;
        }
    }
    let threshold = 0.2;
    // Every lag whose ACF clears the threshold counts as a seasonal peak.
    let seasonal_peaks: Vec<usize> = acf
        .iter()
        .enumerate()
        .filter(|&(_, &acf_val)| acf_val > threshold)
        .map(|(lag_idx, _)| lag_idx + 1)
        .collect();
    SeasonalityFeatures {
        seasonal_strength: max_acf.max(0.0),
        seasonal_period: detected_period,
        seasonal_peaks,
    }
}
/// Seasonality descriptors computed by `seasonality_features`.
#[derive(Debug, Clone)]
pub struct SeasonalityFeatures {
    /// Largest positive ACF value across the scanned lags (clamped at 0).
    pub seasonal_strength: f64,
    /// Lag with the strongest positive autocorrelation (0 if none found).
    pub seasonal_period: usize,
    /// All lags whose ACF exceeds the 0.2 threshold.
    pub seasonal_peaks: Vec<usize>,
}
/// Builds an `[n, lags.len()]` row-major matrix of lagged copies of the
/// series: column `j` at row `t` holds `x[t - lags[j]]`, or 0.0 where the
/// lag reaches before the start of the series.
///
/// Returns a clone of the input when `lags` is empty or the series has no
/// data.
pub fn create_lag_features(series: &TimeSeries, lags: &[usize]) -> TimeSeries {
    let data = series.values.to_vec().unwrap_or_default();
    let n = data.len();
    if lags.is_empty() || n == 0 {
        return TimeSeries::new(series.values.clone());
    }
    let width = lags.len();
    let mut lag_matrix: Vec<f32> = Vec::with_capacity(n * width);
    for t in 0..n {
        for &lag in lags {
            // Row-major push order matches the [n, width] shape below.
            lag_matrix.push(if t >= lag { data[t - lag] } else { 0.0f32 });
        }
    }
    let tensor =
        Tensor::from_vec(lag_matrix, &[n, width]).expect("tensor creation should succeed");
    TimeSeries::new(tensor)
}
/// Per-position rolling statistics; each vector has one entry per input
/// index, with NaN at positions whose window held too few observations.
#[derive(Debug, Clone)]
pub struct RollingStatistics {
    /// Rolling mean.
    pub mean: Vec<f64>,
    /// Rolling population standard deviation.
    pub std: Vec<f64>,
    /// Rolling minimum.
    pub min: Vec<f64>,
    /// Rolling maximum.
    pub max: Vec<f64>,
    /// Rolling median.
    pub median: Vec<f64>,
}
pub fn rolling_statistics(
series: &TimeSeries,
window_size: usize,
min_periods: Option<usize>,
) -> RollingStatistics {
let data = series.values.to_vec().unwrap_or_default();
let n = data.len();
let min_periods = min_periods.unwrap_or(window_size);
let mut means = Vec::with_capacity(n);
let mut stds = Vec::with_capacity(n);
let mut mins = Vec::with_capacity(n);
let mut maxs = Vec::with_capacity(n);
let mut medians = Vec::with_capacity(n);
for t in 0..n {
let start = if t + 1 >= window_size {
t + 1 - window_size
} else {
0
};
let end = t + 1;
let window_data: Vec<f64> = data[start..end].iter().map(|&x| x as f64).collect();
if window_data.len() < min_periods {
means.push(f64::NAN);
stds.push(f64::NAN);
mins.push(f64::NAN);
maxs.push(f64::NAN);
medians.push(f64::NAN);
continue;
}
let mean = window_data.iter().sum::<f64>() / window_data.len() as f64;
means.push(mean);
let variance =
window_data.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / window_data.len() as f64;
stds.push(variance.sqrt());
let min = window_data.iter().copied().fold(f64::INFINITY, f64::min);
mins.push(min);
let max = window_data
.iter()
.copied()
.fold(f64::NEG_INFINITY, f64::max);
maxs.push(max);
let mut sorted_window = window_data.clone();
sorted_window.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let median = if sorted_window.len() % 2 == 0 {
let mid = sorted_window.len() / 2;
(sorted_window[mid - 1] + sorted_window[mid]) / 2.0
} else {
sorted_window[sorted_window.len() / 2]
};
medians.push(median);
}
RollingStatistics {
mean: means,
std: stds,
min: mins,
max: maxs,
median: medians,
}
}
/// Builds an `[n, periods.len()]` row-major matrix of differenced series:
/// column `j` at row `t` holds `x[t] - x[t - periods[j]]`, or 0.0 where the
/// period reaches before the start of the series.
///
/// Returns a clone of the input when `periods` is empty or the series has no
/// data.
pub fn create_difference_features(series: &TimeSeries, periods: &[usize]) -> TimeSeries {
    let data = series.values.to_vec().unwrap_or_default();
    let n = data.len();
    if periods.is_empty() || n == 0 {
        return TimeSeries::new(series.values.clone());
    }
    let width = periods.len();
    let mut diff_matrix: Vec<f32> = Vec::with_capacity(n * width);
    for t in 0..n {
        for &period in periods {
            // Row-major push order matches the [n, width] shape below.
            diff_matrix.push(if t >= period {
                data[t] - data[t - period]
            } else {
                0.0f32
            });
        }
    }
    let tensor =
        Tensor::from_vec(diff_matrix, &[n, width]).expect("tensor creation should succeed");
    TimeSeries::new(tensor)
}
/// Builds an `[n, degree (+4)]` matrix of time-derived features: polynomial
/// columns of the normalized index t/n up to `degree`, plus — when
/// `include_time_features` is set — sin/cos encodings at one and two full
/// cycles over the series.
///
/// Returns a clone of the input when the series is empty or `degree` is 0.
pub fn create_interaction_features(
    series: &TimeSeries,
    degree: usize,
    include_time_features: bool,
) -> TimeSeries {
    let n = series.len();
    if n == 0 || degree == 0 {
        return TimeSeries::new(series.values.clone());
    }
    let cyclic_cols = if include_time_features { 4 } else { 0 };
    let width = degree + cyclic_cols;
    let mut feature_matrix: Vec<f32> = Vec::with_capacity(n * width);
    for t in 0..n {
        let tau = (t as f64) / (n as f64);
        // Polynomial columns: tau, tau^2, ..., tau^degree.
        for d in 1..=degree {
            feature_matrix.push(tau.powi(d as i32) as f32);
        }
        // Cyclic columns: one and two full periods across the series.
        if include_time_features {
            use std::f64::consts::PI;
            let angle = 2.0 * PI * tau;
            let angle2 = 4.0 * PI * tau;
            feature_matrix.push(angle.sin() as f32);
            feature_matrix.push(angle.cos() as f32);
            feature_matrix.push(angle2.sin() as f32);
            feature_matrix.push(angle2.cos() as f32);
        }
    }
    let tensor = Tensor::from_vec(feature_matrix, &[n, width])
        .expect("tensor creation should succeed");
    TimeSeries::new(tensor)
}
/// Assembles a feature matrix for the series.
///
/// Currently this only delegates to `create_lag_features`; `_window_size`
/// and `_polynomial_degree` are accepted for interface stability but are
/// ignored.
/// TODO(review): wire in rolling/polynomial features or drop the unused
/// parameters.
pub fn create_comprehensive_features(
    series: &TimeSeries,
    lags: &[usize],
    _window_size: usize,
    _polynomial_degree: usize,
) -> TimeSeries {
    create_lag_features(series, lags)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Five-point ascending series 1..=5 shared by the tests below.
    fn create_test_series() -> TimeSeries {
        let data = vec![1.0f32, 2.0, 3.0, 4.0, 5.0];
        let tensor = Tensor::from_vec(data, &[5]).expect("Tensor should succeed");
        TimeSeries::new(tensor)
    }

    #[test]
    fn test_statistical_features() {
        let series = create_test_series();
        let features = statistical_features(&series);
        // Population statistics of 1..=5: mean 3, std sqrt(2) ~ 1.4142.
        assert!((features.mean - 3.0).abs() < 1e-6);
        assert_eq!(features.min, 1.0);
        assert_eq!(features.max, 5.0);
        assert!((features.std - 1.414213562).abs() < 1e-6);
        // Symmetric data has zero skewness; a uniform ramp has negative
        // excess kurtosis.
        assert!(features.skewness.abs() < 1e-6);
        assert!(features.kurtosis < 0.0);
    }

    #[test]
    fn test_autocorrelation() {
        let series = create_test_series();
        let acf = autocorrelation(&series, 3);
        // One coefficient per requested lag.
        assert_eq!(acf.len(), 3);
    }

    #[test]
    fn test_spectral_features() {
        let series = create_test_series();
        let features = spectral_features(&series);
        // NOTE(review): expects 0.0 for a pure ramp — presumably the
        // periodogram finds no dominant frequency here; confirm against the
        // frequency module's behavior.
        assert_eq!(features.dominant_frequency, 0.0);
    }

    #[test]
    fn test_trend_features() {
        let series = create_test_series();
        let features = trend_features(&series);
        // 1..=5 is a perfect line with slope 1 and no local extrema.
        assert!((features.linear_trend - 1.0).abs() < 1e-6);
        assert!((features.trend_strength - 1.0).abs() < 1e-6);
        assert_eq!(features.turning_points, 0);
        // A zig-zag series turns at every interior index.
        let data_with_peaks = vec![1.0f32, 3.0, 2.0, 4.0, 3.0];
        let tensor_peaks = Tensor::from_vec(data_with_peaks, &[5]).expect("Tensor should succeed");
        let series_peaks = TimeSeries::new(tensor_peaks);
        let features_peaks = trend_features(&series_peaks);
        assert_eq!(features_peaks.turning_points, 3);
    }

    #[test]
    fn test_seasonality_features() {
        let series = create_test_series();
        let features = seasonality_features(&series, 0);
        assert!(features.seasonal_strength < 1.0);
        // Three full cycles of period 4 should be detected both with an
        // explicit period hint and with automatic lag selection.
        let seasonal_data = vec![
            1.0f32, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
        ];
        let tensor_seasonal =
            Tensor::from_vec(seasonal_data, &[12]).expect("Tensor should succeed");
        let series_seasonal = TimeSeries::new(tensor_seasonal);
        let features_seasonal = seasonality_features(&series_seasonal, 4);
        assert!(
            features_seasonal.seasonal_period >= 1,
            "Should detect a seasonal period"
        );
        assert!(
            features_seasonal.seasonal_strength > 0.2,
            "Should have significant seasonal strength"
        );
        assert!(
            !features_seasonal.seasonal_peaks.is_empty(),
            "Should detect seasonal peaks"
        );
        let features_auto = seasonality_features(&series_seasonal, 0);
        assert!(features_auto.seasonal_period >= 1);
        assert!(features_auto.seasonal_strength > 0.0);
    }
}