use std::cmp::Ordering;
use ndarray::{Array1, Axis, s};
#[cfg(feature = "serde")]
use serde::{Serialize, Deserialize};
use crate::error::DigiFiError;
use crate::utilities::NUMERICAL_CORRECTION;
use crate::statistics::n_choose_r;
/// Linearly rescales the values of `x` into the interval `[a, b]`.
///
/// Each element is mapped to `a + (x_i - min) * (b - a) / (max - min)`, where `min` and `max`
/// are the smallest and largest elements of `x`.
///
/// # Input
/// - `x`: Array to rescale (consumed)
/// - `a`: Value the minimum of `x` is mapped to
/// - `b`: Value the maximum of `x` is mapped to
///
/// # Output
/// - Rescaled array of the same length as `x`
///
/// # Special cases
/// - An empty array is returned unchanged.
/// - If all elements are equal (`min == max`) the scaling is undefined, and an array of ones
///   with the same length as `x` is returned.
pub fn min_max_scaling(x: Array1<f64>, a: f64, b: f64) -> Array1<f64> {
    // An empty array has no extrema; return it as-is instead of panicking on `x[0]`.
    if x.is_empty() {
        return x;
    }
    let first_value: f64 = x[0];
    let (mut min, mut max): (f64, f64) = (first_value, first_value);
    // Single pass tracking both extrema; NaN values never compare as smaller/larger and are skipped.
    for &value in x.iter() {
        if value < min {
            min = value;
        } else if max < value {
            max = value;
        }
    }
    if min == max {
        // Degenerate range: avoid division by zero.
        Array1::from_vec(vec![1.0; x.len()])
    } else {
        a + ((x - min) * (b - a)) / (max - min)
    }
}
/// Computes the period-over-period relative change of a series.
///
/// For every consecutive pair `(initial, final)` of `x` the result contains:
/// - `0.0` if both values are zero,
/// - `final / initial - 1.0` if both values are non-zero,
/// - `1.0` if exactly one value is zero and `initial < final`, otherwise `-1.0`.
///
/// # Input
/// - `x`: Series of values
///
/// # Output
/// - Array of length `x.len() - 1`; empty if `x` has fewer than two elements
pub fn percent_change(x: &Array1<f64>) -> Array1<f64> {
    let len: usize = x.len();
    // Fewer than two observations means there is no change to compute
    // (also prevents `len - 1` from underflowing on an empty array).
    if len < 2 {
        return Array1::from_vec(Vec::new());
    }
    let initial_values = x.slice(s![..(len - 1)]);
    let final_values = x.slice(s![1..]);
    let result: Vec<f64> = final_values.iter().zip(initial_values.iter()).map(|(&final_, &initial)| {
        if initial == 0.0 && final_ == 0.0 {
            0.0
        } else if initial != 0.0 && final_ != 0.0 {
            (final_ / initial) - 1.0
        } else if initial < final_ {
            // Exactly one of the values is zero: the ratio is undefined or degenerate,
            // so only the direction of the move is reported.
            1.0
        } else {
            -1.0
        }
    } ).collect();
    Array1::from_vec(result)
}
/// Computes the log returns of a series, i.e. `ln(1 + percent change)` for consecutive values.
///
/// The percent change of each consecutive pair `(initial, final)` is:
/// - `0.0` if both values are zero,
/// - `final / initial - 1.0` if both values are non-zero,
/// - `1.0` if exactly one value is zero and `initial < final`, otherwise `-1.0`.
///
/// A percent change of exactly `-1.0` is shifted by `NUMERICAL_CORRECTION` before the logarithm
/// is taken, so that `ln(0)` is never evaluated.
///
/// # Input
/// - `x`: Series of values
///
/// # Output
/// - Array of length `x.len() - 1`; empty if `x` has fewer than two elements
pub fn log_return_transformation(x: &Array1<f64>) -> Array1<f64> {
    let len: usize = x.len();
    // Fewer than two observations means there is no return to compute
    // (also prevents `len - 1` from underflowing on an empty array).
    if len < 2 {
        return Array1::from_vec(Vec::new());
    }
    let initial_values = x.slice(s![..(len - 1)]);
    let final_values = x.slice(s![1..]);
    let result: Vec<f64> = final_values.iter().zip(initial_values.iter()).map(|(&final_, &initial)| {
        let percent_change: f64 = if initial == 0.0 && final_ == 0.0 {
            0.0
        } else if initial != 0.0 && final_ != 0.0 {
            (final_ / initial) - 1.0
        } else if initial < final_ {
            1.0
        } else {
            -1.0
        };
        // A total loss would give ln(0); nudge it by the numerical correction instead.
        let v: f64 = if percent_change == -1.0 { percent_change + NUMERICAL_CORRECTION } else { percent_change };
        (v + 1.0).ln()
    } ).collect();
    Array1::from_vec(result)
}
/// Computes the `n`-th order backward difference of a series.
///
/// Each output element is `sum_{i=0}^{n} (-1)^i * C(n, i) * v[j - i]` for `j = n, ..., v.len() - 1`,
/// where `C(n, i)` is the binomial coefficient obtained from `n_choose_r`.
///
/// # Input
/// - `v`: Series to difference
/// - `n`: Order of the difference
///
/// # Output
/// - Array of length `v.len() - n` (empty when `n == v.len()`)
///
/// # Errors
/// - Returns an error if `n` is larger than the length of `v`.
/// - Propagates any error returned by `n_choose_r`.
pub fn differencing(v: &Array1<f64>, n: usize) -> Result<Array1<f64>, DigiFiError> {
    let v_len: usize = v.len();
    if v_len < n {
        return Err(DigiFiError::Other { title: "Differencing".to_owned(), details: "The `n` must be smaller than the length of the array `v`.".to_owned(), })
    }
    // No complete window of `n + 1` points exists, so there is nothing to compute.
    if v_len == n {
        return Ok(Array1::from_vec(Vec::new()));
    }
    // The signed binomial weights depend only on `n`, so compute them once up front
    // instead of once per output element.
    let n_u128: u128 = n as u128;
    let mut signed_coefficients: Vec<f64> = Vec::with_capacity(n + 1);
    let mut sign: f64 = 1.0;
    for i in 0..=n {
        signed_coefficients.push(sign * (n_choose_r(n_u128, i as u128)? as f64));
        sign *= -1.0;
    }
    // Build the result front-to-back; `push` keeps this linear in the output length.
    let mut diff: Vec<f64> = Vec::with_capacity(v_len - n);
    for j in n..v_len {
        let mut d: f64 = 0.0;
        // Weight `i` applies to the value `i` steps before position `j`.
        for (i, coefficient) in signed_coefficients.iter().enumerate() {
            d += coefficient * v[j - i];
        }
        diff.push(d);
    }
    Ok(Array1::from_vec(diff))
}
/// Replaces every value of `x` with its dense rank.
///
/// The smallest value receives rank `1.0`; equal values share the same rank and the next
/// distinct value receives the next consecutive rank (no gaps). Values that cannot be ordered
/// against each other (`partial_cmp` returns `None`) are treated as equal while sorting.
///
/// # Input
/// - `x`: Array of values to rank
///
/// # Output
/// - Array of ranks, positioned like the corresponding values in `x`
pub fn rank_transformation(x: &Array1<f64>) -> Array1<f64> {
    // Pair each value with its original position so ranks can be written back in place.
    let mut ordered: Vec<(usize, f64)> = Vec::with_capacity(x.len());
    for (position, value) in x.iter().enumerate() {
        ordered.push((position, *value));
    }
    ordered.sort_by(|lhs, rhs| {
        match lhs.1.partial_cmp(&rhs.1) {
            Some(ordering) => ordering,
            None => Ordering::Equal,
        }
    } );
    let mut ranks: Vec<f64> = vec![0.0; x.len()];
    let mut rank: f64 = 1.0;
    let mut previous: Option<f64> = None;
    for &(position, value) in ordered.iter() {
        // Advance the rank only when the value differs from the one just before it in sorted order.
        if let Some(last) = previous {
            if value != last {
                rank += 1.0;
            }
        }
        ranks[position] = rank;
        previous = Some(value);
    }
    Array1::from_vec(ranks)
}
/// Divides `x` by its p-norm, `(sum_i |x_i|^p)^(1/p)`.
///
/// # Input
/// - `x`: Array to normalize (consumed)
/// - `p`: Order of the norm (converted to `i32` for the integer power)
///
/// # Output
/// - `x` with every element divided by the p-norm
pub fn unit_vector_normalization(x: Array1<f64>, p: usize) -> Array1<f64> {
    let exponent: i32 = p as i32;
    // Accumulate |x_i|^p in element order.
    let mut power_sum: f64 = 0.0;
    for value in x.iter() {
        power_sum = power_sum + value.abs().powi(exponent);
    }
    let norm: f64 = power_sum.powf(1.0 / exponent as f64);
    x / norm
}
/// Selects which transformation `TransformationType::transformation` applies to a data series.
///
/// The default variant is `No`.
#[derive(Clone, Copy, Debug, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum TransformationType {
/// Min-max scaling; `a` and `b` are forwarded to `min_max_scaling`.
MinMaxScaling {a: f64, b: f64},
/// Percent change between consecutive values (`percent_change`).
PercentChange,
/// Log returns between consecutive values (`log_return_transformation`).
LogReturnTransformation,
/// Differencing; `order` is forwarded to `differencing` as `n`.
Differencing { order: usize },
/// Rank transformation (`rank_transformation`).
RankTransformation,
/// Unit vector normalization; `p` is forwarded to `unit_vector_normalization`.
UnitVectorNormalization { p: usize },
/// No transformation; the data is returned as an unchanged copy.
#[default]
No,
}
impl TransformationType {
    /// Applies the transformation selected by `self` to `data`.
    ///
    /// # Input
    /// - `data`: Series to transform (left untouched; transformations work on a copy or a borrow)
    ///
    /// # Output
    /// - Transformed series
    ///
    /// # Errors
    /// - Propagates the error returned by `differencing` for the `Differencing` variant.
    pub fn transformation(&self, data: &Array1<f64>) -> Result<Array1<f64>, DigiFiError> {
        match *self {
            Self::No => Ok(data.clone()),
            Self::MinMaxScaling { a, b } => Ok(min_max_scaling(data.clone(), a, b)),
            Self::PercentChange => Ok(percent_change(data)),
            Self::LogReturnTransformation => Ok(log_return_transformation(data)),
            // The only fallible transformation; its result is forwarded as-is.
            Self::Differencing { order } => differencing(data, order),
            Self::RankTransformation => Ok(rank_transformation(data)),
            Self::UnitVectorNormalization { p } => Ok(unit_vector_normalization(data.clone(), p)),
        }
    }
}
/// Namespace for helpers that apply transformations to one or several data series.
pub struct DataTransformations;
impl DataTransformations {
    /// Removes the first `count` elements of `series`.
    ///
    /// The remaining tail is copied once, rather than shifting the whole array for every
    /// removed element.
    fn drop_leading(series: &mut Array1<f64>, count: usize) {
        if count == 0 {
            return;
        }
        let trimmed: Array1<f64> = series.slice_axis(Axis(0), (count..).into()).to_owned();
        *series = trimmed;
    }

    /// Applies `transformation_type` to `data`.
    ///
    /// # Errors
    /// - Propagates any error returned by the selected transformation.
    pub fn transformation(data: &Array1<f64>, transformation_type: &TransformationType) -> Result<Array1<f64>, DigiFiError> {
        transformation_type.transformation(data)
    }

    /// Makes two series the same length by dropping leading elements from the longer one.
    ///
    /// If both series already have the same length, neither is modified.
    ///
    /// # Input
    /// - `v1`: First series
    /// - `v2`: Second series
    pub fn sync_transformations(v1: &mut Array1<f64>, v2: &mut Array1<f64>) {
        let (len_1, len_2): (usize, usize) = (v1.len(), v2.len());
        if len_1 < len_2 {
            Self::drop_leading(v2, len_2 - len_1);
        } else if len_2 < len_1 {
            Self::drop_leading(v1, len_1 - len_2);
        }
    }

    /// Applies one transformation per series and then aligns all results to a common length.
    ///
    /// After transforming, every series longer than the shortest one has its leading elements
    /// dropped so that all returned series have the length of the shortest transformed series.
    ///
    /// # Input
    /// - `data`: Series to transform
    /// - `transformation_types`: Transformation for each series, matched to `data` by position
    ///
    /// # Output
    /// - Transformed, length-aligned series in the same order as `data`
    ///
    /// # Errors
    /// - Returns an error if `data` and `transformation_types` have different lengths.
    /// - Propagates any error returned by an individual transformation.
    pub fn transformation_multiple(data: &Vec<Array1<f64>>, transformation_types: &Vec<TransformationType>) -> Result<Vec<Array1<f64>>, DigiFiError> {
        if data.len() != transformation_types.len() {
            return Err(DigiFiError::UnmatchingLength { array_1: "data".to_owned(), array_2: "transformation_types".to_owned(), });
        }
        if data.is_empty() { return Ok(vec![]) }
        let mut transformed_data: Vec<Array1<f64>> = data.iter()
            .zip(transformation_types.iter())
            .map(|(d, t)| t.transformation(d))
            .collect::<Result<Vec<Array1<f64>>, DigiFiError>>()?;
        // Transformations such as differencing shorten a series, so align everything to the shortest one.
        let shortest_len: usize = transformed_data.iter().map(|series| series.len()).min().unwrap_or(0);
        for series in transformed_data.iter_mut() {
            let surplus: usize = series.len() - shortest_len;
            Self::drop_leading(series, surplus);
        }
        Ok(transformed_data)
    }
}
#[cfg(test)]
mod tests {
    use ndarray::{Array1, array};
    use crate::utilities::TEST_ACCURACY;

    /// Sum of the absolute element-wise differences between two arrays.
    fn total_abs_error(actual: &Array1<f64>, expected: &Array1<f64>) -> f64 {
        (actual - expected).map(|v| v.abs() ).sum()
    }

    #[test]
    fn unit_test_min_max_scaling() {
        use crate::utilities::data_transformations::min_max_scaling;
        let input: Array1<f64> = array![-10.0, -4.0, 5.0];
        let scaled: Array1<f64> = min_max_scaling(input, 0.0, 1.0);
        let expected: Array1<f64> = array![0.0, 0.4, 1.0];
        assert_eq!(scaled, expected);
    }

    #[test]
    fn unit_test_percent_change() {
        use crate::utilities::data_transformations::percent_change;
        let series: Array1<f64> = array![1.0, 2.0, 1.6, 1.6, 0.0, 0.0];
        let expected: Array1<f64> = array![1.0, -0.2, 0.0, -1.0, 0.0];
        assert!(total_abs_error(&percent_change(&series), &expected) < TEST_ACCURACY);
    }

    #[test]
    fn unit_test_log_return_transformation() {
        use crate::utilities::data_transformations::log_return_transformation;
        let series: Array1<f64> = array![1.0, 2.0, 1.6, 1.6, 0.0, 0.0];
        let expected: Array1<f64> = array![0.6931471805599453, -0.2231435513142097, 0.0, -32.236990899346836, 0.0];
        assert!(total_abs_error(&log_return_transformation(&series), &expected) < TEST_ACCURACY);
    }

    #[test]
    fn unit_test_differencing() {
        use crate::utilities::data_transformations::differencing;
        let series: Array1<f64> = array![1.0, 2.0, 3.0, 4.0, 5.0];
        // A linear series has a constant first difference and vanishing higher-order differences.
        let first_order: Array1<f64> = array![1.0, 1.0, 1.0, 1.0];
        let second_order: Array1<f64> = array![0.0, 0.0, 0.0];
        let third_order: Array1<f64> = array![0.0, 0.0];
        assert_eq!(differencing(&series, 1).unwrap(), first_order);
        assert_eq!(differencing(&series, 2).unwrap(), second_order);
        assert_eq!(differencing(&series, 3).unwrap(), third_order);
    }

    #[test]
    fn unit_test_rank_transformation() {
        use crate::utilities::data_transformations::rank_transformation;
        let values: Array1<f64> = array![1.0, 3.0, 5.0, 2.0, 5.0, 6.0];
        // The two 5.0 entries share a rank and the following value gets the next rank.
        let expected_ranks: Array1<f64> = array![1.0, 3.0, 4.0, 2.0, 4.0, 5.0];
        assert_eq!(expected_ranks, rank_transformation(&values));
    }

    #[test]
    fn unit_test_unit_vector_normalization() {
        use crate::utilities::data_transformations::unit_vector_normalization;
        let input: Array1<f64> = array![-15.0, 3.0, 5.0];
        let normalized: Array1<f64> = unit_vector_normalization(input, 2);
        // With p = 2 the squared elements of the normalized vector must sum to one.
        let squared_norm: f64 = (&normalized * &normalized).sum();
        assert!((squared_norm - 1.0).abs() < TEST_ACCURACY)
    }

    #[test]
    fn unit_test_sync_transformations() {
        use crate::utilities::data_transformations::DataTransformations;
        let mut short: Array1<f64> = array![-15.0, 3.0, 5.0, 16.0, 43.0];
        let mut long: Array1<f64> = array![26.0, 1.0, 5.0, -9.0, 13.0, 45.0, 12.0];
        DataTransformations::sync_transformations(&mut short, &mut long);
        assert_eq!(short.len(), 5);
        assert_eq!(long.len(), 5);
        // The longer series loses its two leading elements.
        let expected_long: Array1<f64> = array![5.0, -9.0, 13.0, 45.0, 12.0];
        assert_eq!(long, expected_long);
    }

    #[test]
    fn unit_test_transformation_multiple() {
        use crate::utilities::data_transformations::{TransformationType, DataTransformations};
        let data: Vec<Array1<f64>> = vec![
            array![-15.0, 3.0, 5.0, 16.0, 43.0],
            array![26.0, 1.0, 5.0, -9.0, 13.0, 45.0, 12.0],
            array![36.0, 18.0, 5.0, 2.0, 60.0],
        ];
        let transformation_types: Vec<TransformationType> = vec![TransformationType::No; 3];
        let transformed: Vec<Array1<f64>> = DataTransformations::transformation_multiple(&data, &transformation_types).unwrap();
        assert_eq!(transformed.len(), 3);
        for series in transformed.iter() {
            assert_eq!(series.len(), 5);
        }
        let expected_second: Array1<f64> = array![5.0, -9.0, 13.0, 45.0, 12.0];
        assert_eq!(transformed[1], expected_second);
    }
}