use std::collections::HashMap;
use crate::approximation::dft::{Dft, DftConfig};
use crate::approximation::sfa::{
fit_from_coefs, sfa_transform_symbolic, transform_from_coefs, Sfa, SfaConfig, SfaFitted,
};
use crate::core::config::{BinStrategy, NumerosityReduction};
use crate::core::traits::{FittableTransformer, Transformer};
/// Configuration for the [`Boss`] transformer.
///
/// The windowing fields (`window_size`, `window_step`) and `numerosity_reduction`
/// are consumed in this file; the remaining fields are forwarded unchanged to the
/// SFA (and, where noted, DFT) configuration.
#[derive(Debug, Clone)]
pub struct BossConfig {
/// Number of consecutive points in each sliding window.
pub window_size: usize,
/// Forwarded to SFA as `n_coefs`. The tests in this file expect every produced
/// word to contain exactly this many letters.
pub word_size: usize,
/// Forwarded to SFA as `n_bins`; presumably the alphabet size (bin `0` maps to
/// `'a'`, bin `1` to `'b'`, ...) - confirm against the SFA module.
pub n_bins: usize,
/// Binning strategy forwarded to SFA.
pub strategy: BinStrategy,
/// Whether runs of identical consecutive words within one sample are collapsed
/// to a single occurrence before counting.
pub numerosity_reduction: NumerosityReduction,
/// Distance between the start indices of two successive windows.
pub window_step: usize,
/// Forwarded to both the SFA and DFT configurations as `norm_mean`.
pub norm_mean: bool,
/// Forwarded to both the SFA and DFT configurations as `norm_std`.
pub norm_std: bool,
/// Forwarded to both the SFA and DFT configurations as `drop_sum`.
pub drop_sum: bool,
/// Forwarded to the SFA configuration as `anova`.
pub anova: bool,
}
impl BossConfig {
    /// Creates a configuration from the two mandatory sizes, filling every other
    /// option with its default value:
    ///
    /// * `window_step = 1`, numerosity reduction of identical consecutive words,
    /// * `n_bins = 4` with quantile binning, `anova` off,
    /// * `norm_mean` and `norm_std` on, `drop_sum` off.
    ///
    /// Use struct-update syntax (`BossConfig { n_bins: 6, ..BossConfig::new(8, 3) }`)
    /// to override individual defaults.
    pub fn new(window_size: usize, word_size: usize) -> Self {
        Self {
            // Caller-supplied sizes.
            window_size,
            word_size,
            // Options consumed by the windowing / histogram code in this file.
            window_step: 1,
            numerosity_reduction: NumerosityReduction::IdenticalConsecutive,
            // Options forwarded to SFA only.
            n_bins: 4,
            strategy: BinStrategy::Quantile,
            anova: false,
            // Options forwarded to both SFA and DFT.
            norm_mean: true,
            norm_std: true,
            drop_sum: false,
        }
    }
}
/// Fitted state returned by [`Boss::fit`] and [`Boss::fit_with_histograms`].
#[derive(Debug, Clone)]
pub struct BossFitted {
/// Fitted SFA state; `Boss::transform` uses it to convert windows into symbolic words.
pub sfa_fitted: SfaFitted,
/// Copy of the configuration used for fitting; `Boss::transform` reads its
/// windowing and numerosity-reduction settings from here.
pub config: BossConfig,
}
pub struct Boss;
impl Boss {
pub fn fit(config: &BossConfig, x: &[Vec<f64>], y: Option<&[String]>) -> BossFitted {
assert!(!x.is_empty(), "Input must have at least one sample");
let n_timestamps = x[0].len();
assert!(
config.window_size <= n_timestamps,
"window_size must not exceed n_timestamps"
);
let windows = extract_all_windows(x, config.window_size, config.window_step);
let expanded_y: Option<Vec<String>> = y.map(|labels| {
let n_windows_per_sample = (n_timestamps - config.window_size) / config.window_step + 1;
labels
.iter()
.flat_map(|l| std::iter::repeat_n(l.clone(), n_windows_per_sample))
.collect()
});
let sfa_config = SfaConfig {
n_coefs: Some(config.word_size),
n_bins: config.n_bins,
strategy: config.strategy,
drop_sum: config.drop_sum,
anova: config.anova,
norm_mean: config.norm_mean,
norm_std: config.norm_std,
};
let sfa_fitted = Sfa::fit(&sfa_config, &windows, expanded_y.as_deref());
BossFitted {
sfa_fitted,
config: config.clone(),
}
}
pub fn transform(fitted: &BossFitted, x: &[Vec<f64>]) -> Vec<HashMap<String, usize>> {
assert!(!x.is_empty(), "Input must have at least one sample");
let config = &fitted.config;
let n_timestamps = x[0].len();
let n_windows_per_sample = (n_timestamps - config.window_size) / config.window_step + 1;
let windows = extract_all_windows(x, config.window_size, config.window_step);
let symbolic = sfa_transform_symbolic(&fitted.sfa_fitted, &windows);
build_histograms(&symbolic, x.len(), n_windows_per_sample, config)
}
pub fn fit_transform(
config: &BossConfig,
x: &[Vec<f64>],
y: Option<&[String]>,
) -> Vec<HashMap<String, usize>> {
let (fitted, histograms) = Self::fit_with_histograms(config, x, y);
let _ = fitted;
histograms
}
pub fn fit_with_histograms(
config: &BossConfig,
x: &[Vec<f64>],
y: Option<&[String]>,
) -> (BossFitted, Vec<HashMap<String, usize>>) {
assert!(!x.is_empty(), "Input must have at least one sample");
let n_timestamps = x[0].len();
assert!(
config.window_size <= n_timestamps,
"window_size must not exceed n_timestamps"
);
let windows = extract_all_windows(x, config.window_size, config.window_step);
let expanded_y: Option<Vec<String>> = y.map(|labels| {
let n_windows_per_sample = (n_timestamps - config.window_size) / config.window_step + 1;
labels
.iter()
.flat_map(|l| std::iter::repeat_n(l.clone(), n_windows_per_sample))
.collect()
});
let dft_config = DftConfig {
n_coefs: None,
norm_mean: config.norm_mean,
norm_std: config.norm_std,
drop_sum: config.drop_sum,
};
let all_coefs = Dft::transform(&dft_config, &windows);
let sfa_config = SfaConfig {
n_coefs: Some(config.word_size),
n_bins: config.n_bins,
strategy: config.strategy,
drop_sum: config.drop_sum,
anova: config.anova,
norm_mean: config.norm_mean,
norm_std: config.norm_std,
};
let sfa_fitted =
fit_from_coefs(&sfa_config, &all_coefs, expanded_y.as_deref(), &dft_config);
let symbolic = transform_from_coefs(&sfa_fitted, &all_coefs);
let fitted = BossFitted {
sfa_fitted,
config: config.clone(),
};
let n_windows_per_sample = (n_timestamps - config.window_size) / config.window_step + 1;
let histograms = build_histograms(&symbolic, x.len(), n_windows_per_sample, config);
(fitted, histograms)
}
}
/// Counts the symbolic words of each sample into a histogram.
///
/// `symbolic` is the flat list of words for all samples, laid out sample after
/// sample with exactly `n_windows_per_sample` words each. Every word is a sequence
/// of bin indices; index `0` is rendered as `'a'`, `1` as `'b'`, and so on
/// (bin indices are presumably small enough that `b'a' + bin` stays within `u8`).
///
/// When `config.numerosity_reduction` is `IdenticalConsecutive`, runs of equal
/// consecutive words within a sample are collapsed to one occurrence before
/// counting; with `None` every word is counted.
///
/// Returns one `word -> count` map per sample, in sample order.
///
/// # Panics
///
/// Panics if `symbolic` holds fewer than `n_samples * n_windows_per_sample` words.
fn build_histograms(
    symbolic: &[Vec<u8>],
    n_samples: usize,
    n_windows_per_sample: usize,
    config: &BossConfig,
) -> Vec<HashMap<String, usize>> {
    (0..n_samples)
        .map(|sample_idx| {
            let start = sample_idx * n_windows_per_sample;
            let sample_words = &symbolic[start..start + n_windows_per_sample];

            let mut words: Vec<String> = sample_words
                .iter()
                .map(|bins| bins.iter().map(|&bin| char::from(b'a' + bin)).collect())
                .collect();

            match config.numerosity_reduction {
                // `dedup` removes consecutive repeats only and needs no sentinel
                // "previous word", so a leading empty word is kept as well.
                NumerosityReduction::IdenticalConsecutive => words.dedup(),
                NumerosityReduction::None => {}
            }

            let mut hist: HashMap<String, usize> = HashMap::new();
            for word in words {
                *hist.entry(word).or_insert(0) += 1;
            }
            hist
        })
        .collect()
}
/// Cuts every sample into sliding windows and returns them as one flat list.
///
/// For each sample, windows of `window_size` points start at indices
/// `0, window_step, 2 * window_step, ...` up to the last index at which a full
/// window still fits. Windows are emitted sample after sample, in order.
///
/// # Panics
///
/// Panics if `window_step` is zero or if any sample is shorter than `window_size`.
fn extract_all_windows(x: &[Vec<f64>], window_size: usize, window_step: usize) -> Vec<Vec<f64>> {
    assert!(window_step > 0, "window_step must be greater than zero");
    x.iter()
        .flat_map(|sample| {
            // `checked_sub` turns a would-be integer underflow into a clear message.
            let last_start = sample.len().checked_sub(window_size).unwrap_or_else(|| {
                panic!(
                    "window_size ({window_size}) must not exceed sample length ({})",
                    sample.len()
                )
            });
            (0..=last_start)
                .step_by(window_step)
                .map(move |start| sample[start..start + window_size].to_vec())
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `fit_transform` yields one non-empty histogram per sample and every word
    /// has exactly `word_size` letters.
    #[test]
    fn test_boss_basic() {
        let series = vec![
            vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
            vec![7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0],
            vec![0.0, 2.0, 4.0, 6.0, 4.0, 2.0, 0.0, -2.0],
            vec![1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0],
        ];

        let bags = Boss::fit_transform(&BossConfig::new(4, 2), &series, None);

        assert_eq!(bags.len(), 4);
        for bag in &bags {
            assert!(!bag.is_empty());
            bag.keys().for_each(|word| assert_eq!(word.len(), 2));
        }
    }

    /// A model fitted with `fit` can be applied with `transform` and returns one
    /// histogram per input sample.
    #[test]
    fn test_boss_fit_then_transform() {
        let series = vec![
            vec![0.0, 1.0, 2.0, 3.0, 4.0],
            vec![4.0, 3.0, 2.0, 1.0, 0.0],
            vec![1.0, 3.0, 2.0, 4.0, 0.0],
        ];

        let model = Boss::fit(&BossConfig::new(3, 2), &series, None);
        let bags = Boss::transform(&model, &series);

        assert_eq!(bags.len(), 3);
    }

    /// Without numerosity reduction the counts add up to the number of windows:
    /// 6 points with a window of 3 and a step of 1 give 4 windows.
    #[test]
    fn test_boss_histogram_counts() {
        let config = BossConfig {
            numerosity_reduction: NumerosityReduction::None,
            ..BossConfig::new(3, 2)
        };
        let series = vec![vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0]];

        let bags = Boss::fit_transform(&config, &series, None);

        let word_count = bags[0].values().sum::<usize>();
        assert_eq!(word_count, 4);
    }
}