use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use rand_distr::Normal;
use std::collections::HashMap;
/// Hyper-parameters for the Learning Shapelets classifier.
#[derive(Debug, Clone)]
pub struct LearningShapeletsConfig {
    // How many shapelets to initialize for EACH entry of `shapelet_sizes`
    // (total shapelets = n_shapelets_per_size * shapelet_sizes.len()).
    pub n_shapelets_per_size: usize,
    // Candidate shapelet lengths; each is clamped to the series length at fit time.
    pub shapelet_sizes: Vec<usize>,
    // SGD step size used for weights, biases, and shapelet updates alike.
    pub learning_rate: f64,
    // Number of full passes over the (shuffled) training set.
    pub n_epochs: usize,
    // L2 penalty coefficient applied to the classifier weights only.
    pub regularization: f64,
    // `Some(seed)` gives reproducible runs; `None` seeds from OS entropy.
    pub random_seed: Option<u64>,
}
impl LearningShapeletsConfig {
pub fn new() -> Self {
Self {
n_shapelets_per_size: 3,
shapelet_sizes: vec![3, 5],
learning_rate: 0.01,
n_epochs: 100,
regularization: 0.01,
random_seed: None,
}
}
}
impl Default for LearningShapeletsConfig {
fn default() -> Self {
Self::new()
}
}
/// A trained model produced by `LearningShapelets::fit`, consumed by
/// `predict` and `score`.
#[derive(Debug, Clone)]
pub struct LearningShapeletsFitted {
    // Learned shapelet subsequences, flattened across all configured sizes.
    pub shapelets: Vec<Vec<f64>>,
    // Classifier weights over shapelet distances: weights[class][shapelet].
    pub weights: Vec<Vec<f64>>,
    // Per-class bias terms, indexed like `classes`.
    pub biases: Vec<f64>,
    // Sorted, de-duplicated class labels; index position = logit position.
    pub classes: Vec<String>,
}
pub struct LearningShapelets;
impl LearningShapelets {
    /// Trains a Learning Shapelets classifier: shapelets are initialized
    /// from random subsequences of the training series and then jointly
    /// optimized with a softmax classifier over soft-minimum shapelet
    /// distances, via per-sample SGD on cross-entropy loss with L2 weight
    /// decay on the classifier weights.
    ///
    /// # Panics
    /// Panics when `x` is empty, when `x` and `y` differ in length, or when
    /// the series do not all share the same number of timestamps.
    pub fn fit(
        config: &LearningShapeletsConfig,
        x: &[Vec<f64>],
        y: &[String],
    ) -> LearningShapeletsFitted {
        assert!(!x.is_empty(), "Input must have at least one sample");
        assert_eq!(x.len(), y.len(), "X and y must have same length");
        let n_timestamps = x[0].len();
        // Shapelet extraction below slices every sample with offsets derived
        // from x[0]; ragged input would otherwise panic deep inside the loop.
        assert!(
            x.iter().all(|s| s.len() == n_timestamps),
            "All samples must have the same number of timestamps"
        );
        let mut rng = match config.random_seed {
            Some(seed) => StdRng::seed_from_u64(seed),
            None => StdRng::from_entropy(),
        };
        // Sorted, de-duplicated labels; their index defines the logit order.
        let mut classes: Vec<String> = y.to_vec();
        classes.sort();
        classes.dedup();
        let n_classes = classes.len();
        let class_idx: HashMap<&str, usize> = classes
            .iter()
            .enumerate()
            .map(|(i, c)| (c.as_str(), i))
            .collect();
        // Initialize shapelets from random subsequences of the training data.
        let mut shapelets: Vec<Vec<f64>> = Vec::new();
        for &size in &config.shapelet_sizes {
            let size = size.min(n_timestamps); // never longer than a series
            for _ in 0..config.n_shapelets_per_size {
                let sample_idx = rng.gen_range(0..x.len());
                let start = rng.gen_range(0..=n_timestamps - size);
                shapelets.push(x[sample_idx][start..start + size].to_vec());
            }
        }
        let n_shapelets = shapelets.len();
        // Small random initial weights break symmetry between classes.
        let normal = Normal::new(0.0, 0.01).unwrap();
        let mut weights: Vec<Vec<f64>> = (0..n_classes)
            .map(|_| (0..n_shapelets).map(|_| rng.sample(normal)).collect())
            .collect();
        let mut biases = vec![0.0; n_classes];
        // Pre-built one-hot targets for the cross-entropy gradient.
        let targets: Vec<Vec<f64>> = y
            .iter()
            .map(|label| {
                let mut one_hot = vec![0.0; n_classes];
                one_hot[class_idx[label.as_str()]] = 1.0;
                one_hot
            })
            .collect();
        let lr = config.learning_rate;
        let reg = config.regularization;
        for _epoch in 0..config.n_epochs {
            // Fisher-Yates shuffle: each epoch visits samples in random order.
            let mut indices: Vec<usize> = (0..x.len()).collect();
            for i in (1..indices.len()).rev() {
                let j = rng.gen_range(0..=i);
                indices.swap(i, j);
            }
            for &idx in &indices {
                let distances: Vec<f64> = shapelets
                    .iter()
                    .map(|s| min_soft_distance(&x[idx], s))
                    .collect();
                let logits: Vec<f64> = (0..n_classes)
                    .map(|c| {
                        weights[c]
                            .iter()
                            .zip(distances.iter())
                            .map(|(&w, &d)| w * d)
                            .sum::<f64>()
                            + biases[c]
                    })
                    .collect();
                // Numerically stable softmax (shift by the max logit).
                let max_logit = logits.iter().copied().fold(f64::NEG_INFINITY, f64::max);
                let exp: Vec<f64> = logits.iter().map(|&l| (l - max_logit).exp()).collect();
                let sum_exp: f64 = exp.iter().sum();
                let probs: Vec<f64> = exp.iter().map(|&e| e / sum_exp).collect();
                // d(cross-entropy)/d(logit_c) = p_c - t_c.
                let d_logits: Vec<f64> = probs
                    .iter()
                    .zip(targets[idx].iter())
                    .map(|(&p, &t)| p - t)
                    .collect();
                // BUG FIX: the gradient of the loss w.r.t. each distance must
                // use the *pre-update* weights. Previously the weights were
                // updated first and the shapelet gradient was computed from
                // the already-stepped weights, biasing the shapelet update.
                let d_dists: Vec<f64> = (0..n_shapelets)
                    .map(|s| (0..n_classes).map(|c| d_logits[c] * weights[c][s]).sum())
                    .collect();
                for c in 0..n_classes {
                    for s in 0..n_shapelets {
                        // Cross-entropy gradient plus L2 weight decay.
                        weights[c][s] -= lr * (d_logits[c] * distances[s] + reg * weights[c][s]);
                    }
                    biases[c] -= lr * d_logits[c];
                }
                for s in 0..n_shapelets {
                    // Chain rule: dL/d(shapelet) = dL/d(dist) * d(dist)/d(shapelet).
                    let grad = shapelet_gradient(&x[idx], &shapelets[s]);
                    for (k, g) in grad.iter().enumerate() {
                        shapelets[s][k] -= lr * d_dists[s] * g;
                    }
                }
            }
        }
        LearningShapeletsFitted {
            shapelets,
            weights,
            biases,
            classes,
        }
    }

    /// Predicts a class label for each series in `x` as the argmax of the
    /// linear logits over soft-minimum shapelet distances.
    pub fn predict(fitted: &LearningShapeletsFitted, x: &[Vec<f64>]) -> Vec<String> {
        let n_classes = fitted.classes.len();
        x.iter()
            .map(|sample| {
                let distances: Vec<f64> = fitted
                    .shapelets
                    .iter()
                    .map(|s| min_soft_distance(sample, s))
                    .collect();
                let logits: Vec<f64> = (0..n_classes)
                    .map(|c| {
                        fitted.weights[c]
                            .iter()
                            .zip(distances.iter())
                            .map(|(&w, &d)| w * d)
                            .sum::<f64>()
                            + fitted.biases[c]
                    })
                    .collect();
                // total_cmp is NaN-safe; partial_cmp().unwrap() would panic
                // if a degenerate model ever produced a NaN logit.
                let (best_class, _) = logits
                    .iter()
                    .enumerate()
                    .max_by(|a, b| a.1.total_cmp(b.1))
                    .expect("fitted model must have at least one class");
                fitted.classes[best_class].clone()
            })
            .collect()
    }

    /// Returns the fraction of samples in `x` whose prediction matches `y`.
    /// Returns 0.0 for empty input instead of NaN from a 0/0 division.
    ///
    /// # Panics
    /// Panics when `x` and `y` differ in length (mirrors `fit`).
    pub fn score(fitted: &LearningShapeletsFitted, x: &[Vec<f64>], y: &[String]) -> f64 {
        assert_eq!(x.len(), y.len(), "X and y must have same length");
        if y.is_empty() {
            return 0.0;
        }
        let correct = Self::predict(fitted, x)
            .iter()
            .zip(y.iter())
            .filter(|(p, t)| p == t)
            .count();
        correct as f64 / y.len() as f64
    }
}
/// Soft minimum of the mean squared distances between `shapelet` and every
/// length-`m` window of `ts`, computed as a log-sum-exp with a fixed
/// sharpness `alpha = -10.0` so the result is differentiable in the shapelet.
///
/// Returns `f64::INFINITY` when the shapelet is longer than the series and
/// `0.0` for an empty shapelet (every window matches trivially).
fn min_soft_distance(ts: &[f64], shapelet: &[f64]) -> f64 {
    let n = ts.len();
    let m = shapelet.len();
    if m > n {
        return f64::INFINITY;
    }
    // BUG FIX: m == 0 previously divided by zero (0.0 / 0 -> NaN) and the
    // NaN silently poisoned the whole optimization. An empty shapelet has
    // distance 0 to every window, so the (soft) minimum is 0.
    if m == 0 {
        return 0.0;
    }
    let alpha = -10.0;
    // Mean squared distance from the shapelet to each sliding window.
    let distances: Vec<f64> = ts
        .windows(m)
        .map(|w| {
            w.iter()
                .zip(shapelet.iter())
                .map(|(&a, &b)| (a - b).powi(2))
                .sum::<f64>()
                / m as f64
        })
        .collect();
    // Numerically stable log-sum-exp: shift by the max of alpha * d.
    let max_alpha_d = distances
        .iter()
        .map(|&d| alpha * d)
        .fold(f64::NEG_INFINITY, f64::max);
    let sum_exp: f64 = distances
        .iter()
        .map(|&d| (alpha * d - max_alpha_d).exp())
        .sum();
    (max_alpha_d + sum_exp.ln()) / alpha
}
/// Gradient of the soft-minimum distance (see `min_soft_distance`) with
/// respect to each entry of `shapelet`: the softmax weighting of the windows
/// (sharpness `alpha = -10.0`) is chained through the per-window mean squared
/// distance. Returns an all-zero vector when the shapelet is longer than the
/// series.
fn shapelet_gradient(ts: &[f64], shapelet: &[f64]) -> Vec<f64> {
    let n = ts.len();
    let m = shapelet.len();
    if m > n {
        return vec![0.0; m];
    }
    let alpha = -10.0;
    // Mean squared distance from the shapelet to each sliding window.
    let distances: Vec<f64> = (0..=n - m)
        .map(|i| {
            ts[i..i + m]
                .iter()
                .zip(shapelet.iter())
                .map(|(&a, &b)| (a - b).powi(2))
                .sum::<f64>()
                / m as f64
        })
        .collect();
    // Shift by the largest alpha * d for a numerically stable softmax.
    let scaled_max = distances
        .iter()
        .fold(f64::NEG_INFINITY, |acc, &d| acc.max(alpha * d));
    let exp_weights: Vec<f64> = distances
        .iter()
        .map(|&d| (alpha * d - scaled_max).exp())
        .collect();
    let total: f64 = exp_weights.iter().sum();
    // Accumulate each window's contribution, weighted by its softmax share.
    let mut grad = vec![0.0; m];
    for (i, &e) in exp_weights.iter().enumerate() {
        let w = e / total;
        for k in 0..m {
            grad[k] += w * (-2.0 * (ts[i + k] - shapelet[k]) / m as f64);
        }
    }
    grad
}
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test: fitting and predicting on a tiny two-class dataset must
    // produce one prediction per sample without panicking.
    #[test]
    fn test_learning_shapelets_basic() {
        let config = LearningShapeletsConfig {
            n_shapelets_per_size: 2,
            shapelet_sizes: vec![3],
            learning_rate: 0.1,
            n_epochs: 50,
            regularization: 0.001,
            // Fixed seed keeps the test deterministic.
            random_seed: Some(42),
        };
        // Two ascending ("A") and two descending ("B") series.
        let x = vec![
            vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0],
            vec![0.0, 1.0, 2.0, 3.0, 4.0, 6.0],
            vec![5.0, 4.0, 3.0, 2.0, 1.0, 0.0],
            vec![6.0, 4.0, 3.0, 2.0, 1.0, 0.0],
        ];
        let y = vec![
            "A".to_string(),
            "A".to_string(),
            "B".to_string(),
            "B".to_string(),
        ];
        let fitted = LearningShapelets::fit(&config, &x, &y);
        let predictions = LearningShapelets::predict(&fitted, &x);
        assert_eq!(predictions.len(), 4);
    }

    // Convergence test: on trivially separable up/down trends, training
    // accuracy should be at least chance level (0.5) after 200 epochs.
    #[test]
    fn test_learning_shapelets_converges() {
        let config = LearningShapeletsConfig {
            n_shapelets_per_size: 3,
            shapelet_sizes: vec![3, 4],
            learning_rate: 0.1,
            n_epochs: 200,
            regularization: 0.001,
            random_seed: Some(42),
        };
        let x = vec![
            vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
            vec![0.0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 7.5],
            vec![7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0],
            vec![7.5, 5.5, 4.5, 3.5, 2.5, 1.5, 0.5, 0.0],
        ];
        let y = vec![
            "up".to_string(),
            "up".to_string(),
            "down".to_string(),
            "down".to_string(),
        ];
        let fitted = LearningShapelets::fit(&config, &x, &y);
        let score = LearningShapelets::score(&fitted, &x, &y);
        assert!(score >= 0.5, "Score too low: {score}");
    }

    // The soft minimum should be close to (but may slightly undershoot) the
    // true minimum; an exact-match window therefore gives a value near 0,
    // possibly marginally negative due to the log-sum-exp smoothing.
    #[test]
    fn test_soft_min_distance() {
        let ts = vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0];
        let shapelet = vec![0.0, 0.0, 0.0];
        let d = min_soft_distance(&ts, &shapelet);
        assert!(
            d < 0.1,
            "Distance should be near 0 for exact match, got {d}"
        );
        assert!(d > -0.1, "Distance too negative: {d}");
    }
}