use crate::TimeSeries;
use torsh_core::error::Result;
#[derive(Debug, Clone)]
pub struct ChangePointResult {
pub change_points: Vec<usize>,
pub scores: Vec<f64>,
pub algorithm: String,
}
#[derive(Debug, Clone, Copy)]
pub enum CostFunction {
L2,
L1,
Variance,
KolmogorovSmirnov,
}
pub struct PELT {
penalty: f64,
cost_function: CostFunction,
min_segment_length: usize,
}
impl PELT {
pub fn new(penalty: f64, min_segment_length: usize) -> Self {
Self {
penalty,
cost_function: CostFunction::L2,
min_segment_length,
}
}
pub fn with_cost_function(mut self, cost_fn: CostFunction) -> Self {
self.cost_function = cost_fn;
self
}
pub fn detect(&self, series: &TimeSeries) -> Result<ChangePointResult> {
let data = series.values.to_vec()?;
let n = data.len();
if n < 2 * self.min_segment_length {
return Ok(ChangePointResult {
change_points: vec![],
scores: vec![],
algorithm: "PELT".to_string(),
});
}
let mut f = vec![f64::INFINITY; n + 1]; f[0] = -self.penalty;
let mut cp = vec![0; n + 1]; let mut r = vec![0];
for t in self.min_segment_length..=n {
let mut costs = Vec::new();
for &s in &r {
if t - s >= self.min_segment_length {
let segment_cost = self.compute_cost(&data[s..t]);
costs.push((f[s] + segment_cost + self.penalty, s));
}
}
if let Some(&(min_cost, s_star)) = costs
.iter()
.min_by(|(a, _), (b, _)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
{
f[t] = min_cost;
cp[t] = s_star;
r.retain(|&s| f[s] + self.compute_cost(&data[s..t]) <= f[t]);
r.push(t);
}
}
let mut change_points = Vec::new();
let mut current = n;
while current > 0 {
let prev = cp[current];
if prev > 0 {
change_points.push(prev);
}
current = prev;
}
change_points.reverse();
let scores = change_points
.iter()
.map(|&cp_idx| {
if cp_idx < n {
self.compute_cost(&data[cp_idx - self.min_segment_length..cp_idx])
} else {
0.0
}
})
.collect();
Ok(ChangePointResult {
change_points,
scores,
algorithm: "PELT".to_string(),
})
}
fn compute_cost(&self, segment: &[f32]) -> f64 {
match self.cost_function {
CostFunction::L2 => {
let mean = segment.iter().sum::<f32>() / segment.len() as f32;
segment
.iter()
.map(|&x| ((x - mean) as f64).powi(2))
.sum::<f64>()
}
CostFunction::L1 => {
let mut sorted = segment.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let median = sorted[sorted.len() / 2];
segment
.iter()
.map(|&x| ((x - median) as f64).abs())
.sum::<f64>()
}
CostFunction::Variance => {
let mean = segment.iter().sum::<f32>() / segment.len() as f32;
let variance = segment
.iter()
.map(|&x| ((x - mean) as f64).powi(2))
.sum::<f64>()
/ segment.len() as f64;
-variance.ln() * segment.len() as f64 }
CostFunction::KolmogorovSmirnov => {
let mean = segment.iter().sum::<f32>() / segment.len() as f32;
segment
.iter()
.map(|&x| ((x - mean) as f64).powi(2))
.sum::<f64>()
}
}
}
}
pub struct BinarySegmentation {
threshold: f64,
max_change_points: Option<usize>,
cost_function: CostFunction,
min_segment_length: usize,
}
impl BinarySegmentation {
pub fn new(threshold: f64) -> Self {
Self {
threshold,
max_change_points: None,
cost_function: CostFunction::L2,
min_segment_length: 2,
}
}
pub fn with_max_change_points(mut self, max_cp: usize) -> Self {
self.max_change_points = Some(max_cp);
self
}
pub fn with_cost_function(mut self, cost_fn: CostFunction) -> Self {
self.cost_function = cost_fn;
self
}
pub fn with_min_segment_length(mut self, min_len: usize) -> Self {
self.min_segment_length = min_len;
self
}
pub fn detect(&self, series: &TimeSeries) -> Result<ChangePointResult> {
let data = series.values.to_vec()?;
let n = data.len();
let mut change_points = Vec::new();
let mut scores = Vec::new();
let mut segments = vec![(0, n)];
while !segments.is_empty() {
if let Some(max_cp) = self.max_change_points {
if change_points.len() >= max_cp {
break;
}
}
let (start, end) = segments.pop().expect("segments was checked non-empty");
if end - start < 2 * self.min_segment_length {
continue;
}
let (best_split, best_score) = self.find_best_split(&data, start, end);
if best_score > self.threshold {
change_points.push(best_split);
scores.push(best_score);
segments.push((start, best_split));
segments.push((best_split, end));
}
}
change_points.sort_unstable();
Ok(ChangePointResult {
change_points,
scores,
algorithm: "Binary Segmentation".to_string(),
})
}
fn find_best_split(&self, data: &[f32], start: usize, end: usize) -> (usize, f64) {
let mut best_split = start + self.min_segment_length;
let mut best_score = 0.0;
let original_cost = self.segment_cost(data, start, end);
for split in (start + self.min_segment_length)..(end - self.min_segment_length) {
let left_cost = self.segment_cost(data, start, split);
let right_cost = self.segment_cost(data, split, end);
let cost_reduction = original_cost - (left_cost + right_cost);
if cost_reduction > best_score {
best_score = cost_reduction;
best_split = split;
}
}
(best_split, best_score)
}
fn segment_cost(&self, data: &[f32], start: usize, end: usize) -> f64 {
let segment = &data[start..end];
match self.cost_function {
CostFunction::L2 => {
let mean = segment.iter().sum::<f32>() / segment.len() as f32;
segment
.iter()
.map(|&x| ((x - mean) as f64).powi(2))
.sum::<f64>()
}
CostFunction::L1 => {
let mut sorted = segment.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let median = sorted[sorted.len() / 2];
segment
.iter()
.map(|&x| ((x - median) as f64).abs())
.sum::<f64>()
}
CostFunction::Variance => {
let mean = segment.iter().sum::<f32>() / segment.len() as f32;
segment
.iter()
.map(|&x| ((x - mean) as f64).powi(2))
.sum::<f64>()
/ segment.len() as f64
}
CostFunction::KolmogorovSmirnov => {
let mean = segment.iter().sum::<f32>() / segment.len() as f32;
segment
.iter()
.map(|&x| ((x - mean) as f64).powi(2))
.sum::<f64>()
}
}
}
}
pub struct WindowDetector {
window_size: usize,
threshold: f64,
statistic: WindowStatistic,
}
#[derive(Debug, Clone, Copy)]
pub enum WindowStatistic {
Mean,
Variance,
CUSUM,
}
impl WindowDetector {
pub fn new(window_size: usize, threshold: f64) -> Self {
Self {
window_size,
threshold,
statistic: WindowStatistic::Mean,
}
}
pub fn with_statistic(mut self, stat: WindowStatistic) -> Self {
self.statistic = stat;
self
}
pub fn detect(&self, series: &TimeSeries) -> Result<ChangePointResult> {
let data = series.values.to_vec()?;
let n = data.len();
if n < 2 * self.window_size {
return Ok(ChangePointResult {
change_points: vec![],
scores: vec![],
algorithm: "Window".to_string(),
});
}
let mut change_points = Vec::new();
let mut scores = Vec::new();
for i in self.window_size..(n - self.window_size) {
let left_window = &data[(i - self.window_size)..i];
let right_window = &data[i..(i + self.window_size)];
let score = match self.statistic {
WindowStatistic::Mean => self.mean_change(left_window, right_window),
WindowStatistic::Variance => self.variance_change(left_window, right_window),
WindowStatistic::CUSUM => self.cusum_statistic(&data, i),
};
if score.abs() > self.threshold {
change_points.push(i);
scores.push(score);
}
}
Ok(ChangePointResult {
change_points,
scores,
algorithm: "Window".to_string(),
})
}
fn mean_change(&self, left: &[f32], right: &[f32]) -> f64 {
let left_mean = left.iter().sum::<f32>() / left.len() as f32;
let right_mean = right.iter().sum::<f32>() / right.len() as f32;
(right_mean - left_mean) as f64
}
fn variance_change(&self, left: &[f32], right: &[f32]) -> f64 {
let left_mean = left.iter().sum::<f32>() / left.len() as f32;
let right_mean = right.iter().sum::<f32>() / right.len() as f32;
let left_var = left
.iter()
.map(|&x| ((x - left_mean) as f64).powi(2))
.sum::<f64>()
/ left.len() as f64;
let right_var = right
.iter()
.map(|&x| ((x - right_mean) as f64).powi(2))
.sum::<f64>()
/ right.len() as f64;
right_var - left_var
}
fn cusum_statistic(&self, data: &[f32], pos: usize) -> f64 {
let mean = data.iter().sum::<f32>() / data.len() as f32;
let mut cusum = 0.0;
for &x in &data[..pos] {
cusum += (x - mean) as f64;
}
cusum
}
}
#[cfg(test)]
mod tests {
use super::*;
use torsh_tensor::Tensor;
fn create_change_point_series() -> TimeSeries {
let mut data = Vec::with_capacity(100);
for _i in 0..50 {
data.push(1.0f32);
}
for _i in 50..100 {
data.push(5.0f32);
}
let tensor = Tensor::from_vec(data, &[100]).expect("Tensor should succeed");
TimeSeries::new(tensor)
}
#[test]
fn test_pelt_creation() {
let pelt = PELT::new(10.0, 5);
assert_eq!(pelt.min_segment_length, 5);
}
#[test]
fn test_pelt_detection() {
let series = create_change_point_series();
let pelt = PELT::new(10.0, 5);
let result = pelt
.detect(&series)
.expect("detection operation should succeed");
assert_eq!(result.algorithm, "PELT");
assert!(!result.change_points.is_empty());
}
#[test]
fn test_pelt_with_cost_functions() {
let series = create_change_point_series();
for cost_fn in [
CostFunction::L2,
CostFunction::L1,
CostFunction::Variance,
CostFunction::KolmogorovSmirnov,
] {
let pelt = PELT::new(10.0, 5).with_cost_function(cost_fn);
let result = pelt
.detect(&series)
.expect("detection operation should succeed");
assert_eq!(result.algorithm, "PELT");
}
}
#[test]
fn test_binary_segmentation_creation() {
let bs = BinarySegmentation::new(0.5);
assert_eq!(bs.threshold, 0.5);
}
#[test]
fn test_binary_segmentation_detection() {
let series = create_change_point_series();
let bs = BinarySegmentation::new(1.0);
let result = bs
.detect(&series)
.expect("detection operation should succeed");
assert_eq!(result.algorithm, "Binary Segmentation");
assert!(!result.change_points.is_empty());
}
#[test]
fn test_binary_segmentation_max_change_points() {
let series = create_change_point_series();
let bs = BinarySegmentation::new(0.1).with_max_change_points(2);
let result = bs
.detect(&series)
.expect("detection operation should succeed");
assert!(result.change_points.len() <= 2);
}
#[test]
fn test_window_detector_creation() {
let detector = WindowDetector::new(10, 0.5);
assert_eq!(detector.window_size, 10);
assert_eq!(detector.threshold, 0.5);
}
#[test]
fn test_window_detector_mean() {
let series = create_change_point_series();
let detector = WindowDetector::new(10, 1.0).with_statistic(WindowStatistic::Mean);
let result = detector
.detect(&series)
.expect("detection operation should succeed");
assert_eq!(result.algorithm, "Window");
assert!(!result.change_points.is_empty());
}
#[test]
fn test_window_detector_variance() {
let series = create_change_point_series();
let detector = WindowDetector::new(10, 0.1).with_statistic(WindowStatistic::Variance);
let result = detector
.detect(&series)
.expect("detection operation should succeed");
assert_eq!(result.algorithm, "Window");
}
#[test]
fn test_window_detector_cusum() {
let series = create_change_point_series();
let detector = WindowDetector::new(10, 5.0).with_statistic(WindowStatistic::CUSUM);
let result = detector
.detect(&series)
.expect("detection operation should succeed");
assert_eq!(result.algorithm, "Window");
}
#[test]
fn test_change_point_result() {
let series = create_change_point_series();
let pelt = PELT::new(10.0, 5);
let result = pelt
.detect(&series)
.expect("detection operation should succeed");
assert_eq!(result.change_points.len(), result.scores.len());
assert!(!result.algorithm.is_empty());
}
}