#![allow(clippy::needless_range_loop, clippy::manual_is_multiple_of)]
#[derive(Debug, Clone)]
pub struct MinimaResult {
pub indices: Vec<usize>,
pub values: Vec<f64>,
}
#[derive(Debug, Clone)]
pub struct FilteredIndices {
pub indices: Vec<usize>,
pub values: Vec<f64>,
}
fn matrix_multiply(a: &[f64], b: &[f64], m: usize, n: usize, p: usize) -> Vec<f64> {
let mut c = vec![0.0; m * p];
for i in 0..m {
for j in 0..p {
let mut sum = 0.0;
for k in 0..n {
sum += a[i * n + k] * b[k * p + j];
}
c[i * p + j] = sum;
}
}
c
}
fn matrix_transpose(a: &[f64], m: usize, n: usize) -> Vec<f64> {
let mut at = vec![0.0; n * m];
for i in 0..m {
for j in 0..n {
at[j * m + i] = a[i * n + j];
}
}
at
}
fn matrix_inverse(a: &[f64], n: usize) -> Option<Vec<f64>> {
let mut a_inv = vec![0.0; n * n];
for i in 0..n {
a_inv[i * n + i] = 1.0;
}
let mut work = a.to_vec();
for i in 0..n {
let mut max_row = i;
let mut max_val = work[i * n + i].abs();
for k in (i + 1)..n {
let val = work[k * n + i].abs();
if val > max_val {
max_val = val;
max_row = k;
}
}
if max_row != i {
for j in 0..n {
work.swap(i * n + j, max_row * n + j);
a_inv.swap(i * n + j, max_row * n + j);
}
}
let pivot = work[i * n + i];
if pivot.abs() < 1e-10 {
return None;
}
for j in 0..n {
work[i * n + j] /= pivot;
a_inv[i * n + j] /= pivot;
}
for k in 0..n {
if k != i {
let factor = work[k * n + i];
for j in 0..n {
work[k * n + j] -= factor * work[i * n + j];
a_inv[k * n + j] -= factor * a_inv[i * n + j];
}
}
}
}
Some(a_inv)
}
fn compute_savgol_coeffs(window_size: usize, poly_order: usize, deriv: usize) -> Option<Vec<f64>> {
let half_window = (window_size - 1) / 2;
let poly_cols = poly_order + 1;
let mut a = vec![0.0; window_size * poly_cols];
for i in 0..window_size {
let x = i as f64 - half_window as f64;
for j in 0..poly_cols {
a[i * poly_cols + j] = x.powi(j as i32);
}
}
let at = matrix_transpose(&a, window_size, poly_cols);
let ata = matrix_multiply(&at, &a, poly_cols, window_size, poly_cols);
let ata_inv = matrix_inverse(&ata, poly_cols)?;
let factorial: f64 = (1..=deriv).map(|i| i as f64).product::<f64>().max(1.0);
let mut coeffs = vec![0.0; window_size];
for i in 0..window_size {
if deriv < poly_cols {
let mut sum = 0.0;
for k in 0..poly_cols {
sum += ata_inv[deriv * poly_cols + k] * a[i * poly_cols + k];
}
coeffs[i] = factorial * sum;
}
}
Some(coeffs)
}
fn apply_convolution(data: &[f64], kernel: &[f64]) -> Vec<f64> {
let n = data.len();
let kernel_size = kernel.len();
let half = kernel_size / 2;
let mut output = vec![0.0; n];
for i in 0..n {
let mut sum = 0.0;
for j in 0..kernel_size {
let mut idx = i as isize - half as isize + j as isize;
if idx < 0 {
idx = -idx;
} else if idx >= n as isize {
idx = 2 * n as isize - idx - 2;
}
idx = idx.clamp(0, n as isize - 1);
sum += data[idx as usize] * kernel[j];
}
output[i] = sum;
}
output
}
pub fn savgol_filter(
data: &[f64],
window_length: usize,
poly_order: usize,
deriv: usize,
) -> Option<Vec<f64>> {
if window_length % 2 == 0 || window_length <= poly_order || data.is_empty() {
return None;
}
let coeffs = compute_savgol_coeffs(window_length, poly_order, deriv)?;
Some(apply_convolution(data, &coeffs))
}
pub fn find_local_minima_interpolated(
data: &[f64],
window_size: usize,
poly_order: usize,
tolerance: f64,
) -> Option<MinimaResult> {
if data.is_empty() {
return Some(MinimaResult {
indices: vec![],
values: vec![],
});
}
let first_deriv = savgol_filter(data, window_size, poly_order, 1)?;
let second_deriv = savgol_filter(data, window_size, poly_order, 2)?;
let mut indices = Vec::new();
let mut values = Vec::new();
for i in 0..data.len() {
if first_deriv[i].abs() < tolerance && second_deriv[i] > 0.0 {
indices.push(i);
values.push(data[i]);
}
}
Some(MinimaResult { indices, values })
}
pub fn windowed_cross_similarity(
embeddings: &[f64],
n: usize,
d: usize,
window_size: usize,
) -> Option<Vec<f64>> {
if window_size % 2 == 0 || window_size < 3 || n < 2 || d == 0 {
return None;
}
let half_window = window_size / 2;
let mut result = vec![0.0; n - 1];
for i in 0..(n - 1) {
let start = i.saturating_sub(half_window);
let end = (i + half_window + 2).min(n);
let mut total_sim = 0.0;
let mut count = 0;
for j in start..(end - 1) {
let emb1_start = j * d;
let emb2_start = (j + 1) * d;
let mut dot = 0.0;
let mut norm1 = 0.0;
let mut norm2 = 0.0;
for k in 0..d {
let v1 = embeddings[emb1_start + k];
let v2 = embeddings[emb2_start + k];
dot += v1 * v2;
norm1 += v1 * v1;
norm2 += v2 * v2;
}
if norm1 > 0.0 && norm2 > 0.0 {
total_sim += dot / (norm1.sqrt() * norm2.sqrt());
count += 1;
}
}
result[i] = if count > 0 {
total_sim / count as f64
} else {
0.0
};
}
Some(result)
}
fn percentile(data: &[f64], p: f64) -> f64 {
if data.is_empty() {
return 0.0;
}
let mut sorted = data.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let idx = p * (sorted.len() - 1) as f64;
let lower = idx.floor() as usize;
let upper = (lower + 1).min(sorted.len() - 1);
let weight = idx - lower as f64;
sorted[lower] * (1.0 - weight) + sorted[upper] * weight
}
pub fn filter_split_indices(
indices: &[usize],
values: &[f64],
threshold: f64,
min_distance: usize,
) -> FilteredIndices {
if indices.is_empty() || values.is_empty() {
return FilteredIndices {
indices: vec![],
values: vec![],
};
}
let threshold_val = percentile(values, threshold);
let mut result_indices = Vec::new();
let mut result_values = Vec::new();
let mut last_idx: Option<usize> = None;
for (&idx, &val) in indices.iter().zip(values.iter()) {
let distance_ok = match last_idx {
Some(last) => idx >= last + min_distance,
None => true,
};
if val <= threshold_val && distance_ok {
result_indices.push(idx);
result_values.push(val);
last_idx = Some(idx);
}
}
FilteredIndices {
indices: result_indices,
values: result_values,
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_savgol_filter_smoothing() {
let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
let result = savgol_filter(&data, 5, 2, 0).unwrap();
assert_eq!(result.len(), data.len());
for (i, &val) in result.iter().enumerate() {
assert!((val - (i as f64 + 1.0)).abs() < 0.5);
}
}
#[test]
fn test_savgol_filter_invalid_params() {
let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
assert!(savgol_filter(&data, 4, 2, 0).is_none());
assert!(savgol_filter(&data, 3, 3, 0).is_none());
}
#[test]
fn test_find_local_minima() {
let data: Vec<f64> = (0..20)
.map(|i| {
let x = (i as f64 - 10.0) / 3.0;
x * x })
.collect();
let result = find_local_minima_interpolated(&data, 5, 2, 0.5).unwrap();
assert!(!result.indices.is_empty());
let min_idx = result.indices[0];
assert!((min_idx as isize - 10).abs() <= 2);
}
#[test]
fn test_windowed_cross_similarity() {
let embeddings = vec![
1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, ];
let result = windowed_cross_similarity(&embeddings, 3, 3, 3).unwrap();
assert_eq!(result.len(), 2);
assert!((result[0] - 0.5).abs() < 0.1); }
#[test]
fn test_filter_split_indices() {
let indices = vec![0, 5, 8, 15, 20];
let values = vec![0.1, 0.3, 0.2, 0.5, 0.4];
let result = filter_split_indices(&indices, &values, 0.5, 3);
assert!(!result.indices.is_empty());
}
#[test]
fn test_percentile() {
let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
assert!((percentile(&data, 0.0) - 1.0).abs() < 0.001);
assert!((percentile(&data, 0.5) - 3.0).abs() < 0.001);
assert!((percentile(&data, 1.0) - 5.0).abs() < 0.001);
}
#[test]
fn test_matrix_inverse() {
let a = vec![1.0, 0.0, 0.0, 1.0];
let inv = matrix_inverse(&a, 2).unwrap();
assert!((inv[0] - 1.0).abs() < 0.001);
assert!((inv[3] - 1.0).abs() < 0.001);
let b = vec![2.0, 1.0, 1.0, 1.0];
let inv_b = matrix_inverse(&b, 2).unwrap();
assert!((inv_b[0] - 1.0).abs() < 0.001);
assert!((inv_b[1] - (-1.0)).abs() < 0.001);
assert!((inv_b[2] - (-1.0)).abs() < 0.001);
assert!((inv_b[3] - 2.0).abs() < 0.001);
}
}