1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
//! Sequential outlier detection and removal using Hampel identifiers.
//!
//! It supports `f32` and `f64`.
//!
//! # Example
//!
//! ```rust
//! use hampel::Window;
//!
//! fn main() {
//! // Window size: 5 (>= 3)
//! // Initialization value of window: 0.0
//! // Threshold: Median of the window ±3σ.
//! let mut filter = Window::<f64, 5>::new(0.0, 3.0);
//!
//! let input_vals = [0.0; 100]; // <- Containing outliers
//! let mut filtered_vals = [0.0; 100];
//! for (i, val) in input_vals.iter().enumerate() {
//! filtered_vals[i] = filter.update(*val);
//! }
//! // filtered_vals <-- Outliers have been removed
//! }
//! ```
#![no_std]
use core::mem::MaybeUninit;
use num_traits::{cast, float::FloatCore};
/// Window of Hampel filter
///
/// * `WINDOW_SIZE` >= 3
pub struct Window<T: FloatCore, const WINDOW_SIZE: usize> {
window: [T; WINDOW_SIZE],
working_array: [T; WINDOW_SIZE],
oldest: usize, // window内の最も古い要素のインデックス
coef: T, // 閾値判定に使う係数
}
// n_sigmaを大きくするほど判定が緩くなる(外れ値を見落としやすくなる)
impl<T: FloatCore, const WINDOW_SIZE: usize> Window<T, WINDOW_SIZE> {
/// * `init_val`: Initialization value of window.
/// * `n_sigma`: Threshold for determining an outlier.
///
/// If the window's input value exceeds the `window's standard deviation` * `n_sigma`,
/// it is determined to be an outlier.
/// The larger n_sigma is, the harder it is to detect outliers.
pub fn new(init_val: T, n_sigma: T) -> Self {
assert!(WINDOW_SIZE >= 3, "WINDOW_SIZE must be at least 3");
Self {
window: [init_val; WINDOW_SIZE],
working_array: unsafe { MaybeUninit::uninit().assume_init() },
oldest: 0,
coef: cast::<f32, T>(1.4826).unwrap() * n_sigma, // 1.4826は正規分布にするための係数
}
}
/// Update element in window.
///
/// When `x` is determined to be an outlier, the median value of the window is usually returned.
/// If the `extrapolation` feature is enabled, the linear extrapolated value is returned.
///
/// When `x` is judged not to be an outlier, `x` is returned as is.
pub fn update(&mut self, x: T) -> T {
// Range of `oldest`: [0, WINDOW_SIZE)
unsafe {*self.window.get_unchecked_mut(self.oldest) = x};
self.oldest = (self.oldest + 1) % WINDOW_SIZE;
self.working_array = self.window;
// ウィンドウの中央値を計算
let w0 = self.get_median();
// ウィンドウの各値に対して,中央値との絶対差分を取る
for w in self.working_array.iter_mut() {
*w = (*w - w0).abs();
}
// 絶対差分を取ったので再度中央値を計算
let s0 = self.get_median();
// 外れ値かどうか判定
if (x - w0).abs() <= self.coef * s0 {
x
} else {
#[cfg(feature = "extrapolation")]
{
// 線形外挿した値を返す
self.extrapolation()
}
#[cfg(not(feature = "extrapolation"))]
{
// ウィンドウの中央値を返す
w0
}
}
}
/// working_arrayの中央値を返す
fn get_median(&mut self) -> T {
// Insertion sort
for i in 1..WINDOW_SIZE {
let mut j = i;
while j > 0 {
let j_pre = j - 1;
if unsafe{ self.working_array.get_unchecked(j_pre) > self.working_array.get_unchecked(j) } {
self.working_array.swap(j_pre, j);
j = j_pre;
} else {
break;
}
}
}
self.working_array[WINDOW_SIZE / 2]
}
/// 一番最後に追加されたデータ(外れ値)を無視して線形外挿する
#[cfg(feature = "extrapolation")]
fn extrapolation(&self) -> T {
// x座標を(0, 1, 2, ...)と取った場合の平均値(等差数列の平均)
let mu_x = cast::<usize, T>(WINDOW_SIZE - 2).unwrap() * cast::<f32, T>(0.5).unwrap();
// windowの平均値(外れ値を除いた平均値なので WINDOW_SIZE-1 になっている)
let mut mu_y = T::zero();
for i in 0..(WINDOW_SIZE - 1) {
mu_y = mu_y + self.window[(self.oldest + i) % WINDOW_SIZE];
}
mu_y = mu_y / cast::<usize, T>(WINDOW_SIZE - 1).unwrap();
let mut numer = T::zero();
let mut denom = T::zero();
for i in 0..(WINDOW_SIZE - 1) {
let dev_x = cast::<usize, T>(i).unwrap() - mu_x;
let dev_y = self.window[(self.oldest + i) % WINDOW_SIZE] - mu_y;
numer = numer + dev_x * dev_y;
denom = denom + dev_x * dev_x;
}
// 最小二乗法で求めた傾きと切片
let a = numer / denom; // denom=0となることは無い
let b = mu_y - a * mu_x;
a * cast::<usize, T>(WINDOW_SIZE - 1).unwrap() + b
}
}