#[derive(Debug, Clone)]
pub struct HSTConfig {
pub n_trees: usize,
pub max_depth: usize,
pub window_size: usize,
pub n_features: usize,
pub seed: u64,
pub threshold: f64,
}
impl HSTConfig {
pub fn new(n_features: usize) -> Self {
Self {
n_trees: 25,
max_depth: 15,
window_size: 250,
n_features,
seed: 42,
threshold: 0.5,
}
}
pub fn n_trees(mut self, n: usize) -> Self {
self.n_trees = n;
self
}
pub fn max_depth(mut self, d: usize) -> Self {
self.max_depth = d;
self
}
pub fn window_size(mut self, w: usize) -> Self {
self.window_size = w;
self
}
pub fn seed(mut self, s: u64) -> Self {
self.seed = s;
self
}
pub fn threshold(mut self, t: f64) -> Self {
self.threshold = t;
self
}
}
#[derive(Debug, Clone)]
struct HSTNode {
feature: usize,
split_value: f64,
left: usize,
right: usize,
r_mass: u64,
l_mass: u64,
depth: usize,
}
#[derive(Debug, Clone)]
struct HSTree {
nodes: Vec<HSTNode>,
}
impl HSTree {
fn build(
n_features: usize,
max_depth: usize,
rng: &mut SimpleRng,
work_ranges: &[(f64, f64)],
) -> Self {
let capacity = (1 << (max_depth + 1)) - 1; let mut nodes = Vec::with_capacity(capacity.min(4096));
nodes.push(HSTNode {
feature: 0,
split_value: 0.0,
left: 0,
right: 0,
r_mass: 0,
l_mass: 0,
depth: 0,
});
Self::build_recursive(&mut nodes, 0, 0, max_depth, n_features, rng, work_ranges);
HSTree { nodes }
}
fn build_recursive(
nodes: &mut Vec<HSTNode>,
node_idx: usize,
depth: usize,
max_depth: usize,
n_features: usize,
rng: &mut SimpleRng,
work_ranges: &[(f64, f64)],
) {
if depth >= max_depth {
return; }
let feature = rng.next_usize() % n_features;
let (lo, hi) = work_ranges[feature];
let split_value = if (hi - lo).abs() < 1e-15 {
lo
} else {
lo + rng.next_f64() * (hi - lo)
};
let left_idx = nodes.len();
nodes.push(HSTNode {
feature: 0,
split_value: 0.0,
left: 0,
right: 0,
r_mass: 0,
l_mass: 0,
depth: depth + 1,
});
let right_idx = nodes.len();
nodes.push(HSTNode {
feature: 0,
split_value: 0.0,
left: 0,
right: 0,
r_mass: 0,
l_mass: 0,
depth: depth + 1,
});
nodes[node_idx].feature = feature;
nodes[node_idx].split_value = split_value;
nodes[node_idx].left = left_idx;
nodes[node_idx].right = right_idx;
let mut left_ranges: Vec<(f64, f64)> = work_ranges.to_vec();
left_ranges[feature].1 = split_value;
let mut right_ranges: Vec<(f64, f64)> = work_ranges.to_vec();
right_ranges[feature].0 = split_value;
Self::build_recursive(
nodes,
left_idx,
depth + 1,
max_depth,
n_features,
rng,
&left_ranges,
);
Self::build_recursive(
nodes,
right_idx,
depth + 1,
max_depth,
n_features,
rng,
&right_ranges,
);
}
fn update(&mut self, features: &[f64]) {
let mut idx = 0;
loop {
let node = &mut self.nodes[idx];
node.l_mass += 1;
if node.left == 0 && node.right == 0 {
break; }
if features[node.feature] < node.split_value {
idx = node.left;
} else {
idx = node.right;
}
}
}
fn score(&self, features: &[f64], max_depth: usize) -> f64 {
let mut idx = 0;
let mut score = 0.0;
loop {
let node = &self.nodes[idx];
let depth_weight = (1u64 << (max_depth - node.depth)) as f64;
score += node.r_mass as f64 * depth_weight;
if node.left == 0 && node.right == 0 {
break;
}
if features[node.feature] < node.split_value {
idx = node.left;
} else {
idx = node.right;
}
}
score
}
fn rotate(&mut self) {
for node in &mut self.nodes {
node.r_mass = node.l_mass;
node.l_mass = 0;
}
}
}
#[derive(Debug, Clone)]
struct SimpleRng {
state: u64,
}
impl SimpleRng {
fn new(seed: u64) -> Self {
Self {
state: if seed == 0 { 1 } else { seed },
}
}
fn next_u64(&mut self) -> u64 {
let mut x = self.state;
x ^= x << 13;
x ^= x >> 7;
x ^= x << 17;
self.state = x;
x
}
fn next_f64(&mut self) -> f64 {
(self.next_u64() >> 11) as f64 / (1u64 << 53) as f64
}
fn next_usize(&mut self) -> usize {
self.next_u64() as usize
}
}
#[derive(Debug, Clone, Copy)]
pub struct AnomalyScore {
pub score: f64,
pub is_anomaly: bool,
}
#[derive(Debug, Clone)]
pub struct HalfSpaceTree {
config: HSTConfig,
trees: Vec<HSTree>,
samples_seen: u64,
max_score: f64,
}
impl HalfSpaceTree {
pub fn new(config: HSTConfig) -> Self {
let mut rng = SimpleRng::new(config.seed);
let n_features = config.n_features;
let max_depth = config.max_depth;
let work_ranges: Vec<(f64, f64)> = vec![(0.0, 1.0); n_features];
let trees: Vec<HSTree> = (0..config.n_trees)
.map(|_| HSTree::build(n_features, max_depth, &mut rng, &work_ranges))
.collect();
let max_score = {
let ws = config.window_size as f64;
let mut s = 0.0;
for d in 0..=max_depth {
s += ws * (1u64 << (max_depth - d)) as f64;
}
s * config.n_trees as f64
};
Self {
config,
trees,
samples_seen: 0,
max_score,
}
}
pub fn with_ranges(config: HSTConfig, ranges: &[(f64, f64)]) -> Self {
assert_eq!(
ranges.len(),
config.n_features,
"ranges length must match n_features"
);
let mut rng = SimpleRng::new(config.seed);
let max_depth = config.max_depth;
let trees: Vec<HSTree> = (0..config.n_trees)
.map(|_| HSTree::build(config.n_features, max_depth, &mut rng, ranges))
.collect();
let max_score = {
let ws = config.window_size as f64;
let mut s = 0.0;
for d in 0..=max_depth {
s += ws * (1u64 << (max_depth - d)) as f64;
}
s * config.n_trees as f64
};
Self {
config,
trees,
samples_seen: 0,
max_score,
}
}
pub fn update(&mut self, features: &[f64]) {
assert!(
features.len() >= self.config.n_features,
"expected at least {} features, got {}",
self.config.n_features,
features.len()
);
for tree in &mut self.trees {
tree.update(features);
}
self.samples_seen += 1;
if self.samples_seen % self.config.window_size as u64 == 0 {
for tree in &mut self.trees {
tree.rotate();
}
}
}
pub fn score(&self, features: &[f64]) -> AnomalyScore {
let raw: f64 = self
.trees
.iter()
.map(|t| t.score(features, self.config.max_depth))
.sum();
let normalized = if self.max_score > 0.0 {
1.0 - (raw / self.max_score)
} else {
0.0
};
let score = normalized.clamp(0.0, 1.0);
AnomalyScore {
score,
is_anomaly: score > self.config.threshold,
}
}
pub fn score_and_update(&mut self, features: &[f64]) -> AnomalyScore {
let result = self.score(features);
self.update(features);
result
}
pub fn reset(&mut self) {
for tree in &mut self.trees {
for node in &mut tree.nodes {
node.r_mass = 0;
node.l_mass = 0;
}
}
self.samples_seen = 0;
}
pub fn samples_seen(&self) -> u64 {
self.samples_seen
}
pub fn windows_completed(&self) -> u64 {
self.samples_seen / self.config.window_size as u64
}
pub fn threshold(&self) -> f64 {
self.config.threshold
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_basic_construction() {
let config = HSTConfig::new(3).n_trees(10).max_depth(8).seed(123);
let hst = HalfSpaceTree::new(config);
assert_eq!(hst.samples_seen(), 0);
assert_eq!(hst.windows_completed(), 0);
}
#[test]
fn test_update_increments_count() {
let config = HSTConfig::new(2).n_trees(5).window_size(50);
let mut hst = HalfSpaceTree::new(config);
for _ in 0..100 {
hst.update(&[0.5, 0.5]);
}
assert_eq!(hst.samples_seen(), 100);
assert_eq!(hst.windows_completed(), 2);
}
#[test]
fn test_score_range() {
let config = HSTConfig::new(3).n_trees(10).window_size(50).seed(42);
let mut hst = HalfSpaceTree::new(config);
for i in 0..100 {
hst.update(&[0.5 + (i as f64) * 0.001, 0.5, 0.5]);
}
let result = hst.score(&[0.5, 0.5, 0.5]);
assert!(
(0.0..=1.0).contains(&result.score),
"score {} not in [0,1]",
result.score
);
}
#[test]
fn test_anomaly_detection_basic() {
let config = HSTConfig::new(2)
.n_trees(50)
.max_depth(10)
.window_size(200)
.seed(7)
.threshold(0.5);
let mut hst = HalfSpaceTree::new(config);
let mut rng = SimpleRng::new(99);
for _ in 0..400 {
let x = 0.4 + rng.next_f64() * 0.2; let y = 0.4 + rng.next_f64() * 0.2;
hst.update(&[x, y]);
}
let normal = hst.score(&[0.5, 0.5]);
let anomaly = hst.score(&[0.01, 0.99]);
assert!(
anomaly.score > normal.score,
"anomaly score ({:.4}) should exceed normal ({:.4})",
anomaly.score,
normal.score
);
}
#[test]
fn test_score_and_update() {
let config = HSTConfig::new(2).n_trees(5).window_size(50);
let mut hst = HalfSpaceTree::new(config);
for _ in 0..100 {
let result = hst.score_and_update(&[0.5, 0.5]);
assert!((0.0..=1.0).contains(&result.score));
}
assert_eq!(hst.samples_seen(), 100);
}
#[test]
fn test_reset() {
let config = HSTConfig::new(2).n_trees(5).window_size(50);
let mut hst = HalfSpaceTree::new(config);
for _ in 0..100 {
hst.update(&[0.5, 0.5]);
}
hst.reset();
assert_eq!(hst.samples_seen(), 0);
assert_eq!(hst.windows_completed(), 0);
}
#[test]
fn test_with_ranges() {
let config = HSTConfig::new(2).n_trees(10).window_size(50).seed(42);
let ranges = vec![(-10.0, 10.0), (0.0, 100.0)];
let mut hst = HalfSpaceTree::with_ranges(config, &ranges);
for i in 0..100 {
hst.update(&[i as f64 * 0.1 - 5.0, i as f64]);
}
let result = hst.score(&[0.0, 50.0]);
assert!((0.0..=1.0).contains(&result.score));
}
#[test]
fn test_deterministic_with_seed() {
let make = || {
let config = HSTConfig::new(3).n_trees(10).seed(999).window_size(50);
let mut hst = HalfSpaceTree::new(config);
for i in 0..100 {
hst.update(&[i as f64 * 0.01, 0.5, 0.3]);
}
hst.score(&[0.5, 0.5, 0.3]).score
};
let s1 = make();
let s2 = make();
assert!(
(s1 - s2).abs() < 1e-12,
"same seed should produce identical scores"
);
}
#[test]
fn test_window_rotation() {
let config = HSTConfig::new(2).n_trees(3).window_size(10).max_depth(4);
let mut hst = HalfSpaceTree::new(config);
for _ in 0..10 {
hst.update(&[0.5, 0.5]);
}
assert_eq!(hst.windows_completed(), 1);
let normal = hst.score(&[0.5, 0.5]);
let outlier = hst.score(&[0.01, 0.01]);
assert!(
outlier.score >= normal.score,
"outlier ({:.4}) should score >= normal ({:.4})",
outlier.score,
normal.score
);
}
}