/// Packs five raw structural metrics into a numeric feature vector.
///
/// The result always holds exactly five entries, in argument order:
/// `param_count`, `complexity`, `nesting_depth`, `loc`, `return_count`,
/// each converted to `f64`.
pub fn extract_structural_features_raw(
    param_count: i64,
    complexity: i64,
    nesting_depth: i64,
    loc: u32,
    return_count: i64,
) -> Vec<f64> {
    let features = [
        param_count as f64,
        complexity as f64,
        nesting_depth as f64,
        // `u32` always fits in an `f64`, so the infallible conversion applies.
        f64::from(loc),
        return_count as f64,
    ];
    features.to_vec()
}
/// Fitted model used to score feature vectors by Mahalanobis distance.
///
/// Instances are built with `StructuralScorer::from_features` and queried with
/// `StructuralScorer::mahalanobis_distance`.
#[derive(Debug, Clone, PartialEq)]
pub struct StructuralScorer {
    /// Per-feature mean of the samples the scorer was fitted on.
    mean: Vec<f64>,
    /// Inverse of the `dim × dim` covariance matrix of those samples.
    inv_cov: Vec<Vec<f64>>,
    /// Number of features per sample; `0` marks an empty scorer.
    dim: usize,
}
impl StructuralScorer {
    /// Small constant added to every diagonal entry of the covariance matrix
    /// before it is inverted. A covariance matrix is positive semi-definite,
    /// so this shift makes it positive definite and therefore invertible even
    /// when a feature is constant or two features are perfectly correlated.
    const DIAGONAL_REGULARIZATION: f64 = 1e-6;

    /// Fits a scorer to a set of feature vectors.
    ///
    /// The width of the first row defines the feature dimension. Rows whose
    /// width differs from it are ignored rather than causing an out-of-bounds
    /// panic, mirroring how [`Self::mahalanobis_distance`] tolerates a point of
    /// the wrong width.
    ///
    /// The covariance is the population covariance (sums divided by the number
    /// of rows used, not by that number minus one), regularized on the
    /// diagonal and then inverted.
    ///
    /// If `features` is empty or its first row is empty, an empty scorer
    /// (dimension `0`) is returned; every distance it reports is `0.0`.
    pub fn from_features(features: &[Vec<f64>]) -> Self {
        let dim = features.first().map_or(0, Vec::len);
        if dim == 0 {
            return Self {
                mean: Vec::new(),
                inv_cov: Vec::new(),
                dim: 0,
            };
        }

        // Keep only rows that match the reference width. The first row always
        // qualifies, so `rows` is never empty here.
        let rows: Vec<&[f64]> = features
            .iter()
            .map(Vec::as_slice)
            .filter(|row| row.len() == dim)
            .collect();
        let count = rows.len() as f64;

        let mut mean = vec![0.0; dim];
        for row in &rows {
            for (m, v) in mean.iter_mut().zip(row.iter()) {
                *m += v;
            }
        }
        for m in &mut mean {
            *m /= count;
        }

        let mut cov = vec![vec![0.0; dim]; dim];
        // Scratch buffer for the centered row, computed once per sample
        // instead of once per matrix cell.
        let mut centered = vec![0.0; dim];
        for row in &rows {
            for ((c, v), m) in centered.iter_mut().zip(row.iter()).zip(&mean) {
                *c = v - m;
            }
            for (cov_row, &ci) in cov.iter_mut().zip(&centered) {
                for (cell, &cj) in cov_row.iter_mut().zip(&centered) {
                    *cell += ci * cj;
                }
            }
        }
        for (i, cov_row) in cov.iter_mut().enumerate() {
            for cell in cov_row.iter_mut() {
                *cell /= count;
            }
            cov_row[i] += Self::DIAGONAL_REGULARIZATION;
        }

        let inv_cov = invert_matrix(&cov);
        Self { mean, inv_cov, dim }
    }

    /// Returns the Mahalanobis distance of `point` from the fitted mean.
    ///
    /// This is the square root of `dᵀ · inv_cov · d`, where `d` is `point`
    /// minus the mean. Returns `0.0` when the scorer is empty or when
    /// `point.len()` differs from the fitted dimension.
    pub fn mahalanobis_distance(&self, point: &[f64]) -> f64 {
        if self.dim == 0 || point.len() != self.dim {
            return 0.0;
        }
        let diff: Vec<f64> = point.iter().zip(&self.mean).map(|(p, m)| p - m).collect();

        let mut quadratic_form = 0.0;
        for (inv_row, &di) in self.inv_cov.iter().zip(&diff) {
            for (&weight, &dj) in inv_row.iter().zip(&diff) {
                quadratic_form += di * weight * dj;
            }
        }
        // Clamp before the square root so that a slightly negative value
        // (possible through floating-point rounding) gives 0.0 instead of NaN.
        // `f64::max` returns the non-NaN operand, so a NaN quadratic form also
        // ends up as 0.0.
        quadratic_form.max(0.0).sqrt()
    }
}
/// Inverts a square matrix by Gauss-Jordan elimination with partial pivoting.
///
/// `matrix` is read as an `n × n` matrix, where `n` is its number of rows;
/// only the first `n` entries of each row are used.
///
/// If a pivot with magnitude below `1e-12` is met, the matrix is treated as
/// singular and the `n × n` identity matrix is returned instead of an inverse.
///
/// # Panics
///
/// Panics if any row holds fewer than `n` entries.
fn invert_matrix(matrix: &[Vec<f64>]) -> Vec<Vec<f64>> {
    let size = matrix.len();
    let width = 2 * size;

    // Build the augmented system [A | I]; the right half becomes the inverse.
    let mut work: Vec<Vec<f64>> = matrix
        .iter()
        .enumerate()
        .map(|(r, src)| {
            let mut line = vec![0.0; width];
            line[..size].copy_from_slice(&src[..size]);
            line[size + r] = 1.0;
            line
        })
        .collect();

    // Reused copy of the pivot row so the other rows can be updated while
    // `work` is mutably borrowed.
    let mut pivot_row: Vec<f64> = Vec::with_capacity(width);

    for k in 0..size {
        // Partial pivoting: choose the row at or below `k` with the largest
        // magnitude in column `k`; ties keep the earliest row.
        let best = ((k + 1)..size).fold(k, |best, r| {
            if work[r][k].abs() > work[best][k].abs() {
                r
            } else {
                best
            }
        });
        work.swap(k, best);

        let scale = work[k][k];
        if scale.abs() < 1e-12 {
            // Singular (or numerically singular): fall back to the identity.
            return (0..size)
                .map(|r| (0..size).map(|c| if r == c { 1.0 } else { 0.0 }).collect())
                .collect();
        }

        // Normalize the pivot row so the pivot becomes 1.
        for x in &mut work[k] {
            *x /= scale;
        }

        // Clear column `k` from every other row.
        pivot_row.clone_from(&work[k]);
        for (r, line) in work.iter_mut().enumerate() {
            if r == k {
                continue;
            }
            let factor = line[k];
            for (x, p) in line.iter_mut().zip(&pivot_row) {
                *x -= factor * p;
            }
        }
    }

    work.into_iter().map(|mut line| line.split_off(size)).collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A point close to the center of the training cloud gets a small score.
    #[test]
    fn test_mahalanobis_near_mean_is_small() {
        let mut samples: Vec<Vec<f64>> = Vec::with_capacity(50);
        for step in 0..50 {
            // All five features move together along one shared offset.
            let shift = (step as f64) * 0.1 - 2.5;
            samples.push(vec![
                3.0 + shift,
                5.0 + shift * 0.5,
                2.0 + shift * 0.3,
                20.0 + shift * 2.0,
                1.0 + shift * 0.1,
            ]);
        }
        let model = StructuralScorer::from_features(&samples);
        let dist = model.mahalanobis_distance(&[3.0, 5.0, 2.0, 20.0, 1.0]);
        assert!(
            dist < 2.0,
            "Point near mean should have small Mahalanobis distance, got {dist}"
        );
    }

    /// One extreme sample among many identical ones scores far above the rest.
    #[test]
    fn test_outlier_has_high_distance() {
        let typical = vec![3.0, 5.0, 2.0, 20.0, 1.0];
        let extreme = vec![30.0, 50.0, 20.0, 200.0, 10.0];

        let mut samples = vec![typical.clone(); 100];
        samples.push(extreme.clone());
        let model = StructuralScorer::from_features(&samples);

        let dist_normal = model.mahalanobis_distance(&typical);
        let dist_outlier = model.mahalanobis_distance(&extreme);
        assert!(
            dist_outlier > dist_normal,
            "Outlier distance ({dist_outlier}) should be greater than normal distance ({dist_normal})"
        );
        assert!(
            dist_outlier > 5.0,
            "Outlier should have a clearly high distance, got {dist_outlier}"
        );
    }

    /// The raw extractor returns the five metrics, in order, as floats.
    #[test]
    fn test_feature_extraction() {
        let features = extract_structural_features_raw(3, 10, 4, 150, 2);
        let expected = [3.0, 10.0, 4.0, 150.0, 2.0];
        assert_eq!(features.len(), 5);
        for (got, want) in features.iter().zip(expected.iter()) {
            assert!((got - want).abs() < f64::EPSILON);
        }
    }

    /// Degenerate inputs produce a distance of zero instead of a panic.
    #[test]
    fn test_empty_features() {
        // No samples at all.
        let no_rows = StructuralScorer::from_features(&[]);
        assert_eq!(no_rows.dim, 0);
        assert_eq!(no_rows.mahalanobis_distance(&[1.0, 2.0]), 0.0);

        // A single sample that carries no features.
        let no_cols = StructuralScorer::from_features(&[vec![]]);
        assert_eq!(no_cols.dim, 0);
        assert_eq!(no_cols.mahalanobis_distance(&[]), 0.0);

        // A fitted two-feature scorer queried with a three-feature point.
        let fitted = StructuralScorer::from_features(&[vec![1.0, 2.0], vec![3.0, 4.0]]);
        assert_eq!(fitted.mahalanobis_distance(&[1.0, 2.0, 3.0]), 0.0);
    }

    /// Inverting the identity matrix returns the identity matrix.
    #[test]
    fn test_invert_identity() {
        let identity: Vec<Vec<f64>> = (0..3)
            .map(|r| (0..3).map(|c| if r == c { 1.0 } else { 0.0 }).collect())
            .collect();
        let inv = invert_matrix(&identity);
        for i in 0..3 {
            for j in 0..3 {
                let expected = if i == j { 1.0 } else { 0.0 };
                assert!(
                    (inv[i][j] - expected).abs() < 1e-10,
                    "inv[{i}][{j}] = {}, expected {expected}",
                    inv[i][j]
                );
            }
        }
    }

    /// `[[2, 1], [1, 3]]` has the inverse `[[0.6, -0.2], [-0.2, 0.4]]`.
    #[test]
    fn test_invert_known_matrix() {
        let inv = invert_matrix(&[vec![2.0, 1.0], vec![1.0, 3.0]]);
        let close = |actual: f64, wanted: f64| (actual - wanted).abs() < 1e-10;
        assert!(close(inv[0][0], 0.6), "inv[0][0] = {}", inv[0][0]);
        assert!(close(inv[0][1], -0.2), "inv[0][1] = {}", inv[0][1]);
        assert!(close(inv[1][0], -0.2), "inv[1][0] = {}", inv[1][0]);
        assert!(close(inv[1][1], 0.4), "inv[1][1] = {}", inv[1][1]);
    }

    /// A point that breaks the learned correlation scores higher than a
    /// point that follows it.
    #[test]
    fn test_mahalanobis_with_correlated_features() {
        // The second feature is always exactly twice the first.
        let samples: Vec<Vec<f64>> = (0..100)
            .map(|i| (i as f64) * 0.1)
            .map(|x| vec![x, x * 2.0])
            .collect();
        let model = StructuralScorer::from_features(&samples);

        let dist_along = model.mahalanobis_distance(&[15.0, 30.0]);
        let dist_breaking = model.mahalanobis_distance(&[5.0, 30.0]);
        assert!(
            dist_breaking > dist_along,
            "Breaking correlation ({dist_breaking}) should give higher distance than following it ({dist_along})"
        );
    }

    /// The fitted mean itself is at distance zero.
    #[test]
    fn test_distance_at_mean_is_zero() {
        let samples = [
            vec![1.0, 2.0, 3.0],
            vec![3.0, 4.0, 5.0],
            vec![5.0, 6.0, 7.0],
        ];
        let model = StructuralScorer::from_features(&samples);
        let dist = model.mahalanobis_distance(&[3.0, 4.0, 5.0]);
        assert!(dist < 1e-6, "Distance at the mean should be ~0, got {dist}");
    }
}