use crate::analysis::finite::ensure_finite_2d;
use crate::errors::AnalysisError;
use ndarray::{Array2, Axis};
/// Scores how evenly the rows of `embeddings` are spread over directions.
///
/// The score is `1 - mean pairwise cosine similarity`, taken over every
/// unordered pair of rows and clamped to `[0, 1]`. Rows that all point the
/// same way give a score near 0; mutually orthogonal rows give a score near 1.
///
/// Row norms are floored at `1e-10`, so an all-zero row contributes a cosine
/// of 0 instead of dividing by zero.
///
/// # Errors
///
/// * `AnalysisError::InsufficientData` when there are fewer than two rows.
/// * Any error returned by `ensure_finite_2d` for the input (the unit tests
///   in this file show that a NaN entry is rejected).
pub fn isotropy_score(embeddings: &Array2<f32>) -> Result<f32, AnalysisError> {
    let n = embeddings.shape()[0];
    if n < 2 {
        return Err(AnalysisError::InsufficientData(format!(
            "Isotropy requires at least 2 embeddings, got {n}"
        )));
    }
    ensure_finite_2d(embeddings, "embeddings for isotropy")?;

    // Accumulate in f64: squaring or multiplying large-but-finite f32 values
    // can overflow f32 to infinity, which would turn the cosine into NaN.
    let norms: Vec<f64> = embeddings
        .rows()
        .into_iter()
        .map(|row| {
            row.iter()
                .map(|&x| f64::from(x) * f64::from(x))
                .sum::<f64>()
                .sqrt()
                .max(1e-10)
        })
        .collect();

    let mut total_sim = 0.0_f64;
    let mut pair_count = 0_u64;
    for i in 0..n {
        let row_i = embeddings.row(i);
        for j in (i + 1)..n {
            let row_j = embeddings.row(j);
            let dot: f64 = row_i
                .iter()
                .zip(row_j.iter())
                .map(|(&a, &b)| f64::from(a) * f64::from(b))
                .sum();
            // Clamp away rounding error that pushes the cosine outside [-1, 1].
            total_sim += (dot / (norms[i] * norms[j])).clamp(-1.0, 1.0);
            pair_count += 1;
        }
    }

    // `n >= 2` guarantees at least one pair, so the division is well defined.
    let avg_cosine = (total_sim / pair_count as f64) as f32;
    Ok((1.0 - avg_cosine).clamp(0.0, 1.0))
}
/// Scores isotropy from the spread of the leading covariance eigenvalues.
///
/// The rows of `embeddings` are mean-centered, up to `min(d, n - 1, 32)`
/// eigenvalues are estimated with `top_eigenvalues` (at most 100 power
/// iterations each), and eigenvalues not above `1e-10` are discarded. The
/// result is `exp(-Var(ln λ))` over the remaining eigenvalues, clamped to
/// `[0, 1]`: equal eigenvalues give 1.0, a wide spread pushes the score
/// towards 0. If no eigenvalue survives the filter the score is 0.0.
///
/// # Errors
///
/// * `AnalysisError::InsufficientData` when there are fewer than two rows.
/// * Any error returned by `ensure_finite_2d` for the input.
/// * `AnalysisError::EmptyInput` if the column mean cannot be computed.
/// * Any error propagated from `top_eigenvalues`.
pub fn partition_isotropy(embeddings: &Array2<f32>) -> Result<f32, AnalysisError> {
    /// Upper bound on the number of eigenvalues that are estimated.
    const MAX_COMPONENTS: usize = 32;
    /// Power-iteration budget per eigenvalue.
    const MAX_POWER_ITERATIONS: usize = 100;
    /// Eigenvalues at or below this threshold are dropped before taking logs.
    const EIGENVALUE_FLOOR: f32 = 1e-10;

    let n = embeddings.shape()[0];
    let d = embeddings.shape()[1];
    if n < 2 {
        return Err(AnalysisError::InsufficientData(format!(
            "Partition isotropy requires at least 2 embeddings, got {n}"
        )));
    }
    ensure_finite_2d(embeddings, "embeddings for partition isotropy")?;

    let mean = embeddings
        .mean_axis(Axis(0))
        .ok_or_else(|| AnalysisError::EmptyInput("isotropy input cannot be empty".into()))?;
    let mut centered = embeddings.to_owned();
    for mut row in centered.rows_mut() {
        row -= &mean;
    }

    // Centered data has rank at most `n - 1`, and never more than `d`.
    let k = d.min(n - 1).min(MAX_COMPONENTS);
    let eigenvalues = top_eigenvalues(&centered, k, MAX_POWER_ITERATIONS)?;

    let log_eigenvalues: Vec<f32> = eigenvalues
        .into_iter()
        .filter(|&e| e > EIGENVALUE_FLOOR)
        .map(f32::ln)
        .collect();
    if log_eigenvalues.is_empty() {
        return Ok(0.0);
    }

    // NOTE(review): when exactly one eigenvalue survives the filter the
    // variance below is 0 and the score is 1.0 — confirm that this is the
    // intended result for (numerically) rank-one data.
    let count = log_eigenvalues.len() as f32;
    let mean_log = log_eigenvalues.iter().sum::<f32>() / count;
    let variance_log = log_eigenvalues
        .iter()
        .map(|l| (l - mean_log).powi(2))
        .sum::<f32>()
        / count;
    Ok((-variance_log).exp().clamp(0.0, 1.0))
}
/// Estimates up to `k` eigenvalues of the sample covariance of `centered`
/// using power iteration with deflation.
///
/// `centered` is read as an `n x d` matrix `X`; the caller is expected to
/// have mean-centered its rows already. For component `i` the routine:
///
/// 1. starts from the standard basis vector `e_(i mod d)`,
/// 2. repeatedly applies `v <- XᵀXv / ‖XᵀXv‖` for at most `max_iter` steps,
///    stopping early when the direction changes by less than `1e-6` or the
///    update vanishes (norm below `1e-12`),
/// 3. records `‖Xv‖² / max(n - 1, 1)` as the eigenvalue, and
/// 4. subtracts the component along `v` from every row of the working copy.
///
/// Eigenvalues are returned in the order they were found; they are not
/// sorted. A matrix with zero columns yields an empty vector.
///
/// # Errors
///
/// This implementation never returns `Err`; the `Result` return type is kept
/// so existing `?` call sites continue to compile.
fn top_eigenvalues(
    centered: &Array2<f32>,
    k: usize,
    max_iter: usize,
) -> Result<Vec<f32>, AnalysisError> {
    let (n, d) = (centered.shape()[0], centered.shape()[1]);
    // Without columns there is no direction to iterate on, and `i % d`
    // below would divide by zero.
    if d == 0 {
        return Ok(Vec::new());
    }
    // `saturating_sub` keeps an empty matrix from underflowing `n - 1`.
    let n_minus_1 = n.saturating_sub(1).max(1) as f32;

    let mut deflated = centered.to_owned();
    let mut eigenvalues = Vec::with_capacity(k);
    for i in 0..k {
        let mut v = ndarray::Array1::<f32>::zeros(d);
        v[i % d] = 1.0;

        for _ in 0..max_iter {
            let u = deflated.dot(&v);
            let v_new = deflated.t().dot(&u);
            let lambda = v_new.iter().map(|x| x * x).sum::<f32>().sqrt();
            if lambda < 1e-12 {
                // Nothing left along this direction; keep the current `v`.
                break;
            }
            let next = v_new / lambda;
            // |cos| between successive iterates close to 1 means the
            // direction has stopped changing.
            let converged = (1.0 - next.dot(&v).abs()) < 1e-6;
            v = next;
            if converged {
                break;
            }
        }

        // `Xv` serves both as the Rayleigh-quotient numerator and as the
        // per-row projection coefficients for deflation, so compute it once.
        let projection = deflated.dot(&v);
        eigenvalues.push(projection.dot(&projection) / n_minus_1);

        for (mut row, &scale) in deflated.rows_mut().into_iter().zip(projection.iter()) {
            for (value, &comp) in row.iter_mut().zip(v.iter()) {
                *value -= comp * scale;
            }
        }
    }
    Ok(eigenvalues)
}
#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::Array2;

    /// Every row is the same vector, so all pairwise cosines are ~1.
    #[test]
    fn isotropy_of_identical_vectors_is_near_zero() {
        let identical_rows = Array2::from_shape_fn((50, 16), |(_row, col)| (col + 1) as f32);
        let score = isotropy_score(&identical_rows).unwrap();
        assert!(score < 0.05, "Expected low isotropy, got {score}");
    }

    /// The rows of an identity matrix are mutually orthogonal.
    #[test]
    fn isotropy_of_orthogonal_basis_is_high() {
        let basis = Array2::<f32>::from_shape_fn(
            (16, 16),
            |(row, col)| if row == col { 1.0 } else { 0.0 },
        );
        let score = isotropy_score(&basis).unwrap();
        assert!(score > 0.9, "Expected high isotropy, got {score}");
    }

    /// A single row has no pairs to compare.
    #[test]
    fn isotropy_requires_two_samples() {
        let single_row = Array2::from_shape_fn((1, 8), |_| 1.0_f32);
        let outcome = isotropy_score(&single_row);
        assert!(outcome.is_err());
    }

    /// One NaN entry is enough for the input to be rejected.
    #[test]
    fn isotropy_rejects_non_finite() {
        let poisoned = Array2::from_shape_fn((4, 4), |(row, col)| {
            if (row, col) == (1, 2) {
                f32::NAN
            } else {
                1.0_f32
            }
        });
        let outcome = isotropy_score(&poisoned);
        assert!(outcome.is_err());
    }

    /// Pseudo-scattered data driven by a modular pattern over rows and columns.
    #[test]
    fn partition_isotropy_of_uniform_eigenvalues_is_high() {
        let scattered = Array2::from_shape_fn((100, 8), |(row, col)| {
            ((row * 7 + col * 13) % 37) as f32 / 37.0
        });
        let score = partition_isotropy(&scattered).unwrap();
        assert!(
            score > 0.3,
            "Expected moderate-to-high partition isotropy, got {score}"
        );
    }

    /// Every column equals the row index, so the data varies along one direction.
    #[test]
    fn partition_isotropy_of_rank_one_data_is_low() {
        let rank_one = Array2::from_shape_fn((50, 16), |(row, _col)| row as f32);
        let score = partition_isotropy(&rank_one).unwrap();
        assert!(score < 0.3, "Expected low partition isotropy, got {score}");
    }
}