#![cfg(feature = "hnsw")]
#![allow(clippy::unwrap_used, clippy::expect_used)]
use proptest::prelude::*;
use vicinity::lid::{
aggregate_lid, estimate_lid_mle, estimate_twonn, estimate_twonn_with_ci, LidAggregation,
LidCategory, LidConfig, LidEstimate, LidStats,
};
proptest! {
#![proptest_config(ProptestConfig::with_cases(100))]
#[test]
fn lid_positive_for_valid_distances(
base in 0.01f32..0.1,
increments in prop::collection::vec(0.01f32..0.5, 19),
) {
let mut distances = vec![base];
let mut cumsum = base;
for inc in increments {
cumsum += inc;
distances.push(cumsum);
}
let config = LidConfig::default();
let estimate = estimate_lid_mle(&distances, &config);
prop_assert!(estimate.lid > 0.0 || estimate.lid.is_infinite(),
"LID should be positive, got {}", estimate.lid);
}
#[test]
fn lid_scale_invariant(
base in 0.01f32..0.1,
increments in prop::collection::vec(0.01f32..0.5, 19),
scale in 0.1f32..10.0,
) {
let mut distances = vec![base];
let mut cumsum = base;
for inc in &increments {
cumsum += inc;
distances.push(cumsum);
}
let scaled: Vec<f32> = distances.iter().map(|d| d * scale).collect();
let config = LidConfig::default();
let est1 = estimate_lid_mle(&distances, &config);
let est2 = estimate_lid_mle(&scaled, &config);
if est1.lid.is_finite() && est2.lid.is_finite() {
let relative_diff = (est1.lid - est2.lid).abs() / est1.lid.max(1.0);
prop_assert!(relative_diff < 0.3,
"LID not scale invariant: {} vs {} (scale={})",
est1.lid, est2.lid, scale);
}
}
#[test]
fn lid_respects_k(
distances in prop::collection::vec(0.1f32..10.0, 30..50),
k in 5usize..25,
) {
let mut sorted = distances.clone();
sorted.sort_by(|a, b| a.total_cmp(b));
let config = LidConfig { k, epsilon: 1e-10 };
let estimate = estimate_lid_mle(&sorted, &config);
prop_assert_eq!(estimate.k, k.min(sorted.len()),
"k should be min of config.k and distances.len()");
}
#[test]
fn lid_stats_valid(
lids in prop::collection::vec(1.0f32..50.0, 10..30),
) {
let estimates: Vec<LidEstimate> = lids.iter()
.map(|&lid| LidEstimate { lid, k: 20, max_dist: 1.0 })
.collect();
let stats = LidStats::from_estimates(&estimates);
prop_assert_eq!(stats.count, estimates.len());
prop_assert!(stats.min <= stats.mean);
prop_assert!(stats.mean <= stats.max);
prop_assert!(stats.std_dev >= 0.0);
}
#[test]
fn high_lid_threshold_above_median(
lids in prop::collection::vec(1.0f32..50.0, 5..20),
) {
let estimates: Vec<LidEstimate> = lids.iter()
.map(|&lid| LidEstimate { lid, k: 20, max_dist: 1.0 })
.collect();
let stats = LidStats::from_estimates(&estimates);
if stats.std_dev > 0.0 {
prop_assert!(stats.high_lid_threshold() > stats.median,
"threshold {} should be > median {}",
stats.high_lid_threshold(), stats.median);
}
}
#[test]
fn lid_categorization_consistent(
lids in prop::collection::vec(1.0f32..100.0, 20..50),
) {
let estimates: Vec<LidEstimate> = lids.iter()
.map(|&lid| LidEstimate { lid, k: 20, max_dist: 1.0 })
.collect();
let stats = LidStats::from_estimates(&estimates);
for &lid in &lids {
let category = stats.categorize(lid);
match category {
LidCategory::Low => {
prop_assert!(lid < stats.median - stats.std_dev + 1e-5,
"Low category but lid {} >= median - std = {}",
lid, stats.median - stats.std_dev);
}
LidCategory::High => {
prop_assert!(lid > stats.median + stats.std_dev - 1e-5,
"High category but lid {} <= median + std = {}",
lid, stats.median + stats.std_dev);
}
LidCategory::Normal => {}
}
}
}
#[test]
fn lid_uniform_distances_finite(
n in 10usize..50,
step in 0.01f32..0.5,
) {
let distances: Vec<f32> = (0..n).map(|i| (i + 1) as f32 * step).collect();
let config = LidConfig::default();
let estimate = estimate_lid_mle(&distances, &config);
prop_assert!(estimate.lid.is_finite(),
"LID should be finite for uniform distances, got {}", estimate.lid);
prop_assert!(estimate.lid > 0.0,
"LID should be positive, got {}", estimate.lid);
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(200))]
#[test]
fn twonn_positive_dimension(
n in 20usize..100,
seed in any::<u64>(),
) {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
seed.hash(&mut hasher);
let h = hasher.finish();
let ratios: Vec<f32> = (0..n)
.map(|i| {
let x = ((h.wrapping_mul(i as u64 + 1)) % 10000) as f32 / 10000.0;
1.0 + x * 3.0
})
.collect();
let dim = estimate_twonn(&ratios, 0.1);
if dim.is_finite() {
prop_assert!(dim > 0.0, "TwoNN dimension should be positive, got {}", dim);
}
}
#[test]
fn twonn_ci_ordering(
n in 50usize..200,
seed in any::<u64>(),
) {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
seed.hash(&mut hasher);
let h = hasher.finish();
let ratios: Vec<f32> = (0..n)
.map(|i| {
let x = ((h.wrapping_mul(i as u64 + 1)) % 10000) as f32 / 10000.0;
1.0 + x * 2.0
})
.collect();
let result = estimate_twonn_with_ci(&ratios, 0.1);
if result.dimension.is_finite() && result.ci_lower.is_finite() && result.ci_upper.is_finite() {
prop_assert!(
result.ci_lower <= result.dimension,
"CI lower {} should be <= dimension {}",
result.ci_lower, result.dimension
);
prop_assert!(
result.dimension <= result.ci_upper,
"dimension {} should be <= CI upper {}",
result.dimension, result.ci_upper
);
}
}
#[test]
fn twonn_consistency(
n in 50usize..200,
seed in any::<u64>(),
) {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
seed.hash(&mut hasher);
let h = hasher.finish();
let ratios: Vec<f32> = (0..n)
.map(|i| {
let x = ((h.wrapping_mul(i as u64 + 1)) % 10000) as f32 / 10000.0;
1.0 + x * 2.0
})
.collect();
let dim1 = estimate_twonn(&ratios, 0.1);
let result2 = estimate_twonn_with_ci(&ratios, 0.1);
if dim1.is_finite() && result2.dimension.is_finite() {
let rel_diff = (dim1 - result2.dimension).abs() / dim1.max(result2.dimension);
prop_assert!(
rel_diff < 0.5,
"TwoNN methods disagree too much: {} vs {} (rel diff {})",
dim1, result2.dimension, rel_diff
);
}
}
#[test]
fn twonn_discard_sensitivity(
n in 100usize..300,
seed in any::<u64>(),
) {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
seed.hash(&mut hasher);
let h = hasher.finish();
let ratios: Vec<f32> = (0..n)
.map(|i| {
let x = ((h.wrapping_mul(i as u64 + 1)) % 10000) as f32 / 10000.0;
if i < n - 5 {
1.0 + x * 2.0
} else {
5.0 + x * 10.0
}
})
.collect();
let dim_no_discard = estimate_twonn(&ratios, 0.0);
let dim_with_discard = estimate_twonn(&ratios, 0.1);
if dim_no_discard.is_finite() && dim_with_discard.is_finite() {
prop_assert!(
dim_no_discard > 0.0 || dim_with_discard > 0.0,
"At least one estimate should be positive"
);
}
}
#[test]
fn twonn_empty_returns_nan(
n in 0usize..2,
) {
let ratios: Vec<f32> = (0..n).map(|i| 1.0 + i as f32 * 0.5).collect();
let dim = estimate_twonn(&ratios, 0.1);
let result = estimate_twonn_with_ci(&ratios, 0.1);
if n < 2 {
prop_assert!(dim.is_nan(), "Should return NaN for n={}", n);
prop_assert!(result.dimension.is_nan(), "CI version should return NaN for n={}", n);
}
}
#[test]
fn twonn_handles_equidistant(
n in 20usize..50,
frac_equidistant in 0.0f32..0.5,
) {
let n_equidistant = (n as f32 * frac_equidistant) as usize;
let ratios: Vec<f32> = (0..n)
.map(|i| {
if i < n_equidistant {
1.0
} else {
1.1 + (i as f32 * 0.1) % 2.0
}
})
.collect();
let dim = estimate_twonn(&ratios, 0.1);
if n - n_equidistant >= 3 {
prop_assert!(dim.is_finite() && dim > 0.0,
"Expected positive finite LID estimate, got {} (n={}, equidistant={})",
dim, n, n_equidistant);
}
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(100))]
#[test]
fn harmonic_le_arithmetic(
lids in prop::collection::vec(0.5f32..50.0, 5..30),
) {
let estimates: Vec<LidEstimate> = lids.iter()
.map(|&lid| LidEstimate { lid, k: 20, max_dist: 1.0 })
.collect();
let mean = aggregate_lid(&estimates, LidAggregation::Mean);
let harmonic = aggregate_lid(&estimates, LidAggregation::HarmonicMean);
prop_assert!(
harmonic <= mean + 1e-6,
"Harmonic mean {} should be <= arithmetic mean {}",
harmonic, mean
);
}
#[test]
fn median_bounded(
lids in prop::collection::vec(1.0f32..100.0, 3..50),
) {
let estimates: Vec<LidEstimate> = lids.iter()
.map(|&lid| LidEstimate { lid, k: 20, max_dist: 1.0 })
.collect();
let median = aggregate_lid(&estimates, LidAggregation::Median);
let min_lid = lids.iter().cloned().reduce(f32::min).unwrap();
let max_lid = lids.iter().cloned().reduce(f32::max).unwrap();
prop_assert!(
median >= min_lid - 1e-6 && median <= max_lid + 1e-6,
"Median {} should be in [{}, {}]",
median, min_lid, max_lid
);
}
#[test]
fn aggregation_finite(
lids in prop::collection::vec(1.0f32..50.0, 5..20),
) {
let estimates: Vec<LidEstimate> = lids.iter()
.map(|&lid| LidEstimate { lid, k: 20, max_dist: 1.0 })
.collect();
let mean = aggregate_lid(&estimates, LidAggregation::Mean);
let median = aggregate_lid(&estimates, LidAggregation::Median);
let harmonic = aggregate_lid(&estimates, LidAggregation::HarmonicMean);
prop_assert!(mean.is_finite(), "Mean should be finite");
prop_assert!(median.is_finite(), "Median should be finite");
prop_assert!(harmonic.is_finite(), "Harmonic mean should be finite");
}
}