use crate::builder::ArrowSpaceBuilder;
use crate::clustering::{ClusteredOutput, ClusteringHeuristic};
use crate::core::ArrowSpace;
use crate::graph::GraphLaplacian;
use crate::maps::eigenmaps::EigenMaps;
use crate::search::taumode::TauMode;
use crate::tests::GAUSSIAN_DATA;
use crate::tests::init;
use approx::{relative_eq, relative_ne};
use log::{debug, info};
fn assert_lambdas_equal(a: &[f64], b: &[f64], tol: f64, label: &str, spectral: bool) {
init();
assert_eq!(a.len(), b.len(), "{}: lambda vector length mismatch", label);
for (i, (&la, &lb)) in a.iter().zip(b.iter()).enumerate() {
if !spectral {
assert!(
la >= 0.0 && lb >= 0.0,
"lambda_a is {} and lambda_b {}",
la,
lb
);
}
if relative_ne!(la, lb, epsilon = tol, max_relative = tol) {
debug!("lambdas are not equal: {} {}", la, lb);
panic!("{}[{}]: lambda mismatch", label, i)
} else {
};
}
}
fn assert_clustering_equal(a: &ArrowSpace, b: &ArrowSpace, label: &str) {
init();
assert_eq!(a.n_clusters, b.n_clusters, "{}: n_clusters mismatch", label);
assert_eq!(
a.cluster_assignments, b.cluster_assignments,
"{}: cluster_assignments mismatch",
label
);
assert_eq!(
a.cluster_sizes, b.cluster_sizes,
"{}: cluster_sizes mismatch",
label
);
if relative_ne!(a.cluster_radius, b.cluster_radius, epsilon = 1e-9) {
panic!("{}: cluster_radius mismatch", label)
} else {
};
}
fn assert_laplacian_equal(a: &GraphLaplacian, b: &GraphLaplacian, label: &str) {
init();
assert_eq!(a.shape(), b.shape(), "{}: Laplacian shape mismatch", label);
assert_eq!(a.nnodes, b.nnodes, "{}: nnodes mismatch", label);
assert_eq!(a.nnz(), b.nnz(), "{}: nnz mismatch", label);
if relative_ne!(
GraphLaplacian::sparsity(&a.matrix),
GraphLaplacian::sparsity(&b.matrix),
epsilon = 1e-9
) {
panic!("{}: sparsity mismatch", label)
} else {
};
}
fn assert_search_results_equal(a: &[(usize, f64)], b: &[(usize, f64)], tol: f64, label: &str) {
assert_eq!(a.len(), b.len(), "{}: result count mismatch", label);
for (i, ((idx_a, score_a), (idx_b, score_b))) in a.iter().zip(b.iter()).enumerate() {
assert_eq!(idx_a, idx_b, "{}[{}]: index mismatch", label, i);
if relative_ne!(score_a, score_b, epsilon = tol) {
panic!("{}[{}]: score mismatch", label, i)
} else {
};
}
}
#[test]
#[should_panic]
fn test_eigenmaps_non_sensical_undecidable_query() {
crate::init(); info!("Test: EigenMaps trait vs build() - basic dataset");
let rows = &*GAUSSIAN_DATA;
let query = vec![0.5; 100];
let mut builder = ArrowSpaceBuilder::new()
.with_lambda_graph(1.0, 3, 3, 2.0, None)
.with_synthesis(TauMode::Median)
.with_seed(12345) .with_inline_sampling(None)
.with_dims_reduction(false, None);
let ClusteredOutput {
mut aspace,
centroids,
n_items,
..
} = ArrowSpaceBuilder::start_clustering(&mut builder, rows.clone());
let gl = aspace.eigenmaps(&builder, ¢roids, n_items);
aspace.compute_taumode(&gl);
let _ = aspace.prepare_query_item(&query, &gl);
}
#[test]
fn test_eigenmaps_vs_build_basic() {
crate::init(); info!("Test: EigenMaps trait vs build() - basic dataset");
let rows = &*GAUSSIAN_DATA;
let query_idx = 10;
let query = &rows[query_idx];
let k = 5;
let tau = 0.7;
let builder_control = ArrowSpaceBuilder::new()
.with_lambda_graph(1.0, 3, 3, 2.0, None)
.with_synthesis(TauMode::Median)
.with_seed(12345) .with_inline_sampling(None) .with_dims_reduction(false, None);
let (aspace_control, gl_control) = builder_control.build(rows.clone());
let mut builder_exp = ArrowSpaceBuilder::new()
.with_lambda_graph(1.0, 3, 3, 2.0, None)
.with_synthesis(TauMode::Median)
.with_seed(12345) .with_inline_sampling(None)
.with_dims_reduction(false, None);
let ClusteredOutput {
mut aspace,
centroids,
n_items,
..
} = ArrowSpaceBuilder::start_clustering(&mut builder_exp, rows.clone());
let gl_exp = aspace.eigenmaps(&builder_exp, ¢roids, n_items);
aspace.compute_taumode(&gl_exp);
assert_clustering_equal(&aspace_control, &aspace, "basic");
assert_laplacian_equal(&gl_control, &gl_exp, "basic");
assert_lambdas_equal(
aspace_control.lambdas(),
aspace.lambdas(),
1e-6,
"basic lambdas",
false,
);
let query_lambda = aspace_control.prepare_query_item(&query, &gl_control);
let results_control = aspace_control.search_lambda_aware(
&crate::core::ArrowItem::new(query, query_lambda),
k,
tau,
);
let results_exp = aspace.search(&query, &gl_exp, k, tau);
assert_search_results_equal(&results_control, &results_exp, 1e-6, "basic search");
info!("✓ EigenMaps trait matches build() for basic dataset");
}
#[test]
fn test_eigenmaps_vs_build_with_spectral() {
crate::init();
info!("Test: EigenMaps trait vs build() - with spectral Laplacian");
let rows = &*GAUSSIAN_DATA;
let query = &rows[10];
let k = 4;
let tau = 0.6;
let builder_control = ArrowSpaceBuilder::new()
.with_lambda_graph(1.0, 3, 3, 2.0, None)
.with_synthesis(TauMode::Median)
.with_spectral(true) .with_seed(5555)
.with_dims_reduction(false, None)
.with_inline_sampling(None);
let (aspace_control, gl_control) = builder_control.build(rows.clone());
let mut builder_exp = ArrowSpaceBuilder::new()
.with_lambda_graph(1.0, 3, 3, 2.0, None)
.with_synthesis(TauMode::Median)
.with_spectral(true)
.with_seed(5555)
.with_dims_reduction(false, None)
.with_inline_sampling(None);
let ClusteredOutput {
mut aspace,
centroids,
n_items,
..
} = ArrowSpaceBuilder::start_clustering(&mut builder_exp, rows.clone());
let gl_exp = aspace.eigenmaps(&builder_exp, ¢roids, n_items);
aspace.compute_taumode(&gl_exp);
assert_eq!(
aspace_control.signals.shape(),
aspace.signals.shape(),
"signals shape mismatch"
);
assert_eq!(
aspace_control.signals.nnz(),
aspace.signals.nnz(),
"signals nnz mismatch"
);
assert_clustering_equal(&aspace_control, &aspace, "spectral");
assert_laplacian_equal(&gl_control, &gl_exp, "spectral");
assert_lambdas_equal(
aspace_control.lambdas(),
aspace.lambdas(),
1e-6,
"spectral lambdas",
true,
);
let results_control = aspace_control.search_lambda_aware(
&crate::core::ArrowItem::new(
query,
aspace_control.prepare_query_item(&query, &gl_control),
),
k,
tau,
);
let results_exp = aspace.search(&query, &gl_exp, k, tau);
assert_search_results_equal(&results_control, &results_exp, 1e-6, "spectral search");
info!("✓ EigenMaps trait matches build() with spectral Laplacian");
}
#[test]
fn test_eigenmaps_vs_build_different_taumode() {
crate::init();
info!("Test: EigenMaps trait vs build() - Mean taumode");
let rows = &*GAUSSIAN_DATA;
let query_idx = 10;
let query = &rows[query_idx];
let k = 6;
let tau = 0.75;
let builder_control = ArrowSpaceBuilder::new()
.with_lambda_graph(1.0, 3, 3, 2.0, None)
.with_synthesis(TauMode::Mean) .with_seed(7777)
.with_inline_sampling(None);
let (aspace_control, gl_control) = builder_control.build(rows.clone());
let mut builder_exp = ArrowSpaceBuilder::new()
.with_lambda_graph(1.0, 3, 3, 2.0, None)
.with_synthesis(TauMode::Mean)
.with_seed(7777)
.with_inline_sampling(None);
let ClusteredOutput {
mut aspace,
centroids,
n_items,
..
} = ArrowSpaceBuilder::start_clustering(&mut builder_exp, rows.clone());
let gl_exp = aspace.eigenmaps(&builder_exp, ¢roids, n_items);
aspace.compute_taumode(&gl_exp);
assert_clustering_equal(&aspace_control, &aspace, "mean_taumode");
assert_laplacian_equal(&gl_control, &gl_exp, "mean_taumode");
assert_lambdas_equal(
aspace_control.lambdas(),
aspace.lambdas(),
1e-6,
"mean_taumode lambdas",
false,
);
let results_control = aspace_control.search_lambda_aware(
&crate::core::ArrowItem::new(
query,
aspace_control.prepare_query_item(&query, &gl_control),
),
k,
tau,
);
let results_exp = aspace.search(&query, &gl_exp, k, tau);
assert_search_results_equal(&results_control, &results_exp, 1e-10, "mean_taumode search");
info!("✓ EigenMaps trait matches build() with Mean taumode");
}
#[test]
#[should_panic]
fn test_search_without_taumode_panics() {
crate::init();
info!("Test: Search without compute_taumode should panic in debug");
let rows = &*GAUSSIAN_DATA;
let query = vec![0.0; 6];
let mut builder = ArrowSpaceBuilder::new()
.with_lambda_graph(1.0, 3, 3, 2.0, None)
.with_seed(111)
.with_inline_sampling(None);
let ClusteredOutput {
mut aspace,
centroids,
n_items,
..
} = ArrowSpaceBuilder::start_clustering(&mut builder, rows.clone());
let gl = aspace.eigenmaps(&builder, ¢roids, n_items);
let _ = aspace.search(&query, &gl, 3, 0.7);
}
#[test]
#[ignore = "run for too long"]
fn test_eigenmaps_stages_produce_valid_state() {
crate::init();
info!("Test: EigenMaps stages produce valid intermediate state");
let rows = &*GAUSSIAN_DATA;
let mut builder = ArrowSpaceBuilder::new()
.with_lambda_graph(1.0, 3, 3, 2.0, None)
.with_seed(222)
.with_inline_sampling(None);
let ClusteredOutput {
mut aspace,
centroids,
n_items,
n_features,
reduced_dim,
} = ArrowSpaceBuilder::start_clustering(&mut builder, rows.clone());
assert_eq!(n_items, 99, "n_items should match input");
assert_eq!(n_features, 100, "n_features should match input");
assert!(aspace.n_clusters > 0, "Should have clustered");
assert_eq!(reduced_dim, n_features, "No projection, dims unchanged");
let gl = aspace.eigenmaps(&builder, ¢roids, n_items);
assert_eq!(gl.nnodes, n_items, "Laplacian nodes should match items");
assert!(gl.nnz() > 0, "Laplacian should have edges");
assert!(
GraphLaplacian::sparsity(&gl.matrix) < 1.0,
"Should not be fully sparse"
);
let mut aspace = aspace; assert!(
aspace.lambdas().iter().all(|&l| l == 0.0),
"Lambdas should be zero before compute_taumode"
);
aspace.compute_taumode(&gl);
assert!(
aspace.lambdas().iter().any(|&l| l > 0.0),
"Lambdas should be nonzero after compute_taumode"
);
info!("✓ All stages produce valid intermediate state");
}
#[test]
fn test_eigenmaps_with_dim_reduction() {
crate::init();
let rows = &*GAUSSIAN_DATA;
let query_idx = 10;
let query = &rows[query_idx];
let k = 5;
let builder_control = ArrowSpaceBuilder::new()
.with_lambda_graph(0.5, 3, 3, 2.0, None)
.with_synthesis(TauMode::Median)
.with_seed(12345) .with_inline_sampling(None) .with_dims_reduction(true, Some(0.1));
let (aspace, gl) = builder_control.build(rows.clone());
assert_eq!(
aspace.lambdas()[query_idx],
aspace.prepare_query_item(query, &gl),
"original query lambda and prepared query lambda should be equal"
);
let results = aspace.search(query, &gl, k, 0.62);
assert!(
results
.iter()
.any(|(i, r)| *i == query_idx && relative_eq!(*r, 1.0, epsilon = 1e-10))
);
}