use crate::analysis::motives::MotiveConfig;
use crate::analysis::subgraphs::{Subgraph, SubgraphConfig, SubgraphsMotive};
use crate::builder::ArrowSpaceBuilder;
use crate::tests::test_data::make_gaussian_cliques_multi;
use log::debug;
use smartcore::linalg::basic::arrays::Array;
#[test]
fn test_subgraph_from_parent() {
crate::init();
let rows = make_gaussian_cliques_multi(300, 0.2, 10, 100, 999);
let (_aspace, gl) = ArrowSpaceBuilder::new()
.with_lambda_graph(0.4, 10, 6, 2.0, None)
.with_seed(999)
.build(rows);
let nodes = vec![0, 1, 2];
let sg = Subgraph::from_parent(&gl, &nodes, None);
assert_eq!(sg.node_indices, nodes);
let (f_dim, x_sg) = sg.laplacian.init_data.shape();
assert_eq!(x_sg, nodes.len());
assert_eq!(sg.laplacian.nnodes, nodes.len());
let (mf_rows, mf_cols) = sg.laplacian.matrix.shape();
assert_eq!(mf_rows, f_dim);
assert_eq!(mf_cols, f_dim);
assert!(sg.laplacian.matrix.nnz() > 0, "Subgraph should have edges");
}
#[test]
fn test_subgraph_rayleigh_computation() {
crate::init();
let rows = make_gaussian_cliques_multi(300, 0.2, 10, 100, 999);
let (_aspace, gl) = ArrowSpaceBuilder::new()
.with_lambda_graph(0.4, 10, 6, 2.0, None)
.with_seed(999)
.build(rows);
let nodes = vec![0, 1, 2];
let mut sg = Subgraph::from_parent(&gl, &nodes, None);
assert!(sg.rayleigh.is_none(), "Rayleigh should be None initially");
sg.compute_rayleigh();
assert!(sg.rayleigh.is_some(), "Rayleigh should be computed");
assert!(
sg.rayleigh.unwrap().is_finite(),
"Rayleigh should be finite"
);
}
#[test]
fn test_spot_subgraphs_energy_basic() {
crate::init();
let rows = make_gaussian_cliques_multi(300, 0.2, 10, 100, 999);
let (aspace, gl) = ArrowSpaceBuilder::new()
.with_lambda_graph(0.4, 12, 8, 2.0, None)
.with_seed(999)
.build(rows);
let cfg = SubgraphConfig {
motives: MotiveConfig {
top_l: 18,
min_triangles: 3,
min_clust: 0.35,
max_motif_size: 30,
max_sets: 60,
jaccard_dedup: 0.65,
},
rayleigh_max: Some(0.4),
min_size: 5,
};
let subgraphs = gl.spot_subg_motives(&aspace, &cfg);
if subgraphs.is_empty() {
debug!("No subgraphs extracted (may need different params)");
return;
}
assert!(
!subgraphs.is_empty(),
"Should extract at least one subgraph from clique data"
);
for sg in &subgraphs {
let (f_dim, x_motif) = sg.laplacian.init_data.shape();
assert_eq!(
sg.laplacian.nnodes, x_motif,
"nnodes must equal number of motif centroids (columns in init_data)"
);
assert_eq!(
sg.node_indices.len(),
x_motif,
"node_indices length must match nnodes"
);
let (mf_rows, mf_cols) = sg.laplacian.matrix.shape();
assert_eq!(mf_rows, f_dim);
assert_eq!(mf_cols, f_dim);
assert!(sg.laplacian.matrix.nnz() > 0, "Subgraph should have edges");
assert!(
sg.item_indices.is_some(),
"Energy subgraphs must have item_indices"
);
if let Some(r) = sg.rayleigh {
assert!(r <= 0.4, "Rayleigh filter should be applied");
}
}
debug!("Extracted {} subgraphs from energy mode", subgraphs.len());
}
#[test]
fn test_spot_subgraphs_energy_with_item_mapping() {
crate::init();
let rows = make_gaussian_cliques_multi(300, 0.2, 10, 100, 999);
let builder = ArrowSpaceBuilder::new()
.with_lambda_graph(0.4, 12, 8, 2.0, None)
.with_seed(999);
let (aspace, gl) = builder.build(rows);
let cfg = SubgraphConfig {
motives: MotiveConfig {
top_l: 20,
min_triangles: 3,
min_clust: 0.35,
max_motif_size: 35,
max_sets: 70,
jaccard_dedup: 0.6,
},
rayleigh_max: None,
min_size: 5,
};
let subgraphs = gl.spot_subg_motives(&aspace, &cfg);
if subgraphs.is_empty() {
debug!("No energy subgraphs extracted");
return;
}
let (f_parent, n_parent) = gl.init_data.shape();
for sg in &subgraphs {
let (f_sg, x_centroid) = sg.laplacian.init_data.shape();
assert_eq!(f_sg, f_parent);
assert_eq!(sg.laplacian.nnodes, x_centroid);
let (mf_rows, mf_cols) = sg.laplacian.matrix.shape();
assert_eq!(mf_rows, f_parent);
assert_eq!(mf_cols, f_parent);
assert!(
sg.item_indices.is_some(),
"Energy subgraphs must have item_indices"
);
for &node_idx in &sg.node_indices {
assert!(
node_idx < n_parent,
"centroid index {} out of range",
node_idx
);
}
if let Some(ref items) = sg.item_indices {
for &item_idx in items {
assert!(
item_idx < aspace.nitems,
"item index {} out of range",
item_idx
);
}
}
}
debug!(
"Extracted {} subgraphs from energy mode with item mappings",
subgraphs.len()
);
}
#[test]
fn test_spot_subgraphs_energy_multi_motifs() {
crate::init();
let rows = make_gaussian_cliques_multi(300, 0.2, 10, 100, 999);
let (aspace, gl) = ArrowSpaceBuilder::new()
.with_lambda_graph(0.35, 14, 10, 2.0, None)
.with_seed(999)
.build(rows);
let cfg = SubgraphConfig {
motives: MotiveConfig {
top_l: 22,
min_triangles: 3,
min_clust: 0.3,
max_motif_size: 40,
max_sets: 80,
jaccard_dedup: 0.55,
},
rayleigh_max: None,
min_size: 6,
};
let subgraphs = gl.spot_subg_motives(&aspace, &cfg);
if subgraphs.len() < 2 {
debug!(
"Extracted only {} subgraph(s); clique structure may yield fewer distinct motifs",
subgraphs.len()
);
}
if !subgraphs.is_empty() {
let mut centroid_appearances: std::collections::HashMap<usize, usize> =
std::collections::HashMap::new();
for sg in &subgraphs {
for &node_idx in &sg.node_indices {
*centroid_appearances.entry(node_idx).or_insert(0) += 1;
}
}
let overlapping_centroids: Vec<_> = centroid_appearances
.iter()
.filter(|(_, count)| **count > 1)
.collect();
if !overlapping_centroids.is_empty() {
debug!(
"Found {} centroids appearing in multiple motifs (overlapping structure)",
overlapping_centroids.len()
);
}
let (f_parent, n_parent) = gl.init_data.shape();
for sg in &subgraphs {
let (f_sg, x_sg) = sg.laplacian.init_data.shape();
assert_eq!(f_sg, f_parent);
assert_eq!(sg.laplacian.nnodes, x_sg);
assert_eq!(sg.node_indices.len(), x_sg);
for &node_idx in &sg.node_indices {
assert!(node_idx < n_parent);
}
assert!(sg.item_indices.is_some());
}
debug!(
"Extracted {} energy subgraphs with multi-motif validation",
subgraphs.len()
);
}
}
#[test]
fn test_subgraph_energy_rayleigh_filter() {
crate::init();
let rows = make_gaussian_cliques_multi(300, 0.2, 10, 100, 999);
let (aspace, gl) = ArrowSpaceBuilder::new()
.with_lambda_graph(0.4, 12, 8, 2.0, None)
.with_seed(999)
.build(rows);
let cfg_strict = SubgraphConfig {
motives: MotiveConfig {
top_l: 18,
min_triangles: 3,
min_clust: 0.35,
max_motif_size: 30,
max_sets: 60,
jaccard_dedup: 0.65,
},
rayleigh_max: Some(0.15), min_size: 5,
};
let subgraphs_strict = gl.spot_subg_motives(&aspace, &cfg_strict);
let cfg_relaxed = SubgraphConfig {
rayleigh_max: Some(0.5), ..cfg_strict
};
let subgraphs_relaxed = gl.spot_subg_motives(&aspace, &cfg_relaxed);
assert!(
subgraphs_relaxed.len() >= subgraphs_strict.len(),
"Relaxed Rayleigh filter should yield at least as many subgraphs as strict filter"
);
debug!(
"Strict filter: {} subgraphs, Relaxed filter: {} subgraphs",
subgraphs_strict.len(),
subgraphs_relaxed.len()
);
}
#[test]
fn test_subgraph_structure_clique_data() {
crate::init();
let rows = make_gaussian_cliques_multi(200, 0.3, 6, 100, 999);
let (aspace, gl) = ArrowSpaceBuilder::new()
.with_lambda_graph(0.35, 14, 10, 2.0, None)
.with_seed(999)
.build(rows);
let cfg = SubgraphConfig {
motives: MotiveConfig {
top_l: 20,
min_triangles: 4,
min_clust: 0.4,
max_motif_size: 35,
max_sets: 70,
jaccard_dedup: 0.6,
},
rayleigh_max: None,
min_size: 8, };
let subgraphs = gl.spot_subg_motives(&aspace, &cfg);
if subgraphs.is_empty() {
debug!("No subgraphs extracted with these strict parameters");
return;
}
for (i, sg) in subgraphs.iter().enumerate() {
let (f_dim, x_centroids) = sg.laplacian.init_data.shape();
debug!(
"Subgraph {}: {} centroids, {} features, {} items",
i,
x_centroids,
f_dim,
sg.item_indices.as_ref().map(|v| v.len()).unwrap_or(0)
);
assert!(
x_centroids >= 2,
"Subgraph {} must have at least 2 centroids for graph construction",
i
);
if let Some(ref items) = sg.item_indices {
assert!(
items.len() >= cfg.min_size,
"Subgraph {} should have at least min_size items",
i
);
assert!(
items.len() >= x_centroids,
"Subgraph {} should have at least as many items as centroids",
i
);
}
}
}