use crate::graph::GraphLaplacian;
use log::{debug, info};
use rayon::prelude::*;
use smartcore::linalg::basic::arrays::Array;
use std::collections::HashSet;
#[derive(Clone, Debug)]
pub struct MotiveConfig {
pub top_l: usize,
pub min_triangles: usize,
pub min_clust: f64,
pub max_motif_size: usize,
pub max_sets: usize,
pub jaccard_dedup: f64,
}
impl Default for MotiveConfig {
fn default() -> Self {
Self {
top_l: 16,
min_triangles: 2,
min_clust: 0.4,
max_motif_size: 32,
max_sets: 256,
jaccard_dedup: 0.8,
}
}
}
pub trait Motives {
fn spot_motives_eigen(&self, cfg: &MotiveConfig) -> Vec<Vec<usize>>;
fn spot_motives_energy(
&self,
aspace: &crate::core::ArrowSpace,
cfg: &crate::analysis::motives::MotiveConfig,
) -> Vec<Vec<usize>>;
fn is_clique(&self, set: &HashSet<usize>) -> bool;
fn rayleigh_indicator(&self, set: &HashSet<usize>) -> f64;
}
impl Motives for GraphLaplacian {
fn spot_motives_eigen(&self, cfg: &MotiveConfig) -> Vec<Vec<usize>> {
info!(
"Spotting motifs: top_l={}, min_tri={}, min_clust={:.2}, max_size={}",
cfg.top_l, cfg.min_triangles, cfg.min_clust, cfg.max_motif_size
);
let n = self.init_data.shape().0;
let neigh: Vec<Vec<(usize, f64)>> = (0..n)
.into_par_iter()
.map(|i| {
let mut nb: Vec<(usize, f64)> = self.neighbors_of(i);
nb.sort_unstable_by(|a, b| {
b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)
});
if nb.len() > cfg.top_l {
nb.truncate(cfg.top_l);
}
nb
})
.collect();
let neigh_idx: Vec<Vec<usize>> = neigh
.par_iter()
.map(|v| {
let mut ids: Vec<usize> = v.iter().map(|(j, _)| *j).collect();
ids.sort_unstable();
ids
})
.collect();
let (tri_count, clust) = triangle_stats_sorted(&neigh_idx, n);
debug!(
"Triangle stats: max_tri={}, max_clust={:.3}",
tri_count.iter().max().unwrap_or(&0),
clust.iter().cloned().fold(0.0f64, f64::max)
);
let mut seeds: Vec<usize> = (0..n)
.into_par_iter()
.filter(|&i| tri_count[i] >= cfg.min_triangles && clust[i] >= cfg.min_clust)
.collect();
seeds.par_sort_unstable_by_key(|&i| {
std::cmp::Reverse((tri_count[i], (clust[i] * 1e6) as i64))
});
info!("Seeds identified: {}", seeds.len());
debug!("Motives Seeds used: {:?}", seeds);
let expansions: Vec<Option<HashSet<usize>>> = seeds
.par_iter()
.map(|&s| {
let mut seeds_hashset: HashSet<usize> = HashSet::from([s]);
loop {
if seeds_hashset.len() >= cfg.max_motif_size {
break;
}
let mut cand = HashSet::new();
for &u in &seeds_hashset {
for &v in &neigh_idx[u] {
if !seeds_hashset.contains(&v) {
cand.insert(v);
}
}
}
if cand.is_empty() {
break;
}
let mut best_u: Option<usize> = None;
let mut best_gain: i64 = -1;
for u in cand {
let mut s_nbrs: Vec<usize> = neigh_idx[u]
.iter()
.copied()
.filter(|v| seeds_hashset.contains(v))
.collect();
s_nbrs.sort_unstable();
let mut edges = 0i64;
for i in 0..s_nbrs.len() {
let ui = s_nbrs[i];
edges += count_edges_among(&neigh_idx[ui], &s_nbrs, i + 1) as i64;
}
if edges > best_gain {
best_gain = edges;
best_u = Some(u);
}
}
match best_u {
Some(u) => {
let mut s2 = seeds_hashset.clone();
s2.insert(u);
seeds_hashset = s2;
}
None => break,
}
}
if seeds_hashset.len() >= 3 {
Some(seeds_hashset)
} else {
None
}
})
.collect();
let mut results: Vec<HashSet<usize>> = Vec::new();
for opt in expansions.into_iter().flatten() {
let mut keep = true;
for res in &results {
if jaccard(&opt, res) >= cfg.jaccard_dedup {
keep = false;
break;
}
}
if keep {
results.push(opt);
if results.len() >= cfg.max_sets {
break;
}
}
}
info!("Motifs found: {}", results.len());
let mut out: Vec<Vec<usize>> = results
.into_iter()
.map(|res| {
let mut v: Vec<usize> = res.into_iter().collect();
v.sort_unstable();
v
})
.collect();
out.shrink_to_fit();
out
}
fn spot_motives_energy(
&self,
aspace: &crate::core::ArrowSpace,
cfg: &MotiveConfig,
) -> Vec<Vec<usize>> {
let (rows, cols) = self.matrix.shape();
if rows == 0 || rows != cols {
return Vec::new();
}
let n_sc = rows;
info!(
"Spotting energy motifs: top_l={}, min_tri={}, min_clust={:.2}, max_size={}, n_sc={}",
cfg.top_l, cfg.min_triangles, cfg.min_clust, cfg.max_motif_size, n_sc
);
let neigh_idx: Vec<Vec<usize>> = (0..n_sc)
.into_par_iter()
.map(|i| {
let mut ids: Vec<usize> = self
.neighbors_of(i)
.into_iter()
.filter_map(|(j, w)| {
if j < n_sc && j != i && w > 0.0 {
Some(j)
} else {
None
}
})
.collect();
ids.sort_unstable();
if ids.len() > cfg.top_l {
ids.truncate(cfg.top_l);
}
ids
})
.collect();
let (tri_count, clust) = triangle_stats_sorted(&neigh_idx, n_sc);
debug!(
"Energy triangle stats: max_tri={}, max_clust={:.3}",
tri_count.iter().copied().max().unwrap_or(0),
clust.iter().cloned().fold(0.0f64, f64::max)
);
let mut seeds: Vec<usize> = (0..n_sc)
.into_par_iter()
.filter(|&i| tri_count[i] >= cfg.min_triangles && clust[i] >= cfg.min_clust)
.collect();
seeds.sort_by_key(|&i| {
(
std::cmp::Reverse(tri_count[i]),
std::cmp::Reverse((clust[i] * 1e6) as i64),
i, )
});
debug!("Energy motifs seeds (subcentroids): {:?}", seeds);
let expansions: Vec<Option<HashSet<usize>>> = seeds
.par_iter()
.map(|&s| {
let mut seeds_vec: Vec<usize> = vec![s];
loop {
if seeds_vec.len() >= cfg.max_motif_size {
break;
}
let seeds_set: HashSet<usize> = seeds_vec.iter().copied().collect();
let cand: Vec<usize> = {
let mut c = HashSet::new();
for &u in &seeds_vec {
for &v in &neigh_idx[u] {
if !seeds_set.contains(&v) {
c.insert(v);
}
}
}
let mut v: Vec<usize> = c.into_iter().collect();
v.sort_unstable(); v
};
if cand.is_empty() {
break;
}
let mut best_u: Option<usize> = None;
let mut best_gain: i64 = -1;
for &u in &cand {
let mut s_nbrs: Vec<usize> = neigh_idx[u]
.iter()
.copied()
.filter(|v| seeds_set.contains(v))
.collect();
s_nbrs.sort_unstable();
let mut edges = 0i64;
for i in 0..s_nbrs.len() {
let ui = s_nbrs[i];
edges += count_edges_among(&neigh_idx[ui], &s_nbrs, i + 1) as i64;
}
if edges > best_gain {
best_gain = edges;
best_u = Some(u);
}
}
match best_u {
Some(u) => {
seeds_vec.push(u);
seeds_vec.sort_unstable(); }
None => break,
}
}
if seeds_vec.len() >= 3 {
Some(seeds_vec.iter().copied().collect::<HashSet<usize>>())
} else {
None
}
})
.collect();
let mut sc_results: Vec<HashSet<usize>> = Vec::new();
for opt in expansions.into_iter().flatten() {
let mut keep = true;
for res in &sc_results {
if jaccard(&opt, res) >= cfg.jaccard_dedup {
keep = false;
break;
}
}
if keep {
sc_results.push(opt);
if sc_results.len() >= cfg.max_sets {
break;
}
}
}
info!(
"Energy motifs: {} subcentroid motifs found",
sc_results.len()
);
let cmap = match &aspace.centroid_map {
Some(m) => m,
None => {
let mut out_sc: Vec<Vec<usize>> = sc_results
.into_iter()
.map(|res| {
let mut v: Vec<usize> = res.into_iter().collect();
v.sort_unstable();
v
})
.collect();
out_sc.shrink_to_fit();
return out_sc;
}
};
cmap.par_iter().enumerate().for_each(|(_, &sc_idx)| {
if sc_idx < n_sc {
}
});
let sc_item_pairs: Vec<(usize, usize)> = cmap
.par_iter()
.enumerate()
.filter_map(|(it, &sc)| if sc < n_sc { Some((sc, it)) } else { None })
.collect();
let mut sc_to_items: Vec<Vec<usize>> = vec![Vec::new(); n_sc];
for (sc, it) in sc_item_pairs {
sc_to_items[sc].push(it);
}
let item_sets: Vec<HashSet<usize>> = sc_results
.par_iter()
.map(|s_sc| {
let mut s_items = HashSet::new();
for &sc in s_sc {
for &it in &sc_to_items[sc] {
s_items.insert(it);
}
}
s_items
})
.filter(|s_items| s_items.len() >= 3)
.collect();
let mut item_sets_sorted: Vec<Vec<usize>> = item_sets
.into_iter()
.map(|set| {
let mut v: Vec<usize> = set.into_iter().collect();
v.sort_unstable(); v
})
.collect();
item_sets_sorted.sort_unstable();
let mut deduped_items: Vec<Vec<usize>> = Vec::new();
for item in item_sets_sorted {
let item_set: HashSet<usize> = item.iter().copied().collect();
let mut keep = true;
for cmp in &deduped_items {
let cmp_set: HashSet<usize> = cmp.iter().copied().collect();
if jaccard(&item_set, &cmp_set) >= cfg.jaccard_dedup {
keep = false;
break;
}
}
if keep {
deduped_items.push(item); if deduped_items.len() >= cfg.max_sets {
break;
}
}
}
info!(
"Energy motifs: {} item-level motifs after mapping",
deduped_items.len()
);
let mut out: Vec<Vec<usize>> = deduped_items;
out.shrink_to_fit();
out
}
fn is_clique(&self, set: &HashSet<usize>) -> bool {
let sz = set.len();
if sz < 2 {
return false;
}
let ok = set.par_iter().all(|&u| {
let nbrs: HashSet<usize> = self.neighbors_of(u).iter().map(|(j, _)| *j).collect();
let need = sz - 1;
let have = nbrs.intersection(set).count();
have == need
});
ok
}
fn rayleigh_indicator(&self, set: &HashSet<usize>) -> f64 {
let (rows, cols) = self.matrix.shape();
if rows == 0 || rows != cols || set.is_empty() {
return f64::INFINITY;
}
let n = rows;
if set.iter().any(|&u| u >= n) {
return f64::INFINITY;
}
let mut x = vec![0.0f64; n];
for &i in set {
x[i] = 1.0;
}
self.rayleigh_quotient(&x)
}
}
fn triangle_stats_sorted(neigh_idx: &[Vec<usize>], n: usize) -> (Vec<usize>, Vec<f64>) {
let tri_count: Vec<usize> = (0..n)
.into_par_iter()
.map(|i| {
let nbrs_i = &neigh_idx[i];
if nbrs_i.len() < 2 {
return 0usize;
}
let mut t = 0usize;
for &j in nbrs_i {
if j <= i {
continue;
}
let nbrs_j = &neigh_idx[j];
t += count_intersection(nbrs_i, nbrs_j, i, j);
}
t
})
.collect();
let clust: Vec<f64> = (0..n)
.into_par_iter()
.map(|i| {
let k = neigh_idx[i].len();
if k >= 2 {
(2.0 * tri_count[i] as f64) / ((k * (k - 1)) as f64)
} else {
0.0
}
})
.collect();
(tri_count, clust)
}
#[inline]
fn count_intersection(a: &Vec<usize>, b: &Vec<usize>, i: usize, j: usize) -> usize {
let mut x = 0usize;
let (mut p, mut q) = (0usize, 0usize);
while p < a.len() && q < b.len() {
let va = a[p];
let vb = b[q];
if va == vb {
if va != i && va != j {
x += 1;
}
p += 1;
q += 1;
} else if va < vb {
p += 1;
} else {
q += 1;
}
}
x
}
#[inline]
fn count_edges_among(neigh_u: &Vec<usize>, s_nbrs: &Vec<usize>, start: usize) -> usize {
let mut x = 0usize;
let mut p = 0usize;
let mut q = start;
while p < neigh_u.len() && q < s_nbrs.len() {
let va = neigh_u[p];
let vb = s_nbrs[q];
if va == vb {
x += 1;
p += 1;
q += 1;
} else if va < vb {
p += 1;
} else {
q += 1;
}
}
x
}
pub fn jaccard(a: &HashSet<usize>, b: &HashSet<usize>) -> f64 {
let inter = a.intersection(b).count() as f64;
let union = (a.len() + b.len()) as f64 - inter;
if union == 0.0 { 0.0 } else { inter / union }
}