use log::{debug, info, trace, warn};
use std::cmp::Ordering;
use std::sync::Arc;
use rayon::prelude::*;
use smartcore::linalg::basic::arrays::{Array, Array2, MutArray};
use smartcore::linalg::basic::matrix::DenseMatrix;
use crate::builder::ArrowSpaceBuilder;
use crate::clustering::{ClusteredOutput, ClusteringHeuristic};
use crate::core::ArrowSpace;
use crate::graph::{GraphLaplacian, GraphParams};
use crate::laplacian::build_laplacian_matrix;
use crate::reduction::ImplicitProjection;
use crate::search::taumode::TauMode;
#[derive(Clone, Debug)]
pub struct EnergyParams {
pub optical_tokens: Option<usize>,
pub trim_quantile: f64,
pub eta: f64,
pub steps: usize,
pub split_quantile: f64,
pub neighbor_k: usize,
pub split_tau: f64,
pub w_lambda: f64,
pub w_disp: f64,
pub w_dirichlet: f64,
pub candidate_m: usize,
}
impl Default for EnergyParams {
fn default() -> Self {
debug!("Creating default EnergyParams");
Self {
optical_tokens: Some(50),
trim_quantile: 0.1,
eta: 0.1,
steps: 4,
split_quantile: 0.9,
neighbor_k: 20,
split_tau: 0.15,
w_lambda: 1.0,
w_disp: 0.5,
w_dirichlet: 0.25,
candidate_m: 32,
}
}
}
impl EnergyParams {
pub fn new(builder: &ArrowSpaceBuilder) -> Self {
info!("Creating adaptive EnergyParams from ArrowSpaceBuilder");
let base_k = builder.lambda_k;
let dim_reduction_ratio = builder.rp_eps;
let neighbor_k = (base_k * 2).max(15).min(50);
let candidate_m = (neighbor_k * 3).max(30).min(128);
let optical_tokens = if builder.nitems != 0 {
let tokens = Self::compute_adaptive_tokens(builder.nitems);
info!(
"Using dataset-aware optical_tokens={} for {} items (2√N rule)",
tokens, builder.nitems
);
Some(tokens)
} else if builder.use_dims_reduction {
let tokens = (80.0 / dim_reduction_ratio).ceil() as usize;
let tokens = tokens.max(40).min(200);
warn!(
"Using dim-reduction heuristic: optical_tokens={} (consider setting expected_nitems for better scaling)",
tokens
);
Some(tokens)
} else {
warn!("No dataset size or dim reduction info; using default optical_tokens=60");
Some(60)
};
debug!(
"Adaptive params: neighbor_k={}, candidate_m={}, optical_tokens={:?}",
neighbor_k, candidate_m, optical_tokens
);
Self {
optical_tokens,
trim_quantile: 0.1,
eta: 0.1,
steps: 4,
split_quantile: 0.9,
neighbor_k,
split_tau: 0.15,
w_lambda: 1.0,
w_disp: 0.5,
w_dirichlet: 0.25,
candidate_m,
}
}
pub fn compute_adaptive_tokens(nitems: usize) -> usize {
let sqrt_n = (nitems as f64).sqrt();
let tokens = (sqrt_n * 2.0).round() as usize;
tokens.max(100).min(2000)
}
pub fn high_resolution(builder: &ArrowSpaceBuilder) -> Self {
info!("Creating high-resolution EnergyParams");
Self {
optical_tokens: None, neighbor_k: (builder.lambda_k * 3).max(25),
candidate_m: (builder.lambda_k * 5).max(50),
split_quantile: 0.85,
steps: 5,
..Self::new(builder)
}
}
pub fn large_dataset(builder: &ArrowSpaceBuilder) -> Self {
info!("Creating large-dataset EnergyParams");
Self {
optical_tokens: Some(100), neighbor_k: builder.lambda_k.max(15).min(30),
candidate_m: (builder.lambda_k * 2).max(30).min(80),
steps: 3,
split_quantile: 0.92,
..Self::new(builder)
}
}
}
pub trait EnergyMaps {
fn optical_compress_centroids(
centroids: &DenseMatrix<f64>,
token_budget: usize,
trim_quantile: f64,
) -> DenseMatrix<f64>;
fn bootstrap_centroid_laplacian(
centroids: &DenseMatrix<f64>,
builder: &ArrowSpaceBuilder,
) -> GraphLaplacian;
fn diffuse_and_split_subcentroids(
centroids: &DenseMatrix<f64>,
l0: &GraphLaplacian,
p: &EnergyParams,
) -> DenseMatrix<f64>;
fn adaptive_w_lambda(&self) -> f64;
fn adaptive_energy_weights(&self) -> (f64, f64);
fn search_energy(
&self,
query: &[f64],
gl_energy: &GraphLaplacian,
k: usize,
) -> Vec<(usize, f64)>;
}
impl EnergyMaps for ArrowSpace {
fn optical_compress_centroids(
centroids: &DenseMatrix<f64>,
token_budget: usize,
trim_quantile: f64,
) -> DenseMatrix<f64> {
info!(
"EnergyMaps::optical_compress_centroids: target={} tokens, trim_q={:.2}",
token_budget, trim_quantile
);
let (x, f) = centroids.shape();
debug!("Input centroids: {} × {} (X centroids, F features)", x, f);
if token_budget == 0 || token_budget >= x {
info!(
"Optical compression skipped: budget {} >= centroids {}",
token_budget, x
);
return centroids.clone();
}
trace!(
"Creating implicit projection F={} → 2D for spatial binning",
f
);
let proj = Arc::new(ImplicitProjection::new(f, 2, None));
let xy: Vec<f64> = (0..x)
.into_par_iter()
.flat_map(|i| {
let row = (0..f).map(|c| *centroids.get((i, c))).collect::<Vec<_>>();
let p2 = proj.project(&row);
vec![p2[0], p2[1]]
})
.collect();
debug!("Projected {} centroids to 2D space [parallel]", x);
let g = (token_budget as f64).sqrt().ceil() as usize;
let (minx, maxx, miny, maxy) = minmax2d(&xy);
debug!(
"Grid size: {}×{}, bounds: x=[{:.3}, {:.3}], y=[{:.3}, {:.3}]",
g, g, minx, maxx, miny, maxy
);
let mut bins: Vec<Vec<usize>> = vec![Vec::new(); g * g];
for i in 0..x {
let px = (xy[2 * i] - minx) / (maxx - minx + 1e-9);
let py = (xy[2 * i + 1] - miny) / (maxy - miny + 1e-9);
let bx = (px * g as f64).floor().clamp(0.0, (g - 1) as f64) as usize;
let by = (py * g as f64).floor().clamp(0.0, (g - 1) as f64) as usize;
bins[by * g + bx].push(i);
}
let non_empty = bins.iter().filter(|b| !b.is_empty()).count();
debug!(
"Binned centroids: {} non-empty bins out of {}",
non_empty,
g * g
);
let mut out: Vec<f64> = Vec::new();
let mut pooled_count = 0;
for (bin_idx, bin) in bins.into_iter().enumerate() {
if bin.is_empty() {
continue;
}
let mut members = bin;
let orig_size = members.len();
if members.len() > 4 {
members = trim_high_norm(centroids, &members, trim_quantile);
trace!(
"Bin {}: trimmed {} → {} members",
bin_idx,
orig_size,
members.len()
);
}
let pooled = mean_rows(centroids, &members);
out.extend(pooled);
pooled_count += 1;
if out.len() / f >= token_budget {
debug!(
"Reached token budget after {} pooled centroids",
pooled_count
);
break;
}
}
if out.len() / f < token_budget {
let deficit = token_budget - (out.len() / f);
debug!(
"Underfilled by {} tokens, topping up with low-norm centroids [parallel]",
deficit
);
let mut norms: Vec<(usize, f64)> = (0..x)
.into_par_iter()
.map(|i| {
let n = (0..f)
.map(|c| {
let v = *centroids.get((i, c));
v * v
})
.sum::<f64>()
.sqrt();
(i, n)
})
.collect();
norms.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(Ordering::Equal));
let mut added = 0;
for (i, norm) in norms {
if out.len() / f >= token_budget {
break;
}
out.extend((0..f).map(|c| *centroids.get((i, c))));
added += 1;
trace!("Added centroid {} with norm {:.6}", i, norm);
}
debug!("Top-up complete: added {} centroids", added);
}
let rows = out.len() / f;
info!(
"Optical compression complete: {} → {} centroids ({:.1}% compression)",
x,
rows,
100.0 * (1.0 - rows as f64 / x as f64)
);
DenseMatrix::<f64>::from_iterator(out.iter().copied(), rows, f, 1)
}
fn bootstrap_centroid_laplacian(
centroids: &DenseMatrix<f64>,
builder: &ArrowSpaceBuilder,
) -> GraphLaplacian {
info!(
"EnergyMaps::bootstrap_centroid_laplacian: k={}, normalise={}",
builder.lambda_k, builder.normalise
);
let (x, f) = centroids.shape();
debug!(
"Building bootstrap L₀ on {} centroids (nodes) × {} features",
x, f
);
let params = GraphParams {
eps: builder.lambda_eps,
k: builder.lambda_k.min(x - 1), topk: builder.lambda_topk.min(4).min(x - 1),
p: 2.0,
sigma: None,
normalise: builder.normalise,
sparsity_check: builder.sparsity_check, };
trace!(
"GraphParams: eps={}, k={}, topk={}, p={}",
params.eps, params.k, params.topk, params.p
);
let gl = build_laplacian_matrix(centroids.transpose(), ¶ms, Some(x), true);
assert_eq!(gl.nnodes, x, "L₀ must be in centroid space ({}×{})", x, x);
gl
}
fn diffuse_and_split_subcentroids(
centroids: &DenseMatrix<f64>,
l0: &GraphLaplacian,
p: &EnergyParams,
) -> DenseMatrix<f64> {
info!(
"EnergyMaps::diffuse_and_split_subcentroids: eta={:.3}, steps={}, split_q={:.2}",
p.eta, p.steps, p.split_quantile
);
let (x, f) = centroids.shape();
debug!(
"Diffusing {} centroids × {} features over {} steps with F×F Laplacian ({}×{})",
x,
f,
p.steps,
l0.matrix.rows(),
l0.matrix.cols()
);
assert_eq!(
l0.matrix.rows(),
f,
"Laplacian rows {} must match feature count {}",
l0.matrix.rows(),
f
);
assert_eq!(
l0.matrix.rows(),
l0.matrix.cols(),
"Laplacian must be square"
);
let mut work = centroids.clone();
for step in 0..p.steps {
trace!("Diffusion step {}/{} [parallel]", step + 1, p.steps);
let updated_rows: Vec<Vec<f64>> = (0..x)
.into_par_iter()
.map(|row_idx| {
let row_vec: Vec<f64> = (0..f).map(|col| *work.get((row_idx, col))).collect();
let l_row = l0.multiply_vector(&row_vec);
(0..f)
.map(|feat_idx| row_vec[feat_idx] - p.eta * l_row[feat_idx])
.collect()
})
.collect();
for (row_idx, values) in updated_rows.iter().enumerate() {
for (col_idx, &val) in values.iter().enumerate() {
work.set((row_idx, col_idx), val);
}
}
}
debug!("Diffusion complete after {} steps", p.steps);
trace!(
"Computing node energy and dispersion with neighbor_k={}",
p.neighbor_k
);
let (lambda, gini) = node_energy_and_dispersion(&work, l0, p.neighbor_k);
let lambda_stats = (
lambda.iter().fold(f64::INFINITY, |a, &b| a.min(b)),
lambda.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b)),
lambda.iter().sum::<f64>() / lambda.len() as f64,
);
let gini_stats = (
gini.iter().fold(f64::INFINITY, |a, &b| a.min(b)),
gini.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b)),
gini.iter().sum::<f64>() / gini.len() as f64,
);
debug!(
"Energy: λ ∈ [{:.6}, {:.6}], mean={:.6}",
lambda_stats.0, lambda_stats.1, lambda_stats.2
);
debug!(
"Dispersion: G ∈ [{:.6}, {:.6}], mean={:.6}",
gini_stats.0, gini_stats.1, gini_stats.2
);
let mut g_sorted = gini.clone();
g_sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
let q_idx = ((g_sorted.len() as f64 - 1.0) * p.split_quantile).round() as usize;
let thresh = g_sorted[q_idx];
debug!(
"Split threshold (quantile {:.2}): G ≥ {:.6}",
p.split_quantile, thresh
);
let mut data: Vec<f64> = work.iterator(0).copied().collect();
let split_data: Vec<(usize, Vec<f64>, Vec<f64>)> = (0..x)
.into_par_iter()
.filter(|&i| gini[i] >= thresh)
.map(|i| {
let nbrs = topk_by_l2(&work, i, p.neighbor_k);
let mean = mean_rows(&work, &nbrs);
let dir = unit_diff(work.get_row(i).iterator(0).copied().collect(), &mean);
let std_loc = local_std(work.get_row(i).iterator(0).copied().collect(), &mean);
let tau = p.split_tau * std_loc.max(1e-6);
let c = work.get_row(i).iterator(0).copied().collect::<Vec<_>>();
let c1 = add_scaled(&c, &dir, tau);
let c2 = add_scaled(&c, &dir, -tau);
(i, c1, c2)
})
.collect();
let split_count = split_data.len();
debug!("Computed {} splits [parallel]", split_count);
for (i, c1, c2) in split_data {
data.extend(c1);
data.extend(c2);
trace!("Split centroid {}: G={:.6}", i, gini[i]);
}
let final_rows = data.len() / f;
info!(
"Sub-centroid generation: {} → {} centroids ({} splits)",
x, final_rows, split_count
);
DenseMatrix::<f64>::from_iterator(data.iter().copied(), final_rows, f, 1)
}
fn search_energy(
&self,
query: &[f64],
gl_energy: &GraphLaplacian,
k: usize,
) -> Vec<(usize, f64)> {
let query_lambda = self.prepare_query_item(query, gl_energy);
let query_norm = query.iter().map(|x| x * x).sum::<f64>().sqrt();
let mut scored: Vec<(usize, f64)> = self
.lambdas
.iter()
.enumerate()
.map(|(i, &lambda)| {
let lambda_dist = (query_lambda - lambda).abs();
let tie_breaker = if lambda_dist < 1e-9 {
let item = self.get_item(i);
let dot: f64 = query.iter().zip(item.item.iter()).map(|(a, b)| a * b).sum();
let item_norm = self
.item_norms
.as_ref()
.map(|norms| norms[i])
.unwrap_or_else(|| item.item.iter().map(|x| x * x).sum::<f64>().sqrt());
let cosine = dot / (query_norm * item_norm + 1e-9);
(1.0 - cosine) * 1e-9 } else {
0.0 };
(i, lambda_dist + tie_breaker)
})
.collect();
scored.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(Ordering::Equal));
scored.truncate(k);
scored
}
#[inline]
fn adaptive_w_lambda(&self) -> f64 {
if self.range_lambdas < 1e-9 {
return 0.5; }
0.5 + 1.5 * self.range_lambdas
}
#[inline]
fn adaptive_energy_weights(&self) -> (f64, f64) {
let w_lambda = self.adaptive_w_lambda();
let w_dirichlet = 2.5 - w_lambda; (w_lambda, w_dirichlet)
}
}
fn minmax2d(xy: &Vec<f64>) -> (f64, f64, f64, f64) {
trace!("Computing 2D bounds over {} points", xy.len() / 2);
let mut minx = f64::INFINITY;
let mut maxx = f64::NEG_INFINITY;
let mut miny = f64::INFINITY;
let mut maxy = f64::NEG_INFINITY;
for i in (0..xy.len()).step_by(2) {
let x = xy[i];
let y = xy[i + 1];
minx = minx.min(x);
maxx = maxx.max(x);
miny = miny.min(y);
maxy = maxy.max(y);
}
(minx, maxx, miny, maxy)
}
fn trim_high_norm(dm: &DenseMatrix<f64>, idx: &Vec<usize>, q: f64) -> Vec<usize> {
trace!(
"Trimming high-norm items: {} candidates, quantile={:.2} [parallel]",
idx.len(),
q
);
let f = dm.shape().1;
let mut pairs: Vec<(usize, f64)> = idx
.par_iter()
.map(|&i| {
let n = (0..f)
.map(|c| {
let v = *dm.get((i, c));
v * v
})
.sum::<f64>()
.sqrt();
(i, n)
})
.collect();
pairs.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(Ordering::Equal));
let cut = (pairs.len() as f64 * (1.0 - q))
.round()
.clamp(1.0, pairs.len() as f64) as usize;
let result = pairs
.into_iter()
.take(cut)
.map(|(i, _)| i)
.collect::<Vec<_>>();
trace!("Trimmed to {} items [parallel]", result.len());
result
}
fn mean_rows(dm: &DenseMatrix<f64>, idx: &Vec<usize>) -> Vec<f64> {
let f = dm.shape().1;
if idx.is_empty() {
trace!("mean_rows: empty index, returning zero vector");
return vec![0.0; f];
}
trace!("Computing mean of {} rows", idx.len());
let mut acc = vec![0.0; f];
for &i in idx {
for c in 0..f {
acc[c] += *dm.get((i, c));
}
}
for c in 0..f {
acc[c] /= idx.len() as f64;
}
acc
}
fn unit_diff(a: Vec<f64>, b: &Vec<f64>) -> Vec<f64> {
let mut d: Vec<f64> = a.iter().zip(b.iter()).map(|(x, y)| x - y).collect();
let n = (d.iter().map(|v| v * v).sum::<f64>()).sqrt().max(1e-9);
for v in d.iter_mut() {
*v /= n;
}
d
}
fn local_std(a: Vec<f64>, b: &Vec<f64>) -> f64 {
let diffs: Vec<f64> = a.iter().zip(b.iter()).map(|(x, y)| x - y).collect();
let mean = diffs.iter().sum::<f64>() / diffs.len().max(1) as f64;
let var =
diffs.iter().map(|d| (d - mean) * (d - mean)).sum::<f64>() / diffs.len().max(1) as f64;
var.sqrt()
}
fn add_scaled(a: &Vec<f64>, dir: &Vec<f64>, t: f64) -> Vec<f64> {
a.iter().zip(dir.iter()).map(|(x, d)| x + t * d).collect()
}
#[allow(dead_code)]
fn vec_diff(a: &Vec<f64>, b: &Vec<f64>) -> Vec<f64> {
a.iter().zip(b.iter()).map(|(x, y)| x - y).collect()
}
fn topk_by_l2(dm: &DenseMatrix<f64>, i: usize, k: usize) -> Vec<usize> {
let target = dm.get_row(i);
let mut scored: Vec<(usize, f64)> = (0..dm.shape().0)
.filter(|&j| j != i)
.map(|j| {
let v = dm.get_row(j);
let d = target
.iterator(0)
.zip(v.iterator(0))
.map(|(a, b)| (a - b) * (a - b))
.sum::<f64>();
(j, d)
})
.collect();
scored.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(Ordering::Equal));
scored.truncate(k);
scored.into_iter().map(|(j, _)| j).collect()
}
fn robust_scale(x: &Vec<f64>) -> f64 {
if x.is_empty() {
trace!("robust_scale: empty vector, returning 1.0");
return 1.0;
}
let mut v = x.clone();
v.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
let median = v[v.len() / 2];
let mut devs: Vec<f64> = v.iter().map(|t| (t - median).abs()).collect();
devs.sort_by(|a, b| a.partial_cmp(&b).unwrap_or(Ordering::Equal));
let mad = devs[devs.len() / 2];
let scale = (1.4826 * mad).max(1e-9);
trace!(
"robust_scale: median={:.6}, MAD={:.6}, scale={:.6}",
median, mad, scale
);
scale
}
fn node_energy_and_dispersion(
x: &DenseMatrix<f64>,
l: &GraphLaplacian,
k: usize,
) -> (Vec<f64>, Vec<f64>) {
let (n, f) = x.shape();
trace!(
"Computing node energy and dispersion: {} centroids × {} features, k={} [parallel]",
n, f, k
);
debug!(
"Laplacian: {}×{} (must be F×F feature-space)",
l.matrix.rows(),
l.matrix.cols()
);
assert_eq!(
f,
l.matrix.rows(),
"Feature count {} must match Laplacian rows {}",
f,
l.matrix.rows()
);
assert_eq!(
l.matrix.rows(),
l.matrix.cols(),
"Laplacian must be square: {}×{}",
l.matrix.rows(),
l.matrix.cols()
);
assert_eq!(
l.nnodes, n,
"Number of nodes must match: {} != {}",
l.nnodes, n,
);
let results: Vec<(f64, f64)> = (0..n)
.into_par_iter()
.map(|row_idx| {
let centroid_vec: Vec<f64> = (0..f).map(|col| *x.get((row_idx, col))).collect();
let lx = l.multiply_vector(¢roid_vec);
let (lambda, dispersion) = rayon::join(
|| {
let numerator: f64 = centroid_vec
.par_iter()
.zip(lx.par_iter())
.map(|(xi, lxi)| xi * lxi)
.sum();
let denominator: f64 = centroid_vec.par_iter().map(|xi| xi * xi).sum();
if denominator > 1e-12 {
(numerator / denominator).max(0.0)
} else {
0.0
}
},
|| {
let edge_energy_sum: f64 = (0..f)
.into_par_iter()
.map(|i| {
let mut local_sum = 0.0;
for j in (i + 1)..f {
if let Some(&lij) = l.matrix.get(i, j) {
let w = (-lij).max(0.0);
if w > 0.0 {
let diff = centroid_vec[i] - centroid_vec[j];
local_sum += w * diff * diff;
}
}
}
local_sum
})
.sum();
if edge_energy_sum > 1e-12 {
let g_sq_sum: f64 = (0..f)
.into_par_iter()
.map(|i| {
let mut local_g = 0.0;
for j in (i + 1)..f {
if let Some(&lij) = l.matrix.get(i, j) {
let w = (-lij).max(0.0);
if w > 0.0 {
let diff = centroid_vec[i] - centroid_vec[j];
let contrib = w * diff * diff;
let share = contrib / edge_energy_sum;
local_g += share * share;
}
}
}
local_g
})
.sum();
g_sq_sum.clamp(0.0, 1.0)
} else {
0.0
}
},
);
(lambda, dispersion)
})
.collect();
let (lambdas, dispersions): (Vec<_>, Vec<_>) = results.into_iter().unzip();
debug!("Energy and dispersion computed for {} nodes [parallel]", n);
(lambdas, dispersions)
}
pub trait EnergyMapsBuilder {
fn build_energy(
&mut self,
rows: Vec<Vec<f64>>,
energy_params: EnergyParams,
) -> (ArrowSpace, GraphLaplacian);
fn build_energy_laplacian(
&self,
sub_centroids: &DenseMatrix<f64>,
p: &EnergyParams,
) -> (GraphLaplacian, Vec<f64>, Vec<f64>);
}
impl EnergyMapsBuilder for ArrowSpaceBuilder {
fn build_energy(
&mut self,
rows: Vec<Vec<f64>>,
energy_params: EnergyParams,
) -> (ArrowSpace, GraphLaplacian) {
assert!(
self.use_dims_reduction == true,
"When using build_energy, dim reduction is needed"
);
if self.prebuilt_spectral == true {
panic!(
"Spectral mode not compatible with build_energy, please do not enable for energy search"
);
}
self.nitems = rows.len();
self.nfeatures = rows[0].len();
let ClusteredOutput {
mut aspace,
mut centroids,
..
} = self.start_clustering(rows);
if aspace.projection_matrix.is_some() && aspace.nfeatures > 64 {
assert_ne!(
centroids.shape().1,
aspace.nfeatures,
"aspace is now projected"
);
} else {
assert_eq!(
centroids.shape().1,
aspace.nfeatures,
"aspace has not been projected"
);
}
if let Some(tokens) = energy_params.optical_tokens {
centroids = ArrowSpace::optical_compress_centroids(
¢roids,
tokens,
energy_params.trim_quantile,
);
}
let l0: GraphLaplacian = ArrowSpace::bootstrap_centroid_laplacian(¢roids, &self);
assert_eq!(centroids.shape().0, l0.nnodes, "l0 is still non-projected");
let sub_centroids: DenseMatrix<f64> =
ArrowSpace::diffuse_and_split_subcentroids(¢roids, &l0, &energy_params);
assert_eq!(sub_centroids.shape().1, centroids.shape().1);
let (gl_energy, _, _) = self.build_energy_laplacian(&sub_centroids, &energy_params);
assert_eq!(
gl_energy.shape().1,
sub_centroids.shape().1,
"Graph cols ({}) must match sub_centroids features ({})",
gl_energy.shape().1,
sub_centroids.shape().1
);
aspace.sub_centroids = Some(sub_centroids.clone());
let sub_centroids_shape = sub_centroids.shape();
let mut subcentroid_space =
ArrowSpace::subcentroids_from_dense_matrix(sub_centroids.clone());
subcentroid_space.taumode = aspace.taumode;
subcentroid_space.projection_matrix = aspace.projection_matrix.clone();
subcentroid_space.reduced_dim = aspace.reduced_dim.clone();
subcentroid_space.signals = sprs::CsMat::empty(sprs::CSR, 0);
assert_eq!(
subcentroid_space.nfeatures,
gl_energy.shape().1,
"Subcentroid count must match energy graph dimensions"
);
info!(
"Computing lambdas on {} sub_centroids...",
subcentroid_space.nitems
);
TauMode::compute_taumode_lambdas_parallel(
&mut subcentroid_space,
&gl_energy,
self.synthesis,
);
aspace.subcentroid_lambdas = Some(subcentroid_space.lambdas.clone());
info!(
"Sub_centroid λ: min={:.6}, max={:.6}, mean={:.6}",
subcentroid_space
.lambdas
.iter()
.fold(f64::INFINITY, |a, &b| a.min(b)),
subcentroid_space
.lambdas
.iter()
.fold(0.0_f64, |a, &b| a.max(b)),
subcentroid_space.lambdas.iter().sum::<f64>() / subcentroid_space.nitems as f64
);
info!(
"Mapping {} items to {:?} sub_centroids and computing norms...",
aspace.nitems, sub_centroids_shape
);
let epsilon: f64 = 1e-11;
let results: Vec<(usize, f64, f64)> = (0..aspace.nitems)
.into_par_iter()
.map(|i| {
let item = aspace.get_item(i);
let projected_item = if aspace.projection_matrix.is_some()
&& item.item.len() == aspace.projection_matrix.as_ref().unwrap().original_dim
{
aspace.project_query(&item.item)
} else if aspace.projection_matrix.is_none()
|| item.item.len() == aspace.projection_matrix.as_ref().unwrap().reduced_dim
{
item.item.to_owned()
} else {
panic!(
"Check the projection pipeline, item seems neither projected nor unprojected. \n\
input item len: {:?} \
projection matrix is set: {} \
projection matrix original dims: {} \
projection matrix reduced dims: {}",
item.item.len(),
aspace.projection_matrix.as_ref().is_some(),
aspace.projection_matrix.as_ref().unwrap().original_dim,
aspace.projection_matrix.as_ref().unwrap().reduced_dim
)
};
let item_lambda = aspace.prepare_query_item(&projected_item, &gl_energy);
let mut best_idx = 0usize;
let mut best_dist = f64::INFINITY;
for sc_idx in 0..sub_centroids.shape().0 {
let sc_lambda = subcentroid_space.lambdas[sc_idx];
let lambda_dist = (item_lambda - sc_lambda).abs();
if lambda_dist < best_dist {
best_dist = lambda_dist;
best_idx = sc_idx;
}
}
let mut candidates: Vec<usize> = Vec::new();
for sc_idx in 0..sub_centroids.shape().0 {
let sc_lambda = subcentroid_space.lambdas[sc_idx];
let lambda_dist = (item_lambda - sc_lambda).abs();
if (lambda_dist - best_dist).abs() < epsilon {
candidates.push(sc_idx);
}
}
if candidates.len() > 1 {
let item_norm_proj: f64 =
projected_item.iter().map(|x| x * x).sum::<f64>().sqrt();
let mut best_cos = f64::NEG_INFINITY;
let mut best_sc = best_idx;
for sc_idx in candidates {
let mut dot = 0.0f64;
let mut cent_norm_sq = 0.0f64;
for (a, b) in projected_item
.iter()
.zip(sub_centroids.get_row(sc_idx).iterator(0))
{
dot += a * b;
cent_norm_sq += b * b;
}
let cent_norm = cent_norm_sq.sqrt();
let cosine = if item_norm_proj > 0.0 && cent_norm > 0.0 {
dot / (item_norm_proj * cent_norm)
} else {
0.0
};
if cosine > best_cos {
best_cos = cosine;
best_sc = sc_idx;
}
}
best_idx = best_sc;
}
let norm: f64 = item.item.iter().map(|x| x * x).sum::<f64>().sqrt();
(best_idx, subcentroid_space.lambdas[best_idx], norm)
})
.collect();
let (centroid_map, item_lambdas, item_norms): (Vec<_>, Vec<_>, Vec<_>) = {
let mut cmap = Vec::with_capacity(results.len());
let mut lambdas = Vec::with_capacity(results.len());
let mut norms = Vec::with_capacity(results.len());
for (cidx, lambda, norm) in results {
cmap.push(cidx);
lambdas.push(lambda);
norms.push(norm);
}
(cmap, lambdas, norms)
};
aspace.centroid_map = Some(centroid_map);
aspace.lambdas = item_lambdas;
aspace.item_norms = Some(item_norms);
aspace.build_lambdas_sorted();
info!(
"Item λ assigned: min={:.6}, max={:.6}, mean={:.6}",
aspace.lambdas.iter().fold(f64::INFINITY, |a, &b| a.min(b)),
aspace.lambdas.iter().fold(0.0_f64, |a, &b| a.max(b)),
aspace.lambdas.iter().sum::<f64>() / aspace.nitems as f64
);
debug!(
"Item norms computed: min={:.6}, max={:.6}, mean={:.6}",
aspace
.item_norms
.as_ref()
.unwrap()
.iter()
.fold(f64::INFINITY, |a, &b| a.min(b)),
aspace
.item_norms
.as_ref()
.unwrap()
.iter()
.fold(0.0_f64, |a, &b| a.max(b)),
aspace.item_norms.as_ref().unwrap().iter().sum::<f64>() / aspace.nitems as f64
);
(aspace, gl_energy)
}
fn build_energy_laplacian(
&self,
sub_centroids: &DenseMatrix<f64>,
energy_params: &EnergyParams,
) -> (GraphLaplacian, Vec<f64>, Vec<f64>) {
info!(
"EnergyMaps::build_energy_laplacian: k={}, w_λ={:.2}, w_G={:.2}, w_D={:.2}",
self.lambda_k, energy_params.w_lambda, energy_params.w_disp, energy_params.w_dirichlet
);
let (x, f) = sub_centroids.shape();
debug!(
"Building energy Laplacian on {} sub-centroids × {} features",
x, f
);
trace!("Bootstrapping F×F Laplacian for taumode computation");
let l_boot = ArrowSpace::bootstrap_centroid_laplacian(sub_centroids, &self);
assert_eq!(
l_boot.matrix.rows(),
l_boot.matrix.cols(),
"graph laplacian should be square"
);
debug!(
"Bootstrap Laplacian: {}×{} (F×F feature-space for taumode)",
l_boot.matrix.rows(),
l_boot.matrix.cols()
);
trace!("Computing energy and dispersion features over F×F graph");
let (lambda, gini) = node_energy_and_dispersion(
sub_centroids,
&l_boot,
energy_params.neighbor_k.max(self.lambda_k),
);
let s_l = robust_scale(&lambda).max(1e-9);
let s_g = robust_scale(&gini).max(1e-9);
debug!("Robust scales: λ={:.6}, G={:.6}", s_l, s_g);
info!(
"Energy Laplacian (F×F): {}×{}, {} nnz, {:.2}% sparse",
l_boot.shape().0,
l_boot.shape().1,
l_boot.nnz(),
GraphLaplacian::sparsity(&l_boot.matrix) * 100.0
);
(l_boot, lambda, gini)
}
}
#[derive(Clone, Copy, Debug)]
pub struct ProjectedEnergyParams {
pub w_lambda: f64,
pub w_dirichlet: f64,
pub eps_norm: f64,
}
impl Default for ProjectedEnergyParams {
fn default() -> Self {
Self {
w_lambda: 1.0,
w_dirichlet: 0.5,
eps_norm: 1e-9,
}
}
}