use std::collections::HashSet;
use super::result::AlgoResultBatch;
use crate::engine::graph::algo::GraphAlgorithm;
use crate::engine::graph::csr::CsrIndex;
pub const DEFAULT_HIGH_DEGREE_THRESHOLD: usize = 2_000;
pub const DEFAULT_SAMPLE_PAIRS: usize = 10_000;
pub fn run(csr: &CsrIndex, high_degree_threshold: usize, sample_pairs: usize) -> AlgoResultBatch {
let n = csr.node_count();
if n == 0 {
return AlgoResultBatch::new(GraphAlgorithm::Lcc);
}
let mut batch = AlgoResultBatch::new(GraphAlgorithm::Lcc);
for node in 0..n {
let node_id = node as u32;
let coeff = compute_lcc(csr, node_id, high_degree_threshold, sample_pairs);
batch.push_node_f64(csr.node_name(node as u32).to_string(), coeff);
}
batch
}
fn compute_lcc(
csr: &CsrIndex,
node: u32,
high_degree_threshold: usize,
sample_pairs: usize,
) -> f64 {
let mut neighbor_set: HashSet<u32> = HashSet::new();
for (_lid, dst) in csr.iter_out_edges(node) {
if dst != node {
neighbor_set.insert(dst);
}
}
for (_lid, src) in csr.iter_in_edges(node) {
if src != node {
neighbor_set.insert(src);
}
}
let k = neighbor_set.len();
if k < 2 {
return 0.0;
}
let neighbors: Vec<u32> = neighbor_set.into_iter().collect();
let possible_pairs = k * (k - 1) / 2;
let triangles = if k > high_degree_threshold {
count_triangles_sampled(csr, &neighbors, possible_pairs, sample_pairs)
} else {
count_triangles_exact(csr, &neighbors)
};
triangles as f64 / possible_pairs as f64
}
fn count_triangles_exact(csr: &CsrIndex, neighbors: &[u32]) -> usize {
let neighbor_set: HashSet<u32> = neighbors.iter().copied().collect();
let mut triangles = 0;
for &u in neighbors {
for (_lid, w) in csr.iter_out_edges(u) {
if w > u && neighbor_set.contains(&w) {
triangles += 1;
}
}
for (_lid, w) in csr.iter_in_edges(u) {
if w > u && neighbor_set.contains(&w) {
let already_counted = csr.iter_out_edges(u).any(|(_l, t)| t == w);
if !already_counted {
triangles += 1;
}
}
}
}
triangles
}
fn count_triangles_sampled(
csr: &CsrIndex,
neighbors: &[u32],
total_pairs: usize,
sample_pairs: usize,
) -> usize {
let neighbor_set: HashSet<u32> = neighbors.iter().copied().collect();
let n = neighbors.len();
let samples = sample_pairs.min(total_pairs);
let mut found = 0usize;
let mut state: u64 = (n as u64).wrapping_mul(0x517cc1b727220a95).wrapping_add(1);
for _ in 0..samples {
state = state
.wrapping_mul(6_364_136_223_846_793_005)
.wrapping_add(1);
let i = (state >> 33) as usize % n;
state = state
.wrapping_mul(6_364_136_223_846_793_005)
.wrapping_add(1);
let mut j = (state >> 33) as usize % n;
if j == i {
j = (j + 1) % n;
}
let u = neighbors[i];
let w = neighbors[j];
if has_undirected_edge(csr, u, w, &neighbor_set) {
found += 1;
}
}
(found as f64 / samples as f64 * total_pairs as f64).round() as usize
}
fn has_undirected_edge(csr: &CsrIndex, u: u32, w: u32, _neighbor_set: &HashSet<u32>) -> bool {
for (_lid, dst) in csr.iter_out_edges(u) {
if dst == w {
return true;
}
}
for (_lid, src) in csr.iter_in_edges(u) {
if src == w {
return true;
}
}
false
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn lcc_triangle() {
let mut csr = CsrIndex::new();
csr.add_edge("a", "L", "b");
csr.add_edge("b", "L", "c");
csr.add_edge("c", "L", "a");
csr.add_edge("b", "L", "a");
csr.add_edge("c", "L", "b");
csr.add_edge("a", "L", "c");
csr.compact();
let batch = run(&csr, DEFAULT_HIGH_DEGREE_THRESHOLD, DEFAULT_SAMPLE_PAIRS);
assert_eq!(batch.len(), 3);
let json = batch.to_json().unwrap();
let rows: Vec<serde_json::Value> = serde_json::from_slice(&json).unwrap();
for row in &rows {
let coeff = row["coefficient"].as_f64().unwrap();
assert!(
(coeff - 1.0).abs() < 1e-9,
"node {} has LCC {coeff}, expected 1.0",
row["node_id"]
);
}
}
#[test]
fn lcc_star() {
let mut csr = CsrIndex::new();
csr.add_edge("a", "L", "b");
csr.add_edge("a", "L", "c");
csr.add_edge("a", "L", "d");
csr.compact();
let batch = run(&csr, DEFAULT_HIGH_DEGREE_THRESHOLD, DEFAULT_SAMPLE_PAIRS);
let json = batch.to_json().unwrap();
let rows: Vec<serde_json::Value> = serde_json::from_slice(&json).unwrap();
for row in &rows {
let coeff = row["coefficient"].as_f64().unwrap();
assert!(
coeff.abs() < 1e-9,
"node {} has LCC {coeff}, expected 0.0",
row["node_id"]
);
}
}
#[test]
fn lcc_path() {
let mut csr = CsrIndex::new();
csr.add_edge("a", "L", "b");
csr.add_edge("b", "L", "c");
csr.compact();
let batch = run(&csr, DEFAULT_HIGH_DEGREE_THRESHOLD, DEFAULT_SAMPLE_PAIRS);
let json = batch.to_json().unwrap();
let rows: Vec<serde_json::Value> = serde_json::from_slice(&json).unwrap();
let map: std::collections::HashMap<&str, f64> = rows
.iter()
.map(|r| {
(
r["node_id"].as_str().unwrap(),
r["coefficient"].as_f64().unwrap(),
)
})
.collect();
assert!(map["a"].abs() < 1e-9); assert!(map["b"].abs() < 1e-9); assert!(map["c"].abs() < 1e-9); }
#[test]
fn lcc_empty_graph() {
let csr = CsrIndex::new();
let batch = run(&csr, DEFAULT_HIGH_DEGREE_THRESHOLD, DEFAULT_SAMPLE_PAIRS);
assert!(batch.is_empty());
}
#[test]
fn lcc_single_node() {
let mut csr = CsrIndex::new();
csr.add_node("lonely");
csr.compact();
let batch = run(&csr, DEFAULT_HIGH_DEGREE_THRESHOLD, DEFAULT_SAMPLE_PAIRS);
assert_eq!(batch.len(), 1);
let json = batch.to_json().unwrap();
let rows: Vec<serde_json::Value> = serde_json::from_slice(&json).unwrap();
assert!(rows[0]["coefficient"].as_f64().unwrap().abs() < 1e-9);
}
#[test]
fn lcc_partial_connectivity() {
let mut csr = CsrIndex::new();
csr.add_edge("a", "L", "b");
csr.add_edge("a", "L", "c");
csr.add_edge("b", "L", "c");
csr.add_edge("b", "L", "d");
csr.add_edge("c", "L", "d");
csr.compact();
let batch = run(&csr, DEFAULT_HIGH_DEGREE_THRESHOLD, DEFAULT_SAMPLE_PAIRS);
let json = batch.to_json().unwrap();
let rows: Vec<serde_json::Value> = serde_json::from_slice(&json).unwrap();
let map: std::collections::HashMap<&str, f64> = rows
.iter()
.map(|r| {
(
r["node_id"].as_str().unwrap(),
r["coefficient"].as_f64().unwrap(),
)
})
.collect();
assert!(
(map["a"] - 1.0).abs() < 1e-9,
"a LCC = {}, expected 1.0",
map["a"]
);
}
}