leiden-rs 0.6.0

//! Evaluation metrics for community detection.

use crate::partition::Partition;
use crate::quality::GraphData;

/// Normalized Mutual Information between two membership vectors.
///
/// Returns 1.0 for identical partitions and 0.0 for independent ones.
/// Uses natural logarithm (ln) for all entropy/MI computations.
///
/// # Panics
///
/// Panics if the two slices have different lengths.
pub fn nmi(partition_true: &[usize], partition_pred: &[usize]) -> f64 {
    assert!(
        partition_true.len() == partition_pred.len(),
        "nmi: partition lengths differ ({} vs {})",
        partition_true.len(),
        partition_pred.len()
    );

    let n = partition_true.len();
    if n == 0 {
        return 1.0;
    }

    let max_true = *partition_true.iter().max().unwrap();
    let max_pred = *partition_pred.iter().max().unwrap();

    let rows = max_true + 1;
    let cols = max_pred + 1;

    // Build contingency table
    let mut contingency = vec![0usize; rows * cols];
    for i in 0..n {
        contingency[partition_true[i] * cols + partition_pred[i]] += 1;
    }

    // Row sums (a) and column sums (b)
    let mut a = vec![0usize; rows];
    let mut b = vec![0usize; cols];
    for i in 0..rows {
        for j in 0..cols {
            let val = contingency[i * cols + j];
            a[i] += val;
            b[j] += val;
        }
    }

    let nf = n as f64;
    let epsilon = 1e-10;

    // Compute mutual information
    let mut mi = 0.0f64;
    for i in 0..rows {
        if a[i] == 0 {
            continue;
        }
        for j in 0..cols {
            let n_ij = contingency[i * cols + j];
            if n_ij == 0 {
                continue;
            }
            mi += (n_ij as f64 / nf) * (nf * n_ij as f64 / (a[i] as f64 * b[j] as f64)).ln();
        }
    }

    // Compute entropies
    let mut h_true = 0.0f64;
    for &count in &a {
        if count > 0 {
            let p = count as f64 / nf;
            h_true -= p * p.ln();
        }
    }

    let mut h_pred = 0.0f64;
    for &count in &b {
        if count > 0 {
            let p = count as f64 / nf;
            h_pred -= p * p.ln();
        }
    }

    if h_true + h_pred < epsilon {
        return 1.0;
    }
    if mi < epsilon {
        return 0.0;
    }

    2.0 * mi / (h_true + h_pred)
}

/// Adjusted Rand Index between two membership vectors.
///
/// Returns 1.0 for perfect agreement. Values near 0 indicate random-level
/// agreement; negative values indicate worse than random.
///
/// # Panics
///
/// Panics if the two slices have different lengths.
pub fn ari(partition_true: &[usize], partition_pred: &[usize]) -> f64 {
    assert!(
        partition_true.len() == partition_pred.len(),
        "ari: partition lengths differ ({} vs {})",
        partition_true.len(),
        partition_pred.len()
    );

    let n = partition_true.len();
    if n == 0 {
        return 1.0;
    }

    let max_true = *partition_true.iter().max().unwrap();
    let max_pred = *partition_pred.iter().max().unwrap();

    let rows = max_true + 1;
    let cols = max_pred + 1;

    // Build contingency table
    let mut contingency = vec![0usize; rows * cols];
    for i in 0..n {
        contingency[partition_true[i] * cols + partition_pred[i]] += 1;
    }

    // Row sums and column sums
    let mut a = vec![0usize; rows];
    let mut b = vec![0usize; cols];
    for i in 0..rows {
        for j in 0..cols {
            let val = contingency[i * cols + j];
            a[i] += val;
            b[j] += val;
        }
    }

    // Sum of C(n_ij, 2) over all cells
    let sum_comb_n: f64 = contingency
        .iter()
        .map(|&v| (v as f64) * (v as f64 - 1.0) / 2.0)
        .sum();

    // Sum of C(a_i, 2) over rows
    let sum_comb_a: f64 = a.iter().map(|&v| (v as f64) * (v as f64 - 1.0) / 2.0).sum();

    // Sum of C(b_j, 2) over columns
    let sum_comb_b: f64 = b.iter().map(|&v| (v as f64) * (v as f64 - 1.0) / 2.0).sum();

    let total_pairs = (n as f64) * (n as f64 - 1.0) / 2.0;

    if total_pairs == 0.0 {
        return 1.0;
    }

    let expected = sum_comb_a * sum_comb_b / total_pairs;
    let max_val = (sum_comb_a + sum_comb_b) / 2.0;

    if (max_val - expected).abs() < 1e-10 {
        return 1.0;
    }

    (sum_comb_n - expected) / (max_val - expected)
}

/// Conductance of each community: cut_c / min(vol_c, 2m - vol_c).
///
/// Returns a `Vec<f64>` with one entry per community. Communities with
/// zero volume or volume equal to `2m` have conductance 0.0.
pub fn conductance(data: &GraphData, partition: &Partition) -> Vec<f64> {
    let n = data.node_count();
    let two_m = 2.0 * data.total_weight();
    let num_comms = partition.num_communities();

    let mut vol = vec![0.0f64; num_comms];
    let mut cut = vec![0.0f64; num_comms];

    for node in 0..n {
        let comm = partition.community_of(node);
        if comm >= num_comms {
            continue;
        }
        vol[comm] += data.degree_of(node);
        for (neighbor, weight) in data.neighbors(node) {
            if partition.community_of(neighbor) != comm {
                cut[comm] += weight;
            }
        }
    }

    let mut result = Vec::with_capacity(num_comms);
    for c in 0..num_comms {
        if vol[c] == 0.0 || vol[c] == two_m {
            result.push(0.0);
        } else {
            let denom = vol[c].min(two_m - vol[c]);
            result.push(cut[c] / denom);
        }
    }
    result
}

/// Internal edge density of each community: 2 * e_c / (n_c * (n_c - 1)).
///
/// Communities with 0 or 1 nodes have density 1.0.
pub fn internal_density(data: &GraphData, partition: &Partition) -> Vec<f64> {
    let n = data.node_count();
    let num_comms = partition.num_communities();

    let mut n_c = vec![0usize; num_comms];
    let mut e_c = vec![0.0f64; num_comms];

    for node in 0..n {
        let comm = partition.community_of(node);
        if comm >= num_comms {
            continue;
        }
        n_c[comm] += 1;
        for (neighbor, weight) in data.neighbors(node) {
            if neighbor > node && partition.community_of(neighbor) == comm {
                e_c[comm] += weight;
            }
        }
    }

    let mut result = Vec::with_capacity(num_comms);
    for c in 0..num_comms {
        if n_c[c] <= 1 {
            result.push(1.0);
        } else {
            let pairs = (n_c[c] * (n_c[c] - 1)) as f64;
            result.push(2.0 * e_c[c] / pairs);
        }
    }
    result
}

/// Coverage: fraction of intra-community edges (sum of e_c / m).
pub fn coverage(data: &GraphData, partition: &Partition) -> f64 {
    let m = data.total_weight();
    if m == 0.0 {
        return 0.0;
    }

    let n = data.node_count();
    let num_comms = partition.num_communities();
    let mut total_internal = 0.0f64;

    for node in 0..n {
        let comm = partition.community_of(node);
        if comm >= num_comms {
            continue;
        }
        for (neighbor, weight) in data.neighbors(node) {
            if neighbor > node && partition.community_of(neighbor) == comm {
                total_internal += weight;
            }
        }
    }

    total_internal / m
}

/// Size of each community (number of nodes).
///
/// Delegates to `Partition::community_sizes()`.
pub fn community_sizes(partition: &Partition) -> Vec<usize> {
    partition.community_sizes()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_nmi_identical() {
        let p = vec![0, 0, 1, 1, 2, 2];
        assert!((nmi(&p, &p) - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_nmi_independent() {
        let p1 = vec![0, 0, 0, 1, 1, 1];
        let p2 = vec![0, 1, 0, 1, 0, 1];
        assert!(nmi(&p1, &p2) < 0.5);
    }

    #[test]
    fn test_nmi_single_community() {
        let p1 = vec![0, 0, 0, 0];
        let p2 = vec![0, 0, 0, 0];
        assert!((nmi(&p1, &p2) - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_nmi_symmetric() {
        let p1 = vec![0, 0, 1, 1, 2, 2, 3, 3];
        let p2 = vec![0, 0, 0, 1, 1, 1, 2, 2];
        assert!((nmi(&p1, &p2) - nmi(&p2, &p1)).abs() < 1e-10);
    }

    #[test]
    fn test_nmi_empty() {
        let p: Vec<usize> = vec![];
        assert!((nmi(&p, &p) - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_nmi_range() {
        let p1 = vec![0, 0, 1, 1, 2, 2];
        let p2 = vec![0, 1, 2, 0, 1, 2];
        let val = nmi(&p1, &p2);
        assert!((0.0..=1.0).contains(&val), "NMI out of range: {val}");
    }

    #[test]
    fn test_ari_identical() {
        let p = vec![0, 0, 1, 1, 2, 2];
        assert!((ari(&p, &p) - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_ari_random() {
        let p1 = vec![0, 0, 0, 1, 1, 1];
        let p2 = vec![0, 1, 0, 1, 0, 1];
        assert!(ari(&p1, &p2) < 0.2);
    }

    #[test]
    fn test_ari_perfect_split() {
        let p1 = vec![0, 0, 0, 1, 1, 1];
        let p2 = vec![0, 0, 0, 1, 1, 1];
        assert!((ari(&p1, &p2) - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_ari_empty() {
        let p: Vec<usize> = vec![];
        assert!((ari(&p, &p) - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_ari_range() {
        let p1 = vec![0, 0, 1, 1, 2, 2];
        let p2 = vec![0, 1, 2, 0, 1, 2];
        let val = ari(&p1, &p2);
        assert!(
            (-0.5..=1.0).contains(&val),
            "ARI out of expected range: {val}"
        );
    }

    #[test]
    fn test_community_metrics() {
        let edges = [
            (0, 1, 1.0),
            (1, 2, 1.0),
            (0, 2, 1.0), // triangle 1
            (3, 4, 1.0),
            (4, 5, 1.0),
            (3, 5, 1.0), // triangle 2
            (2, 3, 1.0), // bridge
        ];
        let data = GraphData::from_edgelist(&edges, 6).unwrap();
        let partition = Partition::from_membership(vec![0, 0, 0, 1, 1, 1]);

        let cond = conductance(&data, &partition);
        assert_eq!(cond.len(), 2);
        // Community 0: cut = 1 (edge 2-3), vol = 7, conductance = 1/min(7, 5) = 0.2
        assert!(cond[0] > 0.0 && cond[0] < 0.5);
        // Community 1: symmetric
        assert!(cond[1] > 0.0 && cond[1] < 0.5);

        let cov = coverage(&data, &partition);
        // 6 internal edges out of 7 total = 6/7
        assert!(cov > 0.8 && cov < 1.0);

        let density = internal_density(&data, &partition);
        // Each community has 3 nodes, 3 internal edges: 2*3/(3*2) = 1.0
        assert!((density[0] - 1.0).abs() < 1e-10);
        assert!((density[1] - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_community_sizes_delegates() {
        let partition = Partition::from_membership(vec![0, 0, 1, 1, 1, 2]);
        let sizes = community_sizes(&partition);
        assert_eq!(sizes, vec![2, 3, 1]);
    }

    #[test]
    fn test_conductance_values() {
        // Two triangles connected by one bridge edge
        let edges = [
            (0, 1, 1.0),
            (1, 2, 1.0),
            (0, 2, 1.0),
            (3, 4, 1.0),
            (4, 5, 1.0),
            (3, 5, 1.0),
            (2, 3, 1.0),
        ];
        let data = GraphData::from_edgelist(&edges, 6).unwrap();
        let partition = Partition::from_membership(vec![0, 0, 0, 1, 1, 1]);

        let cond = conductance(&data, &partition);
        // total_weight m = 7, 2m = 14
        // Community 0: nodes 0,1,2; degrees 2+2+3=7; cut = 1 (edge 2-3)
        //   vol = 7, 2m-vol = 7, min(7,7) = 7, conductance = 1/7 ≈ 0.1429
        assert!((cond[0] - 1.0 / 7.0).abs() < 1e-10);
        assert!((cond[1] - 1.0 / 7.0).abs() < 1e-10);
    }

    #[test]
    fn test_coverage_exact() {
        let edges = [
            (0, 1, 1.0),
            (1, 2, 1.0),
            (0, 2, 1.0),
            (3, 4, 1.0),
            (4, 5, 1.0),
            (3, 5, 1.0),
            (2, 3, 1.0),
        ];
        let data = GraphData::from_edgelist(&edges, 6).unwrap();
        let partition = Partition::from_membership(vec![0, 0, 0, 1, 1, 1]);

        let cov = coverage(&data, &partition);
        // 6 internal edges (3 per community) / 7 total = 6/7
        assert!((cov - 6.0 / 7.0).abs() < 1e-10);
    }

    #[test]
    fn test_nmi_different_lengths_panics() {
        let p1 = vec![0, 0, 1];
        let p2 = vec![0, 1];
        let result = std::panic::catch_unwind(|| nmi(&p1, &p2));
        assert!(result.is_err());
    }

    #[test]
    fn test_ari_different_lengths_panics() {
        let p1 = vec![0, 0, 1];
        let p2 = vec![0, 1];
        let result = std::panic::catch_unwind(|| ari(&p1, &p2));
        assert!(result.is_err());
    }
}