hyperball 0.1.5

Hyperbolic geometry: Poincare ball, Lorentz model, Mobius operations
Documentation
//! Graph → distance matrix → “tree-likeness” diagnostics.
//!
//! This is a small-n diagnostic harness, meant to answer:
//! - is the *metric* induced by shortest-path distances plausibly tree-like?
//! - if it’s tree-like, hyperbolic embeddings are often a good geometric prior.
//!
//! Two checks:
//! - **δ-hyperbolicity** (4-point condition): trees have δ = 0.
//! - **ultrametric violation**: ultrametrics are “strongly tree-like” (hierarchical clustering).
//!
//! Distances here are shortest-path distances on an unweighted graph.

use hyperball::core::diagnostics;

fn main() {
    // Optional: load a real graph from an undirected edge list.
    //
    // HYP_EDGELIST=/path/to/edges.txt cargo run --example graph_diagnostics
    //
    // Format: two whitespace-separated integer node ids per line, undirected.
    if let Ok(path) = std::env::var("HYP_EDGELIST") {
        let g = load_undirected_edgelist(std::path::Path::new(&path))
            .expect("failed to load HYP_EDGELIST");
        let n = g.len();
        println!("loaded graph from HYP_EDGELIST: n={n}");

        // Exact δ is O(n^4). Keep it explicit and safe.
        let max_n = 60usize;
        let (g_small, note) = if n <= max_n {
            (g, None)
        } else {
            (induced_prefix_subgraph(&g, max_n), Some((n, max_n)))
        };
        if let Some((n_full, n_used)) = note {
            println!("  NOTE: n={n_full} too large for exact δ; using induced subgraph of first n={n_used}");
        }

        let d = all_pairs_shortest_path(&g_small);
        let n2 = g_small.len();
        let delta = diagnostics::delta_hyperbolicity_four_point_exact_f64(&d, n2);
        let um = diagnostics::ultrametric_max_violation_f64(&d, n2);
        println!("  δ (4-point exact) = {delta}");
        println!("  ultrametric max violation = {um}");
        println!();
    } else {
        // Prefer a small real graph shipped in-repo.
        //
        // HYP_DATASET selects from bundled datasets:
        // - karate (default)
        // - lesmis
        // - florentine
        let ds = std::env::var("HYP_DATASET").unwrap_or_else(|_| "karate".to_string());
        let rel = match ds.as_str() {
            "karate" => "testdata/karate_club.edgelist",
            "lesmis" => "testdata/les_miserables.edgelist",
            "florentine" => "testdata/florentine_families.edgelist",
            other => panic!("unknown HYP_DATASET '{other}' (expected: karate|lesmis|florentine)"),
        };
        let path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join(rel);
        if path.exists() {
            let g = load_undirected_edgelist(&path).expect("failed to load bundled dataset");
            let n = g.len();
            println!("default dataset: {ds} n={n}");
            let d = all_pairs_shortest_path(&g);
            let delta = diagnostics::delta_hyperbolicity_four_point_exact_f64(&d, n);
            let um = diagnostics::ultrametric_max_violation_f64(&d, n);
            println!("  δ (4-point exact) = {delta}");
            println!("  ultrametric max violation = {um}");
            println!();
        }
    }

    // 1) A tree metric: path distances on a tree are 0-hyperbolic.
    let tree = make_path_graph(8);
    let d_tree = all_pairs_shortest_path(&tree);
    let n_tree = tree.len();
    let delta_tree = diagnostics::delta_hyperbolicity_four_point_exact_f64(&d_tree, n_tree);
    let um_tree = diagnostics::ultrametric_max_violation_f64(&d_tree, n_tree);

    println!("tree (path graph) n={n_tree}");
    println!("  δ (4-point exact) = {delta_tree}");
    println!("  ultrametric max violation = {um_tree}");
    println!();

    // 2) A cycle: C4 has δ=1 (see unit test), and larger cycles are “less tree-like”.
    let c = make_cycle_graph(8);
    let d_c = all_pairs_shortest_path(&c);
    let n_c = c.len();
    let delta_c = diagnostics::delta_hyperbolicity_four_point_exact_f64(&d_c, n_c);
    let um_c = diagnostics::ultrametric_max_violation_f64(&d_c, n_c);

    println!("cycle n={n_c}");
    println!("  δ (4-point exact) = {delta_c}");
    println!("  ultrametric max violation = {um_c}");
    println!();

    // 3) A true ultrametric (not from a graph): two tight clusters far apart.
    let n_u = 6usize;
    let mut d_u = vec![0.0f64; n_u * n_u];
    let set = |d: &mut [f64], n: usize, i: usize, j: usize, v: f64| {
        d[i * n + j] = v;
        d[j * n + i] = v;
    };
    // clusters: (0,1,2) and (3,4,5)
    for (i, j) in [(0, 1), (0, 2), (1, 2), (3, 4), (3, 5), (4, 5)] {
        set(&mut d_u, n_u, i, j, 1.0);
    }
    for i in 0..3 {
        for j in 3..6 {
            set(&mut d_u, n_u, i, j, 3.0);
        }
    }
    let delta_u = diagnostics::delta_hyperbolicity_four_point_exact_f64(&d_u, n_u);
    let um_u = diagnostics::ultrametric_max_violation_f64(&d_u, n_u);
    println!("synthetic ultrametric n={n_u}");
    println!("  δ (4-point exact) = {delta_u}");
    println!("  ultrametric max violation = {um_u}");
}

fn make_path_graph(n: usize) -> Vec<Vec<usize>> {
    let mut g = vec![Vec::new(); n];
    for (i, nbrs) in g.iter_mut().enumerate() {
        if i > 0 {
            nbrs.push(i - 1);
        }
        if i + 1 < n {
            nbrs.push(i + 1);
        }
    }
    g
}

fn make_cycle_graph(n: usize) -> Vec<Vec<usize>> {
    let mut g = vec![Vec::new(); n];
    for (i, nbrs) in g.iter_mut().enumerate() {
        let prev = if i == 0 { n - 1 } else { i - 1 };
        let next = (i + 1) % n;
        nbrs.push(prev);
        nbrs.push(next);
    }
    g
}

fn all_pairs_shortest_path(g: &[Vec<usize>]) -> Vec<f64> {
    let n = g.len();
    let mut dist = vec![0.0f64; n * n];
    for s in 0..n {
        let d = bfs(g, s);
        for t in 0..n {
            dist[s * n + t] = d[t] as f64;
        }
    }
    dist
}

fn bfs(g: &[Vec<usize>], start: usize) -> Vec<usize> {
    use std::collections::VecDeque;
    let n = g.len();
    let mut dist = vec![usize::MAX; n];
    let mut q = VecDeque::new();
    dist[start] = 0;
    q.push_back(start);
    while let Some(u) = q.pop_front() {
        let du = dist[u];
        for &v in &g[u] {
            if dist[v] == usize::MAX {
                dist[v] = du + 1;
                q.push_back(v);
            }
        }
    }
    // This example only uses connected graphs; keep it simple.
    for (i, &d) in dist.iter().enumerate() {
        assert!(d != usize::MAX, "graph disconnected (unreachable node {i})");
    }
    dist
}

fn load_undirected_edgelist(path: &std::path::Path) -> Result<Vec<Vec<usize>>, String> {
    let txt = std::fs::read_to_string(path)
        .map_err(|e| format!("failed to read {}: {e}", path.display()))?;

    let mut edges: Vec<(usize, usize)> = Vec::new();
    let mut max_node = 0usize;

    for (line_no, line) in txt.lines().enumerate() {
        let line = line.trim();
        if line.is_empty() || line.starts_with('#') {
            continue;
        }
        let mut it = line.split_whitespace();
        let a = it
            .next()
            .ok_or_else(|| format!("line {}: missing src", line_no + 1))?;
        let b = it
            .next()
            .ok_or_else(|| format!("line {}: missing dst", line_no + 1))?;
        let u: usize = a
            .parse()
            .map_err(|e| format!("line {}: bad src '{a}': {e}", line_no + 1))?;
        let v: usize = b
            .parse()
            .map_err(|e| format!("line {}: bad dst '{b}': {e}", line_no + 1))?;
        max_node = max_node.max(u).max(v);
        edges.push((u, v));
    }

    let n = max_node + 1;
    if n == 0 {
        return Err("edgelist produced empty graph".to_string());
    }

    let mut g = vec![Vec::new(); n];
    for (u, v) in edges {
        if u == v {
            continue;
        }
        g[u].push(v);
        g[v].push(u);
    }
    for nbrs in &mut g {
        nbrs.sort_unstable();
        nbrs.dedup();
    }
    Ok(g)
}

fn induced_prefix_subgraph(g: &[Vec<usize>], n: usize) -> Vec<Vec<usize>> {
    let n = n.min(g.len());
    let mut out = vec![Vec::new(); n];
    for u in 0..n {
        out[u] = g[u].iter().copied().filter(|&v| v < n).collect();
    }
    out
}