sqlitegraph 2.2.2

Embedded graph database with full ACID transactions, HNSW vector search, dual backend support, and comprehensive graph algorithms library
Documentation
use rand::{Rng, SeedableRng, rngs::StdRng};
use serde_json::json;

use crate::{GraphEdge, GraphEntity};

#[derive(Clone, Debug)]
pub struct GraphDataset {
    pub entities: Vec<GraphEntity>,
    pub edges: Vec<GraphEdge>,
}

impl GraphDataset {
    pub fn nodes(&self) -> usize {
        self.entities.len()
    }

    pub fn edges(&self) -> usize {
        self.edges.len()
    }

    pub fn degrees(&self) -> Vec<usize> {
        let mut counts = vec![0usize; self.entities.len()];
        for edge in &self.edges {
            let from = edge.from_id as usize;
            let to = edge.to_id as usize;
            counts[from] += 1;
            counts[to] += 1;
        }
        counts
    }

    pub fn hub_index(&self) -> usize {
        let mut best = (0usize, 0usize);
        for (idx, deg) in self.degrees().into_iter().enumerate() {
            if deg > best.0 {
                best = (deg, idx);
            }
        }
        best.1
    }

    pub fn mapped_edge(edge: &GraphEdge, id_map: &[i64]) -> GraphEdge {
        GraphEdge {
            id: 0,
            from_id: id_map[edge.from_id as usize],
            to_id: id_map[edge.to_id as usize],
            edge_type: edge.edge_type.clone(),
            data: edge.data.clone(),
        }
    }
}

#[derive(Clone, Debug)]
pub enum GraphShape {
    Line,
    Star,
    Grid2D { width: usize, height: usize },
    RandomErdosRenyi { edges: usize },
    ScaleFree { m: usize },
}

pub fn generate_graph(shape: GraphShape, node_count: usize, seed: u64) -> GraphDataset {
    assert!(node_count > 1, "node_count must exceed 1");
    let entities = build_entities(node_count);
    let mut edges = match shape {
        GraphShape::Line => generate_line_edges(node_count),
        GraphShape::Star => generate_star_edges(node_count),
        GraphShape::Grid2D { width, height } => generate_grid_edges(width, height, node_count),
        GraphShape::RandomErdosRenyi { edges } => generate_random_edges(node_count, edges, seed),
        GraphShape::ScaleFree { m } => generate_scale_free_edges(node_count, m, seed),
    };
    edges.sort_by(|a, b| {
        a.from_id
            .cmp(&b.from_id)
            .then_with(|| a.to_id.cmp(&b.to_id))
            .then_with(|| a.edge_type.cmp(&b.edge_type))
    });
    GraphDataset { entities, edges }
}

fn build_entities(count: usize) -> Vec<GraphEntity> {
    (0..count)
        .map(|idx| GraphEntity {
            id: idx as i64,
            kind: "Node".to_string(),
            name: format!("Node{idx}"),
            file_path: None,
            data: json!({ "idx": idx }),
        })
        .collect()
}

fn generate_line_edges(count: usize) -> Vec<GraphEdge> {
    (0..count - 1)
        .map(|idx| new_edge(idx, idx + 1, "LINE"))
        .collect()
}

fn generate_star_edges(count: usize) -> Vec<GraphEdge> {
    (1..count).map(|leaf| new_edge(0, leaf, "STAR")).collect()
}

fn generate_grid_edges(width: usize, height: usize, node_count: usize) -> Vec<GraphEdge> {
    assert_eq!(
        width * height,
        node_count,
        "grid dimensions must match node count"
    );
    let mut edges = Vec::with_capacity(width * height * 2);
    for y in 0..height {
        for x in 0..width {
            let base = grid_index(x, y, width);
            if x + 1 < width {
                edges.push(new_edge(base, grid_index(x + 1, y, width), "GRID"));
            }
            if y + 1 < height {
                edges.push(new_edge(base, grid_index(x, y + 1, width), "GRID"));
            }
        }
    }
    edges
}

fn generate_random_edges(node_count: usize, edge_count: usize, seed: u64) -> Vec<GraphEdge> {
    let total_pairs = pair_count(node_count);
    assert!(
        edge_count as u128 <= total_pairs,
        "edge_count exceeds possible pairs"
    );
    let mut rng = StdRng::seed_from_u64(seed);
    let mut edges = Vec::with_capacity(edge_count);
    let mut idx = 0u64;
    let mut remaining_edges = edge_count as u64;
    while remaining_edges > 0 && idx < total_pairs as u64 {
        let remaining_pairs = total_pairs as u64 - idx;
        let p = remaining_edges as f64 / remaining_pairs as f64;
        let skip = sample_geometric(&mut rng, p);
        idx += skip;
        if idx >= total_pairs as u64 {
            break;
        }
        let (from, to) = pair_from_index(idx, node_count as u64);
        edges.push(new_edge(from as usize, to as usize, "ER"));
        idx += 1;
        remaining_edges -= 1;
    }
    edges
}

fn generate_scale_free_edges(node_count: usize, m: usize, seed: u64) -> Vec<GraphEdge> {
    assert!(m > 0, "m must be positive");
    assert!(node_count > m + 1, "node_count must exceed m + 1");
    let mut rng = StdRng::seed_from_u64(seed);
    let mut degrees = vec![0usize; node_count];
    let mut edges = Vec::new();
    let seed_nodes = m + 1;
    for u in 0..seed_nodes {
        for v in (u + 1)..seed_nodes {
            edges.push(new_edge(u, v, "SF"));
            degrees[u] += 1;
            degrees[v] += 1;
        }
    }
    let mut total_degree: usize = degrees.iter().sum();
    for new_node in seed_nodes..node_count {
        let mut targets = Vec::new();
        while targets.len() < m {
            let pick = rng.gen_range(0..total_degree);
            let mut cumulative = 0usize;
            for (candidate, degree) in degrees.iter().take(new_node).enumerate() {
                cumulative += *degree;
                if pick < cumulative {
                    if !targets.contains(&candidate) {
                        targets.push(candidate);
                    }
                    break;
                }
            }
        }
        targets.sort_unstable();
        targets.dedup();
        while targets.len() < m {
            targets.push(targets.len() % new_node);
            targets.sort_unstable();
            targets.dedup();
        }
        for target in targets {
            edges.push(new_edge(target, new_node, "SF"));
            degrees[target] += 1;
            degrees[new_node] += 1;
            total_degree += 2;
        }
    }
    edges
}

fn new_edge(from: usize, to: usize, label: &str) -> GraphEdge {
    GraphEdge {
        id: 0,
        from_id: from as i64,
        to_id: to as i64,
        edge_type: label.to_string(),
        data: json!({ "label": label }),
    }
}

fn grid_index(x: usize, y: usize, width: usize) -> usize {
    y * width + x
}

fn pair_count(nodes: usize) -> u128 {
    let n = nodes as u128;
    n * (n - 1) / 2
}

fn sample_geometric(rng: &mut StdRng, p: f64) -> u64 {
    let u = rng.r#gen::<f64>().max(f64::MIN_POSITIVE);
    ((u.ln() / (1.0 - p).ln()).floor().max(0.0)) as u64
}

fn pair_from_index(idx: u64, nodes: u64) -> (u64, u64) {
    let mut left = 0;
    let mut start = 0u64;
    while left < nodes - 1 {
        let remaining = nodes - left - 1;
        if idx < start + remaining {
            return (left, left + 1 + (idx - start));
        }
        start += remaining;
        left += 1;
    }
    (nodes - 2, nodes - 1)
}