use std::path::PathBuf;
use clap::Parser;
use rand::prelude::*;
use rand_chacha::ChaCha8Rng;
use serde::Serialize;
// Command-line options for the fixture generator.
//
// NOTE: plain `//` comments are used here on purpose. clap's derive macro
// turns `///` doc comments into `--help` text, so switching to doc comments
// would change the CLI output.
#[derive(Parser, Debug)]
#[command(name = "gen-fixtures")]
#[command(about = "Generate deterministic test fixtures for SparrowDB")]
struct Args {
// Base RNG seed (defaults to 42). `main` adds a distinct offset (0 through 5)
// to it, with wrapping, before seeding the generator of each fixture file.
#[arg(long, default_value = "42")]
seed: u64,
// Directory the JSON fixtures are written into (defaults to `tests/fixtures`).
// `main` creates it with `create_dir_all` before writing anything.
#[arg(long, default_value = "tests/fixtures")]
out: PathBuf,
}
/// A node of the social-graph fixtures (see `gen_social_graph`).
#[derive(Debug, Serialize)]
struct PersonNode {
/// Node identifier; `gen_social_graph` assigns these sequentially from 0.
id: u64,
/// Node label; `gen_social_graph` always sets this to `"Person"`.
label: &'static str,
/// Display name; `gen_social_graph` uses `Person_{id}`.
name: String,
}
/// A node of the dependency-graph fixtures (see `gen_deps_graph`).
#[derive(Debug, Serialize)]
struct PackageNode {
/// Node identifier; `gen_deps_graph` assigns these sequentially from 0.
id: u64,
/// Node label; `gen_deps_graph` always sets this to `"Package"`.
label: &'static str,
/// Package name; `gen_deps_graph` uses `pkg_{id}`.
name: String,
/// Version string; `gen_deps_graph` uses `1.{id % 10}.0`.
version: String,
}
/// A node of the concepts-graph fixture (see `gen_concepts_graph`).
#[derive(Debug, Serialize)]
struct KnowledgeNode {
/// Node identifier; `gen_concepts_graph` assigns these sequentially from 0.
id: u64,
/// Node label; `gen_concepts_graph` always sets this to `"Knowledge"`.
label: &'static str,
/// Text payload; `gen_concepts_graph` uses `fact about {topic} #{id}`.
content: String,
/// Confidence score; `gen_concepts_graph` computes `0.5 + (id % 50) / 100`.
confidence: f64,
/// Provenance tag; `gen_concepts_graph` takes it from `SOURCES`.
source: &'static str,
}
/// A directed, typed edge shared by all three fixture graph kinds.
#[derive(Debug, Serialize)]
struct Edge {
/// Id of the node the edge starts at.
src: u64,
/// Id of the node the edge points to.
dst: u64,
/// Relationship type: `"KNOWS"` for social graphs, `"DEPENDS_ON"` for
/// dependency graphs, or an entry of `EDGE_RELS` for concept graphs.
rel: &'static str,
}
/// Top-level document of a social-graph fixture, as returned by
/// `gen_social_graph` and written out by `write_json`.
#[derive(Debug, Serialize)]
struct SocialGraph {
/// All people in the graph.
nodes: Vec<PersonNode>,
/// `KNOWS` edges between people.
edges: Vec<Edge>,
}
/// Top-level document of a dependency-graph fixture, as returned by
/// `gen_deps_graph` and written out by `write_json`.
#[derive(Debug, Serialize)]
struct DepsGraph {
/// All packages in the graph.
nodes: Vec<PackageNode>,
/// `DEPENDS_ON` edges between packages.
edges: Vec<Edge>,
}
/// Top-level document of a concepts-graph fixture, as returned by
/// `gen_concepts_graph` and written out by `write_json`.
#[derive(Debug, Serialize)]
struct ConceptsGraph {
/// All knowledge nodes in the graph.
nodes: Vec<KnowledgeNode>,
/// Edges whose relationship type is drawn from `EDGE_RELS`.
edges: Vec<Edge>,
}
/// Builds a social graph of `n_nodes` people joined by `n_edges` `KNOWS` edges.
///
/// People are numbered `0..n_nodes` and named `Person_{id}`; building them
/// consumes no randomness. All edge generation (and therefore every use of
/// `rng`) is delegated to `gen_power_law_edges`.
fn gen_social_graph(n_nodes: u64, n_edges: u64, rng: &mut impl Rng) -> SocialGraph {
    let mut people: Vec<PersonNode> = Vec::new();
    for person_id in 0..n_nodes {
        people.push(PersonNode {
            id: person_id,
            label: "Person",
            name: format!("Person_{person_id}"),
        });
    }

    let friendships = gen_power_law_edges(n_nodes, n_edges, "KNOWS", rng);

    SocialGraph {
        nodes: people,
        edges: friendships,
    }
}
/// Generates `target_edges` directed, self-loop-free edges of type `rel` whose
/// out-degree falls off roughly as `1 / (src + 1)`.
///
/// Node `src` gets a target out-degree of `scale / (src + 1)`, rounded and
/// clamped to `1..=n_nodes - 1`, where `scale` is `target_edges` divided by the
/// harmonic number of `n_nodes`. Distinct destinations are drawn by rejection
/// sampling with a bounded number of attempts, so a node may end up with fewer
/// edges than its target degree.
///
/// The result always holds exactly `target_edges` edges when `n_nodes >= 2`:
/// * if the per-node pass overshoots, the list is truncated, which drops the
///   edges of the highest-numbered sources first;
/// * if it undershoots, uniformly random edges are appended. These top-up
///   edges are not checked against existing ones, so duplicates are possible.
///
/// Returns an empty vector when `n_nodes < 2`, because no edge between two
/// distinct nodes exists; `rng` is not touched in that case.
fn gen_power_law_edges(
    n_nodes: u64,
    target_edges: u64,
    rel: &'static str,
    rng: &mut impl Rng,
) -> Vec<Edge> {
    // Without this guard a single-node request would spin forever in the
    // top-up loop (every draw is a self-loop) and a zero-node request would
    // sample from the empty range `0..0`, which panics.
    if n_nodes < 2 {
        return Vec::new();
    }

    let harmonic_sum: f64 = (1..=n_nodes).map(|i| 1.0 / i as f64).sum();
    let scale = target_edges as f64 / harmonic_sum;
    let mut edges: Vec<Edge> = Vec::with_capacity(target_edges as usize + n_nodes as usize);

    for src in 0..n_nodes {
        let raw_degree = (scale / (src + 1) as f64).round() as u64;
        // At least one edge per node, at most one edge to every other node.
        let degree = raw_degree.clamp(1, n_nodes - 1);

        // The ordered set both de-duplicates destinations and fixes the output
        // order, which keeps the generated fixture deterministic.
        let mut chosen = std::collections::BTreeSet::new();
        let mut attempts = 0u64;
        while chosen.len() < degree as usize && attempts < degree * 3 + 10 {
            let dst = rng.gen_range(0..n_nodes);
            if dst != src {
                chosen.insert(dst);
            }
            attempts += 1;
        }
        for dst in chosen {
            edges.push(Edge { src, dst, rel });
        }
    }

    if edges.len() > target_edges as usize {
        edges.truncate(target_edges as usize);
    } else {
        while edges.len() < target_edges as usize {
            let src = rng.gen_range(0..n_nodes);
            let dst = rng.gen_range(0..n_nodes);
            if src != dst {
                edges.push(Edge { src, dst, rel });
            }
        }
    }
    edges
}
/// Builds a layered, acyclic package-dependency graph with `n_nodes` packages
/// and `target_edges` `DEPENDS_ON` edges.
///
/// Packages are numbered `0..n_nodes`, named `pkg_{id}` and versioned
/// `1.{id % 10}.0`. Ids are split into up to six consecutive layers of
/// `ceil(n_nodes / 6)` nodes. Every node that has a following layer first gets
/// 2 to 4 distinct dependencies in the *next* layer (fewer if that layer is
/// smaller). Afterwards the edge list is brought to exactly `target_edges`:
/// * if there are too many edges the list is truncated, dropping edges of the
///   highest-numbered sources first;
/// * if there are too few, random edges from a node to any node in a *later*
///   layer are appended. These are not checked against existing edges, so
///   duplicates are possible.
///
/// Every edge points from a lower layer to a higher one, so the graph has no
/// cycles.
///
/// With `n_nodes < 2` no dependency edge can exist; the nodes are returned
/// with an empty edge list and `rng` is not touched.
fn gen_deps_graph(n_nodes: u64, target_edges: u64, rng: &mut impl Rng) -> DepsGraph {
    let nodes: Vec<PackageNode> = (0..n_nodes)
        .map(|id| PackageNode {
            id,
            label: "Package",
            name: format!("pkg_{id}"),
            version: format!("1.{}.0", id % 10),
        })
        .collect();

    // Without this guard the top-up loop below would sample a source from the
    // empty range `0..0`, which panics.
    if n_nodes < 2 {
        return DepsGraph {
            nodes,
            edges: Vec::new(),
        };
    }

    let n_levels = 6u64;
    let layer_size = n_nodes.div_ceil(n_levels);
    let layer_of = |node_id: u64| node_id / layer_size;
    let mut edges: Vec<Edge> = Vec::with_capacity(target_edges as usize);

    for src in 0..n_nodes {
        let src_layer = layer_of(src);
        if src_layer >= n_levels - 1 {
            continue;
        }
        let next_layer_start = (src_layer + 1) * layer_size;
        let next_layer_end = ((src_layer + 2) * layer_size).min(n_nodes);
        // Nodes in the last populated layer have nothing to depend on.
        if next_layer_start >= next_layer_end {
            continue;
        }
        let n_deps = rng
            .gen_range(2u64..5)
            .min(next_layer_end - next_layer_start);

        // The ordered set both de-duplicates dependencies and fixes the output
        // order, which keeps the generated fixture deterministic.
        let mut chosen = std::collections::BTreeSet::new();
        let mut attempts = 0u64;
        while chosen.len() < n_deps as usize && attempts < n_deps * 3 + 5 {
            let dst = rng.gen_range(next_layer_start..next_layer_end);
            chosen.insert(dst);
            attempts += 1;
        }
        for dst in chosen {
            edges.push(Edge {
                src,
                dst,
                rel: "DEPENDS_ON",
            });
        }
    }

    if edges.len() > target_edges as usize {
        edges.truncate(target_edges as usize);
    } else {
        while edges.len() < target_edges as usize {
            // The last node can never be a source (nothing comes after it),
            // so it is excluded from the draw. `n_nodes >= 2` here.
            let src = rng.gen_range(0..n_nodes - 1);
            let src_layer = layer_of(src);
            if src_layer >= n_levels - 1 {
                continue;
            }
            let min_dst = (src_layer + 1) * layer_size;
            let max_dst = n_nodes;
            if min_dst >= max_dst {
                continue;
            }
            let dst = rng.gen_range(min_dst..max_dst);
            edges.push(Edge {
                src,
                dst,
                rel: "DEPENDS_ON",
            });
        }
    }
    DepsGraph { nodes, edges }
}
/// Subject names that `gen_concepts_graph` cycles through (indexed by
/// `id % TOPICS.len()`) when building each node's `content` string.
const TOPICS: &[&str] = &[
"AI",
"machine learning",
"neural networks",
"databases",
"distributed systems",
"graph theory",
"type theory",
"cryptography",
"operating systems",
"compilers",
"software architecture",
"data structures",
"algorithms",
"security",
"networking",
];
/// Provenance tags that `gen_concepts_graph` cycles through (indexed by
/// `id % SOURCES.len()`) for each node's `source` field.
const SOURCES: &[&str] = &["manual", "import", "inference", "web"];
/// Relationship types from which `gen_concepts_graph` picks one at random for
/// every edge it emits.
const EDGE_RELS: &[&str] = &["ABOUT", "MENTIONS", "RELATED_TO"];
/// Builds a concepts graph with `n_nodes` knowledge nodes and `target_edges`
/// uniformly random, self-loop-free edges.
///
/// Node attributes are derived from the id alone: the topic and source cycle
/// through `TOPICS` and `SOURCES`, and `confidence` is
/// `0.5 + (id % 50) / 100`. Each edge joins two distinct random nodes and
/// carries a random relationship type from `EDGE_RELS`. Edges are not
/// de-duplicated.
///
/// With `n_nodes < 2` no edge between distinct nodes exists; the nodes are
/// returned with an empty edge list and `rng` is not touched.
fn gen_concepts_graph(n_nodes: u64, target_edges: u64, rng: &mut impl Rng) -> ConceptsGraph {
    let nodes: Vec<KnowledgeNode> = (0..n_nodes)
        .map(|id| {
            let topic = TOPICS[id as usize % TOPICS.len()];
            let source = SOURCES[id as usize % SOURCES.len()];
            let confidence = 0.5 + (id as f64 % 50.0) / 100.0;
            KnowledgeNode {
                id,
                label: "Knowledge",
                content: format!("fact about {topic} #{id}"),
                confidence,
                source,
            }
        })
        .collect();

    // Without this guard a single-node request would loop forever (every draw
    // is a self-loop) and a zero-node request would sample from the empty
    // range `0..0`, which panics.
    if n_nodes < 2 {
        return ConceptsGraph {
            nodes,
            edges: Vec::new(),
        };
    }

    let mut edges: Vec<Edge> = Vec::with_capacity(target_edges as usize);
    while edges.len() < target_edges as usize {
        let src = rng.gen_range(0..n_nodes);
        let dst = rng.gen_range(0..n_nodes);
        // The relationship type is only drawn for accepted pairs; keeping this
        // order keeps the RNG stream, and thus the fixture, stable.
        if src != dst {
            let rel = EDGE_RELS[rng.gen_range(0..EDGE_RELS.len())];
            edges.push(Edge { src, dst, rel });
        }
    }
    ConceptsGraph { nodes, edges }
}
/// Serializes `value` as pretty-printed JSON into `out_dir/filename` and
/// reports the written path on stdout.
///
/// # Panics
///
/// Panics if serialization fails or if the file cannot be written.
fn write_json<T: Serialize>(out_dir: &std::path::Path, filename: &str, value: &T) {
    let target = out_dir.join(filename);
    let rendered = serde_json::to_string_pretty(value).expect("JSON serialization failed");
    if let Err(e) = std::fs::write(&target, rendered) {
        panic!("Failed to write {filename}: {e}");
    }
    println!(" wrote {}", target.display());
}
/// Entry point: parses the CLI options, makes sure the output directory exists
/// and writes the six fixture files into it.
///
/// # Panics
///
/// Panics if the output directory cannot be created or a fixture file cannot
/// be written.
fn main() {
    let args = Args::parse();
    let out_dir = args.out.as_path();

    if let Err(e) = std::fs::create_dir_all(out_dir) {
        panic!("Failed to create output dir {}: {e}", args.out.display());
    }
    println!(
        "Generating fixtures with seed={}, out={}",
        args.seed,
        out_dir.display()
    );

    // Each fixture gets its own generator seeded with `seed + offset`
    // (wrapping), so the files do not share one random stream.
    let base_seed = args.seed;
    let rng_for = |offset: u64| ChaCha8Rng::seed_from_u64(base_seed.wrapping_add(offset));

    // Each graph is a temporary that is dropped at the end of its statement,
    // so at most one fixture is held in memory at a time.
    write_json(out_dir, "social_small.json", &gen_social_graph(50, 100, &mut rng_for(0)));
    write_json(out_dir, "deps_small.json", &gen_deps_graph(20, 50, &mut rng_for(1)));
    write_json(out_dir, "social_10k.json", &gen_social_graph(10_000, 50_000, &mut rng_for(2)));
    write_json(out_dir, "social_100k.json", &gen_social_graph(100_000, 500_000, &mut rng_for(3)));
    write_json(out_dir, "deps_500.json", &gen_deps_graph(500, 2_000, &mut rng_for(4)));
    write_json(out_dir, "concepts_1k.json", &gen_concepts_graph(1_000, 3_000, &mut rng_for(5)));

    println!("Done.");
}