use sqlitegraph::backend::GraphBackend;
use sqlitegraph::backend::NodeSpec;
use sqlitegraph::backend::native::v3::V3Backend;
use sqlitegraph::snapshot::SnapshotId;
use std::io::Write;
use std::time::Instant;
/// Node-page cache capacities (in pages) exercised by the sweep.
const CACHE_SIZES: &[usize] = &[16, 32, 64, 128, 256];
/// Number of nodes inserted into the freshly created test database.
const NODE_COUNT: usize = 10_000;
/// Number of `get_node` calls performed per timed measurement.
const LOOKUP_COUNT: usize = 100_000;
/// Entry point: runs one benchmark per configured cache capacity and
/// prints a combined summary table at the end. A failed run is reported
/// inline but does not abort the remaining sweep iterations.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("=== get_node Cache Capacity Sweep Benchmark ===");
    println!(
        "Dataset: {} nodes, {} lookups per test",
        NODE_COUNT, LOOKUP_COUNT
    );
    println!();

    let mut results = Vec::new();
    for &capacity in CACHE_SIZES {
        let rule = "=".repeat(70);
        println!("{}", rule);
        println!("CACHE SIZE: {} pages", capacity);
        println!("{}", rule);
        match run_benchmark(capacity) {
            Ok(stats) => {
                results.push((capacity, stats));
                println!();
            }
            Err(e) => println!("ERROR: {}", e),
        }
    }
    print_summary(&results);
    Ok(())
}
/// Aggregated results for a single cache-size run, covering both the
/// warm-backend measurement and the reopened (cold-cache) measurement.
/// Forensic fields are all zero when the `v3-forensics` feature is off.
#[derive(Debug, Clone)]
struct BenchmarkStats {
    /// Cache capacity (pages) this run was labelled with.
    cache_size: usize,
    /// Mean latency of one `get_node` call against the warm backend (ns).
    warm_time_per_lookup_ns: f64,
    /// Warm-backend lookups per second.
    warm_throughput: f64,
    /// Mean latency of one `get_node` call after reopening (ns).
    reopen_time_per_lookup_ns: f64,
    /// Post-reopen lookups per second.
    reopen_throughput: f64,
    /// Node-page cache hits during the warm timed loop.
    warm_cache_hits: u64,
    /// Node-page cache misses during the warm timed loop.
    warm_cache_misses: u64,
    /// Warm hit fraction in [0.0, 1.0] (0.0 when no accesses recorded).
    warm_hit_rate: f64,
    /// Node-page cache hits during the post-reopen timed loop.
    reopen_cache_hits: u64,
    /// Node-page cache misses during the post-reopen timed loop.
    reopen_cache_misses: u64,
    /// Post-reopen hit fraction in [0.0, 1.0].
    reopen_hit_rate: f64,
    /// Physical page reads during the warm timed loop.
    warm_page_reads: u64,
    /// Physical page reads during the post-reopen timed loop.
    reopen_page_reads: u64,
}
/// Creates a fresh database with `NODE_COUNT` nodes in a tempdir, then
/// measures `get_node` twice: once against the still-warm backend and
/// once after dropping and reopening it.
///
/// NOTE(review): `cache_size` is only copied into the returned stats —
/// nothing here passes it to `V3Backend::create`/`open`, so as far as
/// this file shows, every sweep iteration runs at the backend's default
/// cache capacity. Confirm whether the backend picks the capacity up
/// some other way (env var, global config); otherwise the sweep is
/// measuring the same configuration each time.
///
/// # Errors
/// Propagates tempdir creation, backend create/open, node insert, and
/// flush failures.
fn run_benchmark(cache_size: usize) -> Result<BenchmarkStats, Box<dyn std::error::Error>> {
    // Fresh tempdir per run: each cache size starts from an empty file,
    // and the directory (and db) is cleaned up when `temp_dir` drops.
    let temp_dir = tempfile::tempdir()?;
    let db_path = temp_dir.path().join("cache_sweep.db");
    print!("Creating database... ");
    // Flush so the progress text appears before the slow insert loop;
    // the result is deliberately ignored (best-effort status output).
    let _ = std::io::stdout().flush();
    let create_start = Instant::now();
    let backend = V3Backend::create(&db_path)?;
    for i in 0..NODE_COUNT {
        backend.insert_node(NodeSpec {
            kind: "TestKind".to_string(),
            name: format!("node_{:05}", i),
            file_path: None,
            // 32 bytes of filler per node so records have some bulk.
            data: serde_json::json!({"value": i, "data": "x".repeat(32)}),
        })?;
    }
    backend.flush_to_disk()?;
    let create_time = create_start.elapsed();
    println!("{:.2}s", create_time.as_secs_f64());
    // Zero the counters before the warm run (measure_get_node resets
    // them again after its warm-up pass when the feature is enabled).
    #[cfg(feature = "v3-forensics")]
    reset_forensic_counters();
    print!("Running warm cache benchmark... ");
    let _ = std::io::stdout().flush();
    let warm_stats = measure_get_node(&backend, LOOKUP_COUNT)?;
    println!(
        "{:.2}μs/lookup, {:.1} lookups/sec",
        warm_stats.time_per_lookup_ns / 1000.0,
        warm_stats.throughput
    );
    // Without the forensics feature the counters read as all zeros, so
    // hit rates in the summary will show 0%.
    #[cfg(feature = "v3-forensics")]
    let warm_forensics = read_forensics();
    #[cfg(not(feature = "v3-forensics"))]
    let warm_forensics = ForensicsReading::default();
    // Drop and reopen — presumably this leaves the backend's in-process
    // cache cold (the OS file cache may still be warm); confirm against
    // V3Backend's open semantics.
    drop(backend);
    #[cfg(feature = "v3-forensics")]
    reset_forensic_counters();
    print!("Running reopen (cold cache) benchmark... ");
    let _ = std::io::stdout().flush();
    let backend = V3Backend::open(&db_path)?;
    let reopen_stats = measure_get_node(&backend, LOOKUP_COUNT)?;
    println!(
        "{:.2}μs/lookup, {:.1} lookups/sec",
        reopen_stats.time_per_lookup_ns / 1000.0,
        reopen_stats.throughput
    );
    #[cfg(feature = "v3-forensics")]
    let reopen_forensics = read_forensics();
    #[cfg(not(feature = "v3-forensics"))]
    let reopen_forensics = ForensicsReading::default();
    Ok(BenchmarkStats {
        cache_size,
        warm_time_per_lookup_ns: warm_stats.time_per_lookup_ns,
        warm_throughput: warm_stats.throughput,
        reopen_time_per_lookup_ns: reopen_stats.time_per_lookup_ns,
        reopen_throughput: reopen_stats.throughput,
        warm_cache_hits: warm_forensics.node_page_cache_hit_count,
        warm_cache_misses: warm_forensics.node_page_cache_miss,
        warm_hit_rate: warm_forensics.node_page_cache_hit_rate(),
        reopen_cache_hits: reopen_forensics.node_page_cache_hit_count,
        reopen_cache_misses: reopen_forensics.node_page_cache_miss,
        reopen_hit_rate: reopen_forensics.node_page_cache_hit_rate(),
        warm_page_reads: warm_forensics.page_read_count,
        reopen_page_reads: reopen_forensics.page_read_count,
    })
}
/// Result of one timed measurement pass over a batch of lookups.
#[derive(Debug, Clone)]
struct Measurement {
    /// Mean wall-clock time per `get_node` call, in nanoseconds.
    time_per_lookup_ns: f64,
    /// Lookups per second over the whole timed loop.
    throughput: f64,
}
/// Times `count` `get_node` lookups against `backend` and returns the
/// mean per-lookup latency (ns) and throughput (lookups/sec).
///
/// A short warm-up pass runs first; when the `v3-forensics` feature is
/// enabled the forensic counters are reset after warm-up so readings
/// cover only the timed loop.
///
/// NOTE(review): the generated ids assume nodes are numbered from 1 in
/// insertion order — confirm against V3Backend's id allocation. Lookup
/// failures are deliberately ignored so a missing id cannot abort the
/// benchmark (this function currently never returns `Err`).
fn measure_get_node(
    backend: &V3Backend,
    count: usize,
) -> Result<Measurement, Box<dyn std::error::Error>> {
    // black_box keeps the optimizer from discarding lookup results,
    // which could otherwise hollow out the timed loop if the calls get
    // inlined/optimized across crates.
    use std::hint::black_box;

    let snapshot_id = SnapshotId::current();
    // Warm-up: touch 100 ids spread across the key space (stride 100).
    for i in 0..100 {
        let node_id = (i as i64) * 100 + 1;
        let _ = black_box(backend.get_node(snapshot_id, node_id));
    }
    // Start counting from a clean slate after warm-up.
    #[cfg(feature = "v3-forensics")]
    reset_forensic_counters();
    let start = Instant::now();
    // Timed loop: cycle through 1000 distinct ids (stride 10) so the
    // working set exceeds the smaller cache capacities under test.
    for i in 0..count {
        let node_id = (i % 1000) as i64 * 10 + 1;
        let _ = black_box(backend.get_node(snapshot_id, node_id));
    }
    let duration = start.elapsed();
    let time_per_lookup_ns = duration.as_nanos() as f64 / count as f64;
    let throughput = count as f64 / duration.as_secs_f64();
    Ok(Measurement {
        time_per_lookup_ns,
        throughput,
    })
}
/// Snapshot of the forensic counters relevant to the node-page cache.
/// `Default` (all zeros) stands in when the `v3-forensics` feature is
/// disabled.
#[derive(Debug, Clone, Default)]
struct ForensicsReading {
    node_page_cache_hit_count: u64,
    node_page_cache_miss: u64,
    page_read_count: u64,
}

impl ForensicsReading {
    /// Fraction of node-page cache accesses that hit, in [0.0, 1.0].
    /// Yields 0.0 when no accesses were recorded (avoids 0/0).
    fn node_page_cache_hit_rate(&self) -> f64 {
        match self.node_page_cache_hit_count + self.node_page_cache_miss {
            0 => 0.0,
            total => self.node_page_cache_hit_count as f64 / total as f64,
        }
    }
}
/// Reads the current values of the global forensic counters into a
/// `ForensicsReading`. Loads are `Relaxed`, so values may be slightly
/// stale relative to concurrent updates; fine for benchmark reporting.
/// Only compiled when the `v3-forensics` feature is enabled.
#[cfg(feature = "v3-forensics")]
fn read_forensics() -> ForensicsReading {
    use sqlitegraph::backend::native::v3::forensics::FORENSIC_COUNTERS;
    ForensicsReading {
        node_page_cache_hit_count: FORENSIC_COUNTERS
            .node_page_cache_hit_count
            .load(std::sync::atomic::Ordering::Relaxed),
        // Note the field-name mismatch: local `node_page_cache_miss`
        // mirrors the backend's `node_page_cache_miss_count` counter.
        node_page_cache_miss: FORENSIC_COUNTERS
            .node_page_cache_miss_count
            .load(std::sync::atomic::Ordering::Relaxed),
        page_read_count: FORENSIC_COUNTERS
            .page_read_count
            .load(std::sync::atomic::Ordering::Relaxed),
    }
}
/// Zeroes the three forensic counters consumed by this benchmark so
/// each measurement phase starts from a clean slate. `Relaxed` stores
/// are sufficient: the benchmark is single-threaded at reset points.
/// Only compiled when the `v3-forensics` feature is enabled.
#[cfg(feature = "v3-forensics")]
fn reset_forensic_counters() {
    use sqlitegraph::backend::native::v3::forensics::FORENSIC_COUNTERS;
    FORENSIC_COUNTERS
        .node_page_cache_hit_count
        .store(0, std::sync::atomic::Ordering::Relaxed);
    FORENSIC_COUNTERS
        .node_page_cache_miss_count
        .store(0, std::sync::atomic::Ordering::Relaxed);
    FORENSIC_COUNTERS
        .page_read_count
        .store(0, std::sync::atomic::Ordering::Relaxed);
}
/// Prints summary tables (latency, cache statistics, page reads) and a
/// short automated analysis across all completed benchmark runs.
///
/// Safe to call with an empty slice (possible when every run failed,
/// since `main` continues past errors): prints a notice and returns
/// instead of panicking in the analysis section.
fn print_summary(results: &[(usize, BenchmarkStats)]) {
    // Guard: the analysis section takes a min over `results` and
    // unwraps it, which would panic on an empty slice.
    if results.is_empty() {
        println!();
        println!("No successful benchmark runs to summarize.");
        return;
    }
    println!();
    println!("{}", "=".repeat(80));
    println!("SUMMARY: Cache Capacity vs get_node Performance");
    println!("{}", "=".repeat(80));
    println!();
    println!("Time per lookup (lower is better):");
    println!("{:=<80}", "");
    println!(
        "{:>12} | {:>12} | {:>12} | {:>12} | {:>12} | {:>12}",
        "Cache Size", "Warm (μs)", "Reopen (μs)", "Warm Hit %", "Reopen Hit %", "Speedup"
    );
    println!("{:-<80}", "-");
    // Speedup is relative to the first (smallest) cache size tested.
    let baseline_time = results[0].1.reopen_time_per_lookup_ns;
    for &(cache_size, ref stats) in results {
        let speedup = baseline_time / stats.reopen_time_per_lookup_ns;
        println!(
            "{:>12} | {:>12.2} | {:>12.2} | {:>11.1}% | {:>11.1}% | {:>12.2}x",
            cache_size,
            stats.warm_time_per_lookup_ns / 1000.0,
            stats.reopen_time_per_lookup_ns / 1000.0,
            stats.warm_hit_rate * 100.0,
            stats.reopen_hit_rate * 100.0,
            speedup
        );
    }
    println!();
    println!();
    println!("Detailed Cache Statistics:");
    println!("{:=<80}", "");
    println!(
        "{:>12} | {:>12} | {:>12} | {:>12} | {:>12}",
        "Cache Size", "Warm Hits", "Warm Misses", "Reopen Hits", "Reopen Misses"
    );
    println!("{:-<80}", "-");
    for &(cache_size, ref stats) in results {
        println!(
            "{:>12} | {:>12} | {:>12} | {:>12} | {:>12}",
            cache_size,
            stats.warm_cache_hits,
            stats.warm_cache_misses,
            stats.reopen_cache_hits,
            stats.reopen_cache_misses
        );
    }
    println!();
    println!();
    println!("Page Read Count (lower is better):");
    println!("{:=<80}", "");
    println!(
        "{:>12} | {:>12} | {:>12} | {:>12}",
        "Cache Size", "Warm", "Reopen", "Reduction"
    );
    println!("{:-<80}", "-");
    let baseline_reads = results[0].1.reopen_page_reads;
    for &(cache_size, ref stats) in results {
        // Avoid divide-by-zero when no page reads were recorded (e.g.
        // the forensics feature is disabled and all counters are zero).
        let reduction = if baseline_reads > 0 {
            (baseline_reads as f64 - stats.reopen_page_reads as f64) / baseline_reads as f64
                * 100.0
        } else {
            0.0
        };
        println!(
            "{:>12} | {:>12} | {:>12} | {:>11.1}%",
            cache_size, stats.warm_page_reads, stats.reopen_page_reads, reduction
        );
    }
    println!();
    println!();
    println!("ANALYSIS:");
    println!("{:-<80}", "-");
    // `results` is non-empty (guarded above); partial_cmp can only be
    // None for NaN, which Instant-derived timings never produce.
    let best_speedup = results
        .iter()
        .min_by(|a, b| {
            a.1.reopen_time_per_lookup_ns
                .partial_cmp(&b.1.reopen_time_per_lookup_ns)
                .unwrap()
        })
        .unwrap();
    println!(
        "1. Best performance: {} pages ({:.2}μs/lookup, {:.1}% hit rate)",
        best_speedup.0,
        best_speedup.1.reopen_time_per_lookup_ns / 1000.0,
        best_speedup.1.reopen_hit_rate * 100.0
    );
    // Plateau heuristic: do the last three cache sizes all sit within
    // 5% of the best reopen latency? (Was `> 3`, an off-by-one that
    // skipped the check when exactly three results existed.)
    if results.len() >= 3 {
        let last_three = results.len() - 3;
        let plateau = results[last_three..].iter().all(|(_, s)| {
            (s.reopen_time_per_lookup_ns - best_speedup.1.reopen_time_per_lookup_ns).abs()
                / best_speedup.1.reopen_time_per_lookup_ns
                < 0.05
        });
        if plateau {
            println!("2. Plateau detected: Last 3 cache sizes show <5% variance");
        } else {
            println!("2. No clear plateau: Performance still improving");
        }
    }
    // Improvement relative to the 16-page configuration, when present.
    let baseline_16 = results
        .iter()
        .find(|(size, _)| *size == 16)
        .map(|(_, s)| s.reopen_time_per_lookup_ns)
        .unwrap_or(0.0);
    if baseline_16 > 0.0 {
        let improvement =
            (baseline_16 - best_speedup.1.reopen_time_per_lookup_ns) / baseline_16 * 100.0;
        println!("3. Improvement from 16 pages: {:.1}% faster", improvement);
    }
    let best_hit_rate = results
        .iter()
        .map(|(_, s)| s.reopen_hit_rate)
        .fold(0.0_f64, f64::max);
    if best_hit_rate > 0.95 {
        println!(
            "4. Hit rate saturation: {:.1}% - cache is highly effective",
            best_hit_rate * 100.0
        );
    } else if best_hit_rate > 0.80 {
        println!(
            "4. Hit rate: {:.1}% - cache is working but not saturated",
            best_hit_rate * 100.0
        );
    } else {
        println!(
            "4. Hit rate: {:.1}% - significant thrashing remains",
            best_hit_rate * 100.0
        );
    }
    println!("{}", "=".repeat(80));
}