use kitedb::types::NodeId;
use kitedb::vector::compaction::{
clear_deleted_fragments, compaction_stats, find_fragments_to_compact, run_compaction_if_needed,
CompactionStrategy,
};
use kitedb::vector::{
create_vector_store, vector_store_delete, vector_store_insert, vector_store_seal_active,
vector_store_stats, DistanceMetric, VectorStoreConfig,
};
use rand::{rngs::StdRng, seq::SliceRandom, Rng, SeedableRng};
use std::env;
use std::time::Instant;
#[derive(Debug, Clone)]
struct BenchConfig {
vectors: usize,
dimensions: usize,
fragment_target_size: usize,
delete_ratio: f32,
strategy: CompactionStrategy,
seed: u64,
}
impl Default for BenchConfig {
fn default() -> Self {
Self {
vectors: 50_000,
dimensions: 384,
fragment_target_size: 5_000,
delete_ratio: 0.35,
strategy: CompactionStrategy::default(),
seed: 42,
}
}
}
fn parse_args() -> BenchConfig {
let mut config = BenchConfig::default();
let args: Vec<String> = env::args().collect();
let mut i = 1usize;
while i < args.len() {
match args[i].as_str() {
"--vectors" => {
if let Some(value) = args.get(i + 1) {
config.vectors = value.parse().unwrap_or(config.vectors);
i += 1;
}
}
"--dimensions" => {
if let Some(value) = args.get(i + 1) {
config.dimensions = value.parse().unwrap_or(config.dimensions);
i += 1;
}
}
"--fragment-target-size" => {
if let Some(value) = args.get(i + 1) {
config.fragment_target_size = value.parse().unwrap_or(config.fragment_target_size);
i += 1;
}
}
"--delete-ratio" => {
if let Some(value) = args.get(i + 1) {
config.delete_ratio = value.parse().unwrap_or(config.delete_ratio);
i += 1;
}
}
"--min-deletion-ratio" => {
if let Some(value) = args.get(i + 1) {
config.strategy.min_deletion_ratio =
value.parse().unwrap_or(config.strategy.min_deletion_ratio);
i += 1;
}
}
"--max-fragments" => {
if let Some(value) = args.get(i + 1) {
config.strategy.max_fragments_per_compaction = value
.parse()
.unwrap_or(config.strategy.max_fragments_per_compaction);
i += 1;
}
}
"--min-vectors-to-compact" => {
if let Some(value) = args.get(i + 1) {
config.strategy.min_vectors_to_compact = value
.parse()
.unwrap_or(config.strategy.min_vectors_to_compact);
i += 1;
}
}
"--seed" => {
if let Some(value) = args.get(i + 1) {
config.seed = value.parse().unwrap_or(config.seed);
i += 1;
}
}
_ => {}
}
i += 1;
}
config.delete_ratio = config.delete_ratio.clamp(0.0, 1.0);
config.vectors = config.vectors.max(1);
config.dimensions = config.dimensions.max(1);
config.fragment_target_size = config.fragment_target_size.max(1);
config.strategy.max_fragments_per_compaction =
config.strategy.max_fragments_per_compaction.max(1);
config
}
fn random_vector(rng: &mut StdRng, dims: usize) -> Vec<f32> {
let mut vector = vec![0.0f32; dims];
for value in &mut vector {
*value = rng.gen_range(-1.0f32..1.0f32);
}
vector
}
fn format_number(n: usize) -> String {
let mut s = n.to_string();
let mut i = s.len() as isize - 3;
while i > 0 {
s.insert(i as usize, ',');
i -= 3;
}
s
}
fn format_ratio(ratio: f32) -> String {
format!("{:.2}%", ratio * 100.0)
}
fn main() {
let config = parse_args();
let mut rng = StdRng::seed_from_u64(config.seed);
println!("{}", "=".repeat(100));
println!("Vector Compaction Strategy Benchmark (Rust)");
println!("{}", "=".repeat(100));
println!("vectors: {}", format_number(config.vectors));
println!("dimensions: {}", config.dimensions);
println!(
"fragment_target_size: {}",
format_number(config.fragment_target_size)
);
println!("delete_ratio: {}", format_ratio(config.delete_ratio));
println!(
"strategy: min_deletion_ratio={}, max_fragments={}, min_vectors_to_compact={}",
config.strategy.min_deletion_ratio,
config.strategy.max_fragments_per_compaction,
format_number(config.strategy.min_vectors_to_compact)
);
println!("{}", "=".repeat(100));
let store_config = VectorStoreConfig::new(config.dimensions)
.with_metric(DistanceMetric::Cosine)
.with_fragment_target_size(config.fragment_target_size);
let mut manifest = create_vector_store(store_config);
let insert_start = Instant::now();
for node_id in 0..config.vectors {
let vector = random_vector(&mut rng, config.dimensions);
vector_store_insert(&mut manifest, node_id as NodeId, &vector).expect("vector insert failed");
}
vector_store_seal_active(&mut manifest);
let insert_elapsed = insert_start.elapsed();
let mut ids: Vec<usize> = (0..config.vectors).collect();
ids.shuffle(&mut rng);
let delete_count = ((config.vectors as f32) * config.delete_ratio).round() as usize;
let delete_start = Instant::now();
let mut deleted = 0usize;
for node_id in ids.iter().take(delete_count) {
if vector_store_delete(&mut manifest, *node_id as NodeId) {
deleted += 1;
}
}
let delete_elapsed = delete_start.elapsed();
let before_store = vector_store_stats(&manifest);
let before_compaction = compaction_stats(&manifest);
let candidate_ids = find_fragments_to_compact(&manifest, &config.strategy);
let clear_start = Instant::now();
let cleared_fragments = clear_deleted_fragments(&mut manifest);
let clear_elapsed = clear_start.elapsed();
let compact_start = Instant::now();
let compacted = run_compaction_if_needed(&mut manifest, &config.strategy);
let compact_elapsed = compact_start.elapsed();
let after_store = vector_store_stats(&manifest);
let after_compaction = compaction_stats(&manifest);
println!(
"insert_elapsed_ms: {:.2}",
insert_elapsed.as_secs_f64() * 1000.0
);
println!(
"insert_throughput_vectors_per_sec: {}",
format_number((config.vectors as f64 / insert_elapsed.as_secs_f64()).round() as usize)
);
println!(
"delete_elapsed_ms: {:.2}",
delete_elapsed.as_secs_f64() * 1000.0
);
println!(
"deleted_vectors: {} (requested {})",
format_number(deleted),
format_number(delete_count)
);
println!(
"clear_deleted_elapsed_ms: {:.2}",
clear_elapsed.as_secs_f64() * 1000.0
);
println!("cleared_fragments: {}", cleared_fragments);
println!(
"compaction_elapsed_ms: {:.2}",
compact_elapsed.as_secs_f64() * 1000.0
);
println!("compaction_performed: {}", compacted);
println!(
"candidate_fragments_before: {} ({:?})",
candidate_ids.len(),
candidate_ids
);
println!("\nStore stats (before -> after):");
println!(
" live_vectors: {} -> {}",
format_number(before_store.live_vectors),
format_number(after_store.live_vectors)
);
println!(
" total_deleted: {} -> {}",
format_number(before_store.total_deleted),
format_number(after_store.total_deleted)
);
println!(
" fragment_count: {} -> {}",
before_store.fragment_count, after_store.fragment_count
);
println!(
" bytes_used: {} -> {}",
format_number(before_store.bytes_used),
format_number(after_store.bytes_used)
);
println!("\nCompaction stats (before -> after):");
println!(
" fragments_needing_compaction: {} -> {}",
before_compaction.fragments_needing_compaction, after_compaction.fragments_needing_compaction
);
println!(
" total_deleted_vectors: {} -> {}",
format_number(before_compaction.total_deleted_vectors),
format_number(after_compaction.total_deleted_vectors)
);
println!(
" average_deletion_ratio: {} -> {}",
format_ratio(before_compaction.average_deletion_ratio),
format_ratio(after_compaction.average_deletion_ratio)
);
}