use anyhow::Result;
use std::collections::HashMap;
use std::time::Instant;
use crate::cli_utils::{format_bytes, format_number, format_qps};
use crate::commands::bench::BenchConfig;
pub fn bench_literal_database(config: &BenchConfig) -> Result<()> {
let &BenchConfig {
count,
temp_file,
keep,
load_iterations,
query_count,
hit_rate,
cache_size,
cache_hit_rate,
} = config;
use matchy::Database;
use matchy::DatabaseBuilder;
use matchy::MatchMode;
println!("--- Phase 1: Build Literal Database ---");
let build_start = Instant::now();
let mut builder = DatabaseBuilder::new(MatchMode::CaseSensitive)
.with_database_type("Benchmark-Literal")
.with_description("en", "Literal database benchmark");
let empty_data = HashMap::new();
let tlds = [
"com", "net", "org", "io", "co", "dev", "app", "tech", "xyz", "cloud",
];
let categories = [
"api", "cdn", "web", "mail", "ftp", "vpn", "db", "auth", "admin", "test",
];
let services = [
"service", "server", "endpoint", "gateway", "proxy", "router", "node", "host", "instance",
"cluster",
];
for i in 0..count {
let literal = match i % 10 {
0 => {
let cat = categories[i % categories.len()];
let svc = services[(i / 10) % services.len()];
let tld = tlds[i % tlds.len()];
format!("{cat}-{svc}-{i}.example.{tld}")
}
1 => {
let cat = categories[i % categories.len()];
format!("/api/v2/{cat}/endpoint/{i}/resource")
}
2 => {
let svc = services[i % services.len()];
format!("/var/log/{svc}/application-{i}.log")
}
3 => {
let cat = categories[i % categories.len()];
let tld = tlds[i % tlds.len()];
format!("{}user{}@domain{}.{}", cat, i, i % 100, tld)
}
4 => {
format!(
"{:08x}-{:04x}-{:04x}-{:04x}-{:012x}",
i,
(i >> 16) & 0xFFFF,
(i >> 8) & 0xFFFF,
i & 0xFFFF,
i * 1000
)
}
5 => {
let cat = categories[i % categories.len()];
let svc = services[i % services.len()];
format!("{cat}_table_{i}.{svc}_column")
}
6 => {
format!("sk_live_{:016x}_{:016x}", i, i * 7)
}
7 => {
let cat = categories[i % categories.len()];
format!(
"docker.io/myorg/{}-image:v{}.{}.{}",
cat,
i / 100,
i % 10,
i % 5
)
}
8 => {
let cat = categories[i % categories.len()];
format!("feature/{cat}-implementation-{i}")
}
_ => {
let cat = categories[i % categories.len()];
let svc = services[i % services.len()];
format!("{cat}_{svc}_{i}")
}
};
builder.add_literal(&literal, empty_data.clone())?;
if count > 10_000 && (i + 1) % 10_000 == 0 {
println!(
" Progress: {}/{}",
format_number(i + 1),
format_number(count)
);
}
}
let db_bytes = builder.build()?;
let build_time = build_start.elapsed();
let build_rate = count as f64 / build_time.as_secs_f64();
println!(" Build time: {:.2}s", build_time.as_secs_f64());
println!(" Build rate: {} literals/sec", format_qps(build_rate));
println!(" DB size: {}", format_bytes(db_bytes.len()));
println!();
println!("--- Phase 2: Save to Disk ---");
let save_start = Instant::now();
std::fs::write(temp_file, &db_bytes)?;
let save_time = save_start.elapsed();
println!(" Save time: {:.2}s", save_time.as_secs_f64());
drop(db_bytes);
println!();
println!("--- Phase 3: Load Database (mmap) ---");
let mut load_times = Vec::new();
for i in 1..=load_iterations {
let load_start = Instant::now();
let _db = Database::from(temp_file.to_str().unwrap()).open()?;
let load_time = load_start.elapsed();
load_times.push(load_time);
println!(
" Load #{}: {:.3}ms",
i,
load_time.as_micros() as f64 / 1000.0
);
}
let avg_load = load_times.iter().sum::<std::time::Duration>()
/ u32::try_from(load_iterations).unwrap_or(1);
println!(" Average: {:.3}ms", avg_load.as_micros() as f64 / 1000.0);
println!();
println!("--- Phase 4: Query Performance ---");
let mut opener = Database::from(temp_file.to_str().unwrap());
if cache_size == 0 {
opener = opener.no_cache();
} else {
opener = opener.cache_capacity(cache_size);
}
let db = opener.open()?;
let unique_queries = if cache_hit_rate >= 100 {
1 } else if cache_hit_rate == 0 {
query_count } else {
let unique = query_count * (100 - cache_hit_rate) / 100;
unique.max(1)
};
let bench_start = Instant::now();
let mut found = 0;
let tlds = [
"com", "net", "org", "io", "co", "dev", "app", "tech", "xyz", "cloud",
];
let categories = [
"api", "cdn", "web", "mail", "ftp", "vpn", "db", "auth", "admin", "test",
];
let services = [
"service", "server", "endpoint", "gateway", "proxy", "router", "node", "host", "instance",
"cluster",
];
for i in 0..query_count {
let query_idx = i % unique_queries;
let should_hit = (query_idx * 100 / unique_queries) < hit_rate;
let test_str = if should_hit {
let pattern_id = (query_idx * 43) % count;
match pattern_id % 10 {
0 => {
let cat = categories[pattern_id % categories.len()];
let svc = services[(pattern_id / 10) % services.len()];
let tld = tlds[pattern_id % tlds.len()];
format!("{cat}-{svc}-{pattern_id}.example.{tld}")
}
1 => {
let cat = categories[pattern_id % categories.len()];
format!("/api/v2/{cat}/endpoint/{pattern_id}/resource")
}
2 => {
let svc = services[pattern_id % services.len()];
format!("/var/log/{svc}/application-{pattern_id}.log")
}
3 => {
let cat = categories[pattern_id % categories.len()];
let tld = tlds[pattern_id % tlds.len()];
format!(
"{}user{}@domain{}.{}",
cat,
pattern_id,
pattern_id % 100,
tld
)
}
4 => format!(
"{:08x}-{:04x}-{:04x}-{:04x}-{:012x}",
pattern_id,
(pattern_id >> 16) & 0xFFFF,
(pattern_id >> 8) & 0xFFFF,
pattern_id & 0xFFFF,
pattern_id * 1000
),
5 => {
let cat = categories[pattern_id % categories.len()];
let svc = services[pattern_id % services.len()];
format!("{cat}_table_{pattern_id}.{svc}_column")
}
6 => format!("sk_live_{:016x}_{:016x}", pattern_id, pattern_id * 7),
7 => {
let cat = categories[pattern_id % categories.len()];
format!(
"docker.io/myorg/{}-image:v{}.{}.{}",
cat,
pattern_id / 100,
pattern_id % 10,
pattern_id % 5
)
}
8 => {
let cat = categories[pattern_id % categories.len()];
format!("feature/{cat}-implementation-{pattern_id}")
}
_ => {
let cat = categories[pattern_id % categories.len()];
let svc = services[pattern_id % services.len()];
format!("{cat}_{svc}_{pattern_id}")
}
}
} else {
format!("nomatch-query-string-{query_idx}")
};
if let Some(matchy::QueryResult::Pattern { pattern_ids, .. }) = db.lookup(&test_str)? {
if !pattern_ids.is_empty() {
found += 1;
}
}
}
let bench_time = bench_start.elapsed();
let qps = query_count as f64 / bench_time.as_secs_f64();
let avg_query = bench_time / u32::try_from(query_count).unwrap_or(1);
println!(" Query count: {}", format_number(query_count));
println!(" Total time: {:.2}s", bench_time.as_secs_f64());
println!(" QPS: {} queries/sec", format_qps(qps));
println!(
" Avg latency: {:.2}µs",
avg_query.as_nanos() as f64 / 1000.0
);
println!(
" Found: {}/{}",
format_number(found),
format_number(query_count)
);
println!();
if keep {
println!("✓ Benchmark complete (file kept: {})", temp_file.display());
} else {
std::fs::remove_file(temp_file)?;
println!("✓ Benchmark complete (temp file removed)");
}
Ok(())
}