use crate::models::ConfidenceSource;
use anyhow::{Context, Result};
use rusqlite::Connection;
use serde::{Deserialize, Serialize};
use std::path::Path;
use std::time::{Duration, Instant};
use crate::db;
use crate::models::{Memory, Tier};
/// Slack factor multiplied onto the effective p95 target before the pass/fail
/// comparison in `percentile_summary` (1.10 leaves 10% headroom over the budget).
pub const P95_TOLERANCE: f64 = 1.10;
/// Platform multiplier applied to p95 targets by the `effective_target_*`
/// methods on `Operation`: 3.0 when compiled for macOS, 1.0 on every other target.
#[cfg(target_os = "macos")]
pub const MACOS_BUDGET_MULT: f64 = 3.0;
#[cfg(not(target_os = "macos"))]
pub const MACOS_BUDGET_MULT: f64 = 1.0;
/// Namespace used by `BenchConfig::default()` for every row the bench writes.
pub const BENCH_NAMESPACE: &str = "ai-memory-bench";
/// Number of measured iterations per operation in `BenchConfig::default()`.
pub const DEFAULT_ITERATIONS: usize = 200;
/// Number of unmeasured warm-up iterations per operation in `BenchConfig::default()`.
pub const DEFAULT_WARMUP: usize = 20;
/// Upper bound for `iterations`. Not enforced anywhere in this part of the file;
/// presumably checked by whoever builds the `BenchConfig` — TODO confirm.
pub const MAX_ITERATIONS: usize = 100_000;
/// Upper bound for `warmup`. Not enforced anywhere in this part of the file;
/// presumably checked by whoever builds the `BenchConfig` — TODO confirm.
pub const MAX_WARMUP: usize = 10_000;
/// Upper bound (percent) for a regression threshold. Not enforced in this part of
/// the file; presumably validated by the caller of `compare_against_baseline` — TODO confirm.
pub const MAX_REGRESSION_THRESHOLD_PCT: f64 = 1000.0;
/// Row count of the single bucket currently listed in `SCALE_BUDGETS`.
pub const CI_SCALE_GATE_ROWS: usize = 10_000;
/// Upper bound for `BenchConfig::scale`. Not enforced in this part of the file;
/// presumably validated by the caller — TODO confirm.
pub const MAX_SCALE: usize = 1_000_000;
/// p95 latency budgets (milliseconds) for one corpus-size bucket.
///
/// Only the three operations listed here get scale-specific budgets; see
/// `Operation::target_p95_ms_at_scale` for how the fields are selected.
#[derive(Debug, Clone, Copy)]
pub struct ScaleBudgets {
/// Bucket size in rows; `scale_budgets_for` returns the first bucket whose
/// `scale` is greater than or equal to the requested row count.
pub scale: usize,
/// p95 budget in ms used for `Operation::StoreNoEmbedding`.
pub store_no_embedding_ms: f64,
/// p95 budget in ms used for `Operation::SearchFts`.
pub search_fts_ms: f64,
/// p95 budget in ms used for `Operation::RecallHot`.
pub recall_hot_ms: f64,
}
/// Table of scale buckets consulted by `scale_budgets_for`.
///
/// The lookup scans rows in declaration order and falls back to the last row,
/// so new rows should presumably be added in ascending `scale` order.
/// The table must stay non-empty: the fallback `expect`s a last element.
pub const SCALE_BUDGETS: &[ScaleBudgets] = &[ScaleBudgets {
scale: CI_SCALE_GATE_ROWS,
store_no_embedding_ms: 120.0,
search_fts_ms: 60.0,
recall_hot_ms: 80.0,
}];
/// Resolves the budget bucket for a corpus of `requested` rows.
///
/// Returns the first entry of `SCALE_BUDGETS` whose `scale` is at least
/// `requested`; when every bucket is smaller, the last bucket is used.
///
/// # Panics
///
/// Panics if `SCALE_BUDGETS` is empty.
#[must_use]
pub fn scale_budgets_for(requested: usize) -> ScaleBudgets {
    SCALE_BUDGETS
        .iter()
        .find(|bucket| bucket.scale >= requested)
        // Requests larger than every bucket reuse the largest (last) one.
        .or_else(|| SCALE_BUDGETS.last())
        .copied()
        .expect("SCALE_BUDGETS table must be non-empty")
}
/// Default regression threshold in percent. Not referenced in this part of the
/// file; presumably the default `threshold_pct` handed to
/// `compare_against_baseline` — TODO confirm against the caller.
pub const DEFAULT_REGRESSION_THRESHOLD_PCT: f64 = 10.0;
/// The operations measured by the bench. Serialized with snake_case variant
/// names (e.g. `store_no_embedding`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Operation {
/// Timed `db::insert` of a synthetic memory.
StoreNoEmbedding,
/// Timed `db::search` against a seeded corpus.
SearchFts,
/// Timed `db::recall` against a seeded corpus.
RecallHot,
/// Timed `db::kg_query` with max depth 1 on the fan-out fixture.
KgQueryDepth1,
/// Timed `db::kg_query` with max depth 3 on the chain fixture.
KgQueryDepth3,
/// Timed `db::kg_query` with max depth 5 on the chain fixture.
KgQueryDepth5,
/// Timed `db::kg_timeline` on the fan-out fixture.
KgTimeline,
}
impl Operation {
    /// Human-readable row label used in the rendered tables.
    #[must_use]
    pub fn label(self) -> &'static str {
        match self {
            Self::KgTimeline => crate::mcp::registry::tool_names::MEMORY_KG_TIMELINE,
            Self::KgQueryDepth5 => "memory_kg_query (depth=5)",
            Self::KgQueryDepth3 => "memory_kg_query (depth=3)",
            Self::KgQueryDepth1 => "memory_kg_query (depth=1)",
            Self::RecallHot => "memory_recall (hot, depth=1)",
            Self::SearchFts => "memory_search (FTS5)",
            Self::StoreNoEmbedding => "memory_store (no embedding)",
        }
    }

    /// Canonical p95 budget in milliseconds, before any platform multiplier
    /// or scale-specific override.
    #[must_use]
    pub fn target_p95_ms(self) -> f64 {
        match self {
            Self::StoreNoEmbedding => 20.0,
            Self::RecallHot => 50.0,
            Self::SearchFts | Self::KgQueryDepth1 | Self::KgQueryDepth3 | Self::KgTimeline => {
                100.0
            }
            Self::KgQueryDepth5 => 250.0,
        }
    }

    /// Canonical p95 budget scaled by `MACOS_BUDGET_MULT`.
    #[must_use]
    pub fn effective_target_p95_ms(self) -> f64 {
        MACOS_BUDGET_MULT * self.target_p95_ms()
    }

    /// p95 budget in milliseconds for a run at the given corpus scale.
    ///
    /// With `scale == None` this is the canonical budget. Otherwise the store,
    /// search and recall operations take their budget from the matching
    /// `ScaleBudgets` bucket, while the knowledge-graph operations keep their
    /// canonical budget.
    #[must_use]
    pub fn target_p95_ms_at_scale(self, scale: Option<usize>) -> f64 {
        match scale {
            None => self.target_p95_ms(),
            Some(rows) => {
                let bucket = scale_budgets_for(rows);
                match self {
                    Self::StoreNoEmbedding => bucket.store_no_embedding_ms,
                    Self::SearchFts => bucket.search_fts_ms,
                    Self::RecallHot => bucket.recall_hot_ms,
                    Self::KgQueryDepth1
                    | Self::KgQueryDepth3
                    | Self::KgQueryDepth5
                    | Self::KgTimeline => self.target_p95_ms(),
                }
            }
        }
    }

    /// Scale-aware p95 budget scaled by `MACOS_BUDGET_MULT`.
    #[must_use]
    pub fn effective_target_p95_ms_at_scale(self, scale: Option<usize>) -> f64 {
        let base = self.target_p95_ms_at_scale(scale);
        base * MACOS_BUDGET_MULT
    }
}
/// Verdict for one benchmarked operation. Serialized as `pass` / `fail`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Status {
/// Measured p95 was within the effective target times `P95_TOLERANCE`.
Pass,
/// Measured p95 exceeded the effective target times `P95_TOLERANCE`.
Fail,
}
/// Latency summary for one benchmarked operation, as built by `percentile_summary`.
#[derive(Debug, Clone, Serialize)]
pub struct OperationResult {
/// Which operation was measured.
pub operation: Operation,
/// Display label, taken from `Operation::label`.
pub label: &'static str,
/// Scale-aware p95 budget in ms WITHOUT the macOS multiplier; `status` is
/// judged against the multiplied ("effective") budget, so the two can differ.
pub target_p95_ms: f64,
/// Measured 50th-percentile latency in ms.
pub measured_p50_ms: f64,
/// Measured 95th-percentile latency in ms.
pub measured_p95_ms: f64,
/// Measured 99th-percentile latency in ms.
pub measured_p99_ms: f64,
/// Number of timed samples that went into the percentiles.
pub samples: usize,
/// Pass/fail verdict for the p95 budget.
pub status: Status,
}
/// Parameters for one bench run (see `run`).
#[derive(Debug, Clone)]
pub struct BenchConfig {
/// Number of timed iterations per operation.
pub iterations: usize,
/// Number of untimed warm-up iterations executed before sampling starts.
pub warmup: usize,
/// Namespace that every synthetic memory is written to and queried from.
pub namespace: String,
/// When set, `run` first seeds this many extra rows and the scale-specific
/// budgets from `SCALE_BUDGETS` apply; `None` uses the canonical budgets.
pub scale: Option<usize>,
}
impl Default for BenchConfig {
    /// Default run: `DEFAULT_ITERATIONS` timed iterations after
    /// `DEFAULT_WARMUP` warm-up iterations, in `BENCH_NAMESPACE`, with no
    /// scale seeding.
    fn default() -> Self {
        let namespace = String::from(BENCH_NAMESPACE);
        Self {
            namespace,
            scale: None,
            warmup: DEFAULT_WARMUP,
            iterations: DEFAULT_ITERATIONS,
        }
    }
}
pub fn run(conn: &Connection, config: &BenchConfig) -> Result<Vec<OperationResult>> {
if let Some(rows) = config.scale {
seed_corpus(conn, &config.namespace, "scale", rows)?;
}
let store = run_store_no_embedding(conn, config)?;
let search = run_search_fts(conn, config)?;
let recall = run_recall_hot(conn, config)?;
let kg_sources = seed_kg_fixture(conn, &config.namespace)?;
let kg_query = run_kg_query_depth1(conn, config, &kg_sources)?;
let kg_chain_sources = seed_kg_chain_fixture(conn, &config.namespace)?;
let kg_query_d3 =
run_kg_query_chain(conn, config, &kg_chain_sources, Operation::KgQueryDepth3, 3)?;
let kg_query_d5 =
run_kg_query_chain(conn, config, &kg_chain_sources, Operation::KgQueryDepth5, 5)?;
let kg_timeline = run_kg_timeline(conn, config, &kg_sources)?;
Ok(vec![
store,
search,
recall,
kg_query,
kg_query_d3,
kg_query_d5,
kg_timeline,
])
}
/// Times `db::insert` of freshly synthesized memories.
///
/// Runs `warmup + iterations` inserts; only the inserts after the warm-up
/// phase contribute samples. Building the synthetic memory is not timed.
fn run_store_no_embedding(conn: &Connection, config: &BenchConfig) -> Result<OperationResult> {
    let warmup = config.warmup;
    let mut timings = Vec::with_capacity(config.iterations);
    for idx in 0..(warmup + config.iterations) {
        let memory = synth_memory(&config.namespace, idx, "store");
        let timer = Instant::now();
        db::insert(conn, &memory)?;
        let took = timer.elapsed();
        if idx < warmup {
            // Warm-up insert: executed but not sampled.
            continue;
        }
        timings.push(took);
    }
    let summary = percentile_summary(Operation::StoreNoEmbedding, &timings, config.scale);
    Ok(summary)
}
/// Times `db::search` over a 200-row corpus seeded with the "search" prefix.
///
/// Queries cycle through `topic-0` .. `topic-49`; the first `warmup` calls
/// are executed but not sampled. Building the query string is not timed.
fn run_search_fts(conn: &Connection, config: &BenchConfig) -> Result<OperationResult> {
    seed_corpus(conn, &config.namespace, "search", 200)?;
    let warmup = config.warmup;
    let mut timings = Vec::with_capacity(config.iterations);
    for idx in 0..(warmup + config.iterations) {
        let topic = idx % 50;
        let query = format!("topic-{topic}");
        let timer = Instant::now();
        let _ = db::search(
            conn,
            &query,
            Some(&config.namespace),
            None,
            10,
            None,
            None,
            None,
            None,
            None,
            None,
            false,
        )?;
        let took = timer.elapsed();
        if idx < warmup {
            continue;
        }
        timings.push(took);
    }
    let summary = percentile_summary(Operation::SearchFts, &timings, config.scale);
    Ok(summary)
}
/// Times `db::recall` over a 200-row corpus seeded with the "recall" prefix.
///
/// The warm-up phase repeats one fixed query; the measured phase varies the
/// topic (mod 50) and category (mod 10) per iteration and records every call.
fn run_recall_hot(conn: &Connection, config: &BenchConfig) -> Result<OperationResult> {
    seed_corpus(conn, &config.namespace, "recall", 200)?;

    // Warm-up: same query every time, results and timings discarded.
    for _ in 0..config.warmup {
        let _ = db::recall(
            conn,
            "topic 0 category 0",
            Some(&config.namespace),
            10,
            None,
            None,
            None,
            0,
            0,
            None,
            None,
            false,
            None,
        )?;
    }

    let mut timings = Vec::with_capacity(config.iterations);
    for n in 0..config.iterations {
        let (topic, category) = (n % 50, n % 10);
        let query = format!("topic {topic} category {category}");
        let timer = Instant::now();
        let _ = db::recall(
            conn,
            &query,
            Some(&config.namespace),
            10,
            None,
            None,
            None,
            0,
            0,
            None,
            None,
            false,
            None,
        )?;
        timings.push(timer.elapsed());
    }

    let summary = percentile_summary(Operation::RecallHot, &timings, config.scale);
    Ok(summary)
}
/// Number of source memories created by `seed_kg_fixture`.
const KG_FIXTURE_SOURCES: usize = 50;
/// Number of target memories linked from each source in `seed_kg_fixture`.
const KG_FIXTURE_LINKS_PER_SOURCE: usize = 4;
/// Number of independent chains created by `seed_kg_chain_fixture`.
const KG_CHAIN_FIXTURE_CHAINS: usize = 50;
/// Number of linked follow-on nodes after each chain head in `seed_kg_chain_fixture`.
const KG_CHAIN_FIXTURE_HOPS: usize = 5;
/// Times `db::kg_query` at max depth 1, cycling through `sources`.
///
/// `sources` must be non-empty (ids returned by `seed_kg_fixture`). The
/// first `warmup` calls are executed but not sampled.
fn run_kg_query_depth1(
    conn: &Connection,
    config: &BenchConfig,
    sources: &[String],
) -> Result<OperationResult> {
    debug_assert!(
        !sources.is_empty(),
        "kg_query bench requires a seeded fixture"
    );
    let warmup = config.warmup;
    let mut timings = Vec::with_capacity(config.iterations);
    for idx in 0..(warmup + config.iterations) {
        let source_id = &sources[idx % sources.len()];
        let timer = Instant::now();
        let _ = db::kg_query(conn, source_id, 1, None, None, None, false)?;
        let took = timer.elapsed();
        if idx < warmup {
            continue;
        }
        timings.push(took);
    }
    let summary = percentile_summary(Operation::KgQueryDepth1, &timings, config.scale);
    Ok(summary)
}
/// Times `db::kg_query` at `max_depth`, cycling through `sources`, and
/// reports the result under `operation`.
///
/// `sources` must be non-empty (chain-head ids from `seed_kg_chain_fixture`).
/// The first `warmup` calls are executed but not sampled.
fn run_kg_query_chain(
    conn: &Connection,
    config: &BenchConfig,
    sources: &[String],
    operation: Operation,
    max_depth: usize,
) -> Result<OperationResult> {
    debug_assert!(
        !sources.is_empty(),
        "kg_query chain bench requires a seeded fixture"
    );
    let warmup = config.warmup;
    let mut timings = Vec::with_capacity(config.iterations);
    for idx in 0..(warmup + config.iterations) {
        let head_id = &sources[idx % sources.len()];
        let timer = Instant::now();
        let _ = db::kg_query(conn, head_id, max_depth, None, None, None, false)?;
        let took = timer.elapsed();
        if idx < warmup {
            continue;
        }
        timings.push(took);
    }
    let summary = percentile_summary(operation, &timings, config.scale);
    Ok(summary)
}
/// Times `db::kg_timeline`, cycling through `sources`.
///
/// `sources` must be non-empty (ids returned by `seed_kg_fixture`). The
/// first `warmup` calls are executed but not sampled.
fn run_kg_timeline(
    conn: &Connection,
    config: &BenchConfig,
    sources: &[String],
) -> Result<OperationResult> {
    debug_assert!(
        !sources.is_empty(),
        "kg_timeline bench requires a seeded fixture"
    );
    let warmup = config.warmup;
    let mut timings = Vec::with_capacity(config.iterations);
    for idx in 0..(warmup + config.iterations) {
        let source_id = &sources[idx % sources.len()];
        let timer = Instant::now();
        let _ = db::kg_timeline(conn, source_id, None, None, None)?;
        let took = timer.elapsed();
        if idx < warmup {
            continue;
        }
        timings.push(took);
    }
    let summary = percentile_summary(Operation::KgTimeline, &timings, config.scale);
    Ok(summary)
}
/// Seeds the fan-out knowledge-graph fixture and returns the source ids.
///
/// Inserts `KG_FIXTURE_SOURCES` source memories; each one is linked with a
/// `RelatedTo` relation to `KG_FIXTURE_LINKS_PER_SOURCE` freshly inserted
/// target memories.
fn seed_kg_fixture(conn: &Connection, namespace: &str) -> Result<Vec<String>> {
    let mut source_ids = Vec::with_capacity(KG_FIXTURE_SOURCES);
    for source_idx in 0..KG_FIXTURE_SOURCES {
        let source_id = db::insert(conn, &synth_memory(namespace, source_idx, "kg-src"))?;
        // Targets are numbered globally so every target gets a distinct index.
        let first_target = source_idx * KG_FIXTURE_LINKS_PER_SOURCE;
        for target_idx in first_target..first_target + KG_FIXTURE_LINKS_PER_SOURCE {
            let target_id = db::insert(conn, &synth_memory(namespace, target_idx, "kg-tgt"))?;
            db::create_link(
                conn,
                &source_id,
                &target_id,
                crate::models::MemoryLinkRelation::RelatedTo.as_str(),
            )?;
        }
        source_ids.push(source_id);
    }
    Ok(source_ids)
}
/// Seeds the chain knowledge-graph fixture and returns the chain-head ids.
///
/// Builds `KG_CHAIN_FIXTURE_CHAINS` independent chains. Each chain is a head
/// memory followed by `KG_CHAIN_FIXTURE_HOPS` nodes, where every node is
/// linked from its predecessor with a `RelatedTo` relation.
fn seed_kg_chain_fixture(conn: &Connection, namespace: &str) -> Result<Vec<String>> {
    let mut head_ids = Vec::with_capacity(KG_CHAIN_FIXTURE_CHAINS);
    for chain in 0..KG_CHAIN_FIXTURE_CHAINS {
        let head_id = db::insert(conn, &synth_memory(namespace, chain, "kg-chain-src"))?;
        // `tail_id` tracks the most recently appended node of this chain.
        let mut tail_id = head_id.clone();
        for hop in 0..KG_CHAIN_FIXTURE_HOPS {
            let node = synth_memory(
                namespace,
                chain * KG_CHAIN_FIXTURE_HOPS + hop,
                "kg-chain-node",
            );
            let node_id = db::insert(conn, &node)?;
            db::create_link(
                conn,
                &tail_id,
                &node_id,
                crate::models::MemoryLinkRelation::RelatedTo.as_str(),
            )?;
            tail_id = node_id;
        }
        head_ids.push(head_id);
    }
    Ok(head_ids)
}
/// Inserts `count` synthetic memories (indices `0..count`) tagged with
/// `prefix` into `namespace`, stopping at the first insert error.
fn seed_corpus(conn: &Connection, namespace: &str, prefix: &str, count: usize) -> Result<()> {
    (0..count).try_for_each(|idx| -> Result<()> {
        db::insert(conn, &synth_memory(namespace, idx, prefix))?;
        Ok(())
    })
}
/// Builds the `i`-th synthetic memory for a workload identified by `prefix`.
///
/// The id is a fresh random UUID and both timestamps are the current UTC
/// time. Title and content are derived from `i` and `prefix`: the content
/// mentions topic `i % 50` and category `i % 10`, which the search/recall
/// benches query for. Priority cycles through 1..=9.
fn synth_memory(namespace: &str, i: usize, prefix: &str) -> Memory {
    let timestamp = chrono::Utc::now().to_rfc3339();
    let topic = i % 50;
    let category = i % 10;
    // `(i % 9) + 1` always fits in i32; 5 is only a formal fallback.
    let priority = i32::try_from((i % 9) + 1).unwrap_or(5);
    Memory {
        id: uuid::Uuid::new_v4().to_string(),
        tier: Tier::Long,
        namespace: namespace.to_owned(),
        title: format!("bench-{prefix}-{i}"),
        content: format!(
            "bench memory {i} content about topic {topic} category {category} for {prefix} workload"
        ),
        tags: Vec::new(),
        priority,
        confidence: 1.0,
        source: String::from("bench"),
        access_count: 0,
        created_at: timestamp.clone(),
        updated_at: timestamp,
        last_accessed_at: None,
        expires_at: None,
        metadata: serde_json::json!({"agent_id": "bench"}),
        reflection_depth: 0,
        memory_kind: crate::models::MemoryKind::Observation,
        entity_id: None,
        persona_version: None,
        citations: Vec::new(),
        source_uri: None,
        source_span: None,
        confidence_source: ConfidenceSource::CallerProvided,
        confidence_signals: None,
        confidence_decayed_at: None,
        version: 1,
    }
}
/// Condenses raw timings into an [`OperationResult`] for `operation`.
///
/// Samples are converted to milliseconds and sorted, then p50/p95/p99 are
/// interpolated. The verdict compares p95 against the platform-adjusted
/// ("effective") scale-aware budget times `P95_TOLERANCE`, while the reported
/// `target_p95_ms` is the unadjusted scale-aware budget.
fn percentile_summary(
    operation: Operation,
    samples: &[Duration],
    scale: Option<usize>,
) -> OperationResult {
    debug_assert!(
        !samples.is_empty(),
        "bench operation produced no samples; iterations must be > 0"
    );
    let mut millis: Vec<f64> = samples.iter().map(duration_ms).collect();
    millis.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

    let [p50, p95, p99] = [0.50, 0.95, 0.99].map(|q| percentile(&millis, q));

    let effective_target = operation.effective_target_p95_ms_at_scale(scale);
    let within_budget = p95 <= effective_target * P95_TOLERANCE;

    OperationResult {
        operation,
        label: operation.label(),
        target_p95_ms: operation.target_p95_ms_at_scale(scale),
        measured_p50_ms: p50,
        measured_p95_ms: p95,
        measured_p99_ms: p99,
        samples: millis.len(),
        status: if within_budget {
            Status::Pass
        } else {
            Status::Fail
        },
    }
}
/// Converts a [`Duration`] to fractional milliseconds.
fn duration_ms(d: &Duration) -> f64 {
    d.as_secs_f64() * 1000.0
}
/// Linearly interpolated percentile of an ascending-sorted slice.
///
/// `q` is the quantile as a fraction (0.95 for p95). The rank is
/// `q * (len - 1)`; when it falls between two elements the result is
/// interpolated between them.
///
/// Edge cases:
/// - an empty slice yields `0.0`;
/// - a single-element slice yields that element;
/// - `q` outside `[0, 1]` is clamped to the nearest bound, so an oversized
///   `q` returns the maximum instead of indexing past the end of the slice.
///
/// The slice must already be sorted ascending; this is not checked.
// The casts are bounded: after clamping, `rank` lies in `[0, len - 1]`.
#[allow(
    clippy::cast_precision_loss,
    clippy::cast_sign_loss,
    clippy::cast_possible_truncation
)]
fn percentile(sorted: &[f64], q: f64) -> f64 {
    match sorted {
        [] => return 0.0,
        [only] => return *only,
        _ => {}
    }
    let q = q.clamp(0.0, 1.0);
    let rank = q * (sorted.len() as f64 - 1.0);
    let lo = rank.floor() as usize;
    let hi = rank.ceil() as usize;
    if lo == hi {
        // Rank landed exactly on an element; nothing to interpolate.
        return sorted[lo];
    }
    let frac = rank - lo as f64;
    sorted[lo] + (sorted[hi] - sorted[lo]) * frac
}
/// Renders bench results as a plain-text table: a header line, a rule line,
/// then one line per result with target, measured p95, p50, p99 and verdict.
///
/// The target column shows `target_p95_ms` rounded to whole milliseconds.
#[must_use]
pub fn render_table(results: &[OperationResult]) -> String {
    const HEADER: &str = "Operation Target (p95) Measured (p95) p50 p99 Status\n";
    const RULE: &str =
        "─────────────────────────────────────────────────────────────────────────────────────────\n";

    let mut table = String::from(HEADER);
    table.push_str(RULE);
    for row in results {
        #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
        let budget_ms = row.target_p95_ms.round() as i64;
        let verdict = match row.status {
            Status::Fail => "FAIL",
            Status::Pass => "PASS",
        };
        table.push_str(&format!(
            "{:<30} < {:>4} ms {:>7.1} ms {:>5.1} {:>5.1} {}\n",
            row.label,
            budget_ms,
            row.measured_p95_ms,
            row.measured_p50_ms,
            row.measured_p99_ms,
            verdict
        ));
    }
    table
}
/// One baseline entry as read from a baseline JSON file: the operation and the
/// p95 it measured. Only these two fields are declared, so a serialized
/// `OperationResult` with its extra fields also parses (see the
/// `load_baseline_round_trips_json_payload` test).
#[derive(Debug, Clone, Deserialize)]
pub struct BaselineRecord {
/// Operation this baseline value belongs to.
pub operation: Operation,
/// Baseline p95 latency in ms.
pub measured_p95_ms: f64,
}
/// Top-level shape of a baseline JSON file; `load_baseline` parses into this
/// and hands back only `results`.
#[derive(Debug, Clone, Deserialize)]
struct BaselineFile {
/// Baseline entries, one per operation.
results: Vec<BaselineRecord>,
}
/// Comparison of one operation's current p95 against its baseline, as produced
/// by `compare_against_baseline`.
#[derive(Debug, Clone, Serialize)]
pub struct Regression {
/// Operation being compared.
pub operation: Operation,
/// Display label, taken from `Operation::label`.
pub label: &'static str,
/// p95 latency in ms recorded in the baseline.
pub baseline_p95_ms: f64,
/// p95 latency in ms measured in the current run.
pub measured_p95_ms: f64,
/// Relative change in percent: positive means slower than baseline, negative
/// means faster; 0.0 when the baseline p95 is not positive.
pub delta_pct: f64,
/// Threshold in percent that was used for the `regressed` verdict.
pub threshold_pct: f64,
/// `true` when `delta_pct` is strictly greater than `threshold_pct`.
pub regressed: bool,
}
/// Reads a baseline JSON file and returns the entries of its `results` array.
///
/// # Errors
///
/// Returns an error, annotated with the file path, when the file cannot be
/// read or its contents do not parse as the expected JSON shape.
pub fn load_baseline(path: &Path) -> Result<Vec<BaselineRecord>> {
    let text = std::fs::read_to_string(path)
        .with_context(|| format!("failed to read baseline file: {}", path.display()))?;
    serde_json::from_str::<BaselineFile>(&text)
        .map(|parsed| parsed.results)
        .with_context(|| format!("failed to parse baseline JSON: {}", path.display()))
}
/// Compares current results against baseline records, operation by operation.
///
/// For each entry of `current` the first baseline record with the same
/// operation is used; results without a baseline record are skipped. The
/// delta is `(current - baseline) / baseline * 100`, or `0.0` when the
/// baseline p95 is not positive. A row counts as regressed only when the delta
/// is strictly greater than `threshold_pct`.
#[must_use]
pub fn compare_against_baseline(
    current: &[OperationResult],
    baseline: &[BaselineRecord],
    threshold_pct: f64,
) -> Vec<Regression> {
    let mut rows = Vec::with_capacity(current.len());
    rows.extend(current.iter().filter_map(|result| {
        let reference = baseline
            .iter()
            .find(|record| record.operation == result.operation)?;
        let base = reference.measured_p95_ms;
        let delta_pct = if base > 0.0 {
            (result.measured_p95_ms - base) / base * 100.0
        } else {
            // No meaningful ratio against a zero (or negative) baseline.
            0.0
        };
        Some(Regression {
            operation: result.operation,
            label: result.operation.label(),
            baseline_p95_ms: base,
            measured_p95_ms: result.measured_p95_ms,
            delta_pct,
            threshold_pct,
            regressed: delta_pct > threshold_pct,
        })
    }));
    rows
}
/// Renders regression rows as a plain-text table: a header line, a rule line,
/// then one line per row with baseline p95, measured p95, signed delta and
/// an `OK` / `REGRESSION` marker.
#[must_use]
pub fn render_regression_table(rows: &[Regression]) -> String {
    const HEADER: &str = "Operation Baseline (p95) Measured (p95) Delta Status\n";
    const RULE: &str =
        "─────────────────────────────────────────────────────────────────────────────────\n";

    let mut table = String::from(HEADER);
    table.push_str(RULE);
    for row in rows {
        let marker = match row.regressed {
            true => "REGRESSION",
            false => "OK",
        };
        table.push_str(&format!(
            "{:<30} {:>10.1} ms {:>10.1} ms {:>+6.1}% {}\n",
            row.label, row.baseline_p95_ms, row.measured_p95_ms, row.delta_pct, marker
        ));
    }
    table
}
/// Appends one bench run to a history file as a single JSON line.
///
/// The line is an object with the keys `captured_at`, `iterations`, `warmup`,
/// `scale` and `results`. Missing parent directories are created, and the
/// file itself is created when absent and otherwise appended to.
///
/// # Errors
///
/// Returns an error when creating directories, opening or writing the file,
/// or serializing the entry fails.
pub fn append_history(
    path: &std::path::Path,
    captured_at: &str,
    iterations: usize,
    warmup: usize,
    scale: Option<usize>,
    results: &[OperationResult],
) -> Result<()> {
    use std::fs::OpenOptions;
    use std::io::Write;

    // A bare file name has an empty parent; there is nothing to create then.
    match path.parent() {
        Some(dir) if !dir.as_os_str().is_empty() => std::fs::create_dir_all(dir)?,
        _ => {}
    }

    let record = serde_json::json!({
        "captured_at": captured_at,
        "iterations": iterations,
        "warmup": warmup,
        "scale": scale,
        "results": results,
    });
    let mut sink = OpenOptions::new().create(true).append(true).open(path)?;
    writeln!(sink, "{}", serde_json::to_string(&record)?)?;
    Ok(())
}
/// Unit tests for the bench harness: percentile math, budget tables, fixture
/// seeding, end-to-end `run`, and baseline/regression comparison.
///
/// Gated on `cfg(test)` so the helpers and imports below are not compiled
/// into non-test builds.
#[cfg(test)]
#[allow(clippy::wildcard_imports)]
mod tests {
    use super::*;
    use crate::db;

    /// Opens a fresh in-memory database.
    fn fresh_conn() -> Connection {
        db::open(Path::new(":memory:")).unwrap()
    }

    /// Small config so the end-to-end tests stay fast.
    fn small_config() -> BenchConfig {
        BenchConfig {
            iterations: 30,
            warmup: 5,
            namespace: "bench-test".to_string(),
            scale: None,
        }
    }

    #[test]
    fn percentile_interpolates() {
        let s = vec![1.0, 2.0, 3.0, 4.0];
        assert!((percentile(&s, 0.50) - 2.5).abs() < 1e-9);
        assert!((percentile(&s, 0.0) - 1.0).abs() < 1e-9);
        assert!((percentile(&s, 1.0) - 4.0).abs() < 1e-9);
    }

    #[test]
    fn percentile_handles_singleton_and_empty() {
        assert!((percentile(&[], 0.5) - 0.0).abs() < 1e-9);
        assert!((percentile(&[42.0], 0.99) - 42.0).abs() < 1e-9);
    }

    #[test]
    fn run_returns_all_seven_results() {
        let conn = fresh_conn();
        let results = run(&conn, &small_config()).unwrap();
        assert_eq!(results.len(), 7);
        assert_eq!(results[0].operation, Operation::StoreNoEmbedding);
        assert_eq!(results[1].operation, Operation::SearchFts);
        assert_eq!(results[2].operation, Operation::RecallHot);
        assert_eq!(results[3].operation, Operation::KgQueryDepth1);
        assert_eq!(results[4].operation, Operation::KgQueryDepth3);
        assert_eq!(results[5].operation, Operation::KgQueryDepth5);
        assert_eq!(results[6].operation, Operation::KgTimeline);
        for r in &results {
            assert_eq!(r.samples, 30);
            assert!(r.measured_p50_ms <= r.measured_p95_ms);
            assert!(r.measured_p95_ms <= r.measured_p99_ms);
            assert!(r.target_p95_ms > 0.0);
        }
    }

    #[test]
    fn status_is_fail_when_p95_over_tolerance() {
        let r = OperationResult {
            operation: Operation::StoreNoEmbedding,
            label: Operation::StoreNoEmbedding.label(),
            target_p95_ms: 20.0,
            measured_p50_ms: 5.0,
            measured_p95_ms: 25.0,
            measured_p99_ms: 30.0,
            samples: 100,
            status: Status::Fail,
        };
        assert_eq!(r.status, Status::Fail);
        let recomputed = if 25.0_f64 <= 20.0 * P95_TOLERANCE {
            Status::Pass
        } else {
            Status::Fail
        };
        assert_eq!(recomputed, Status::Fail);
    }

    #[test]
    fn status_is_pass_within_tolerance() {
        let recomputed = if 21.0_f64 <= 20.0 * P95_TOLERANCE {
            Status::Pass
        } else {
            Status::Fail
        };
        assert_eq!(recomputed, Status::Pass);
    }

    #[test]
    fn render_table_includes_all_operations() {
        let conn = fresh_conn();
        let results = run(&conn, &small_config()).unwrap();
        let table = render_table(&results);
        assert!(table.contains("memory_store (no embedding)"));
        assert!(table.contains("memory_search (FTS5)"));
        assert!(table.contains("memory_recall (hot, depth=1)"));
        assert!(table.contains("memory_kg_query (depth=1)"));
        assert!(table.contains("memory_kg_query (depth=3)"));
        assert!(table.contains("memory_kg_query (depth=5)"));
        assert!(table.contains("memory_kg_timeline"));
        assert!(table.contains("Status"));
    }

    #[test]
    fn operation_targets_match_performance_md() {
        assert!((Operation::StoreNoEmbedding.target_p95_ms() - 20.0).abs() < 1e-9);
        assert!((Operation::SearchFts.target_p95_ms() - 100.0).abs() < 1e-9);
        assert!((Operation::RecallHot.target_p95_ms() - 50.0).abs() < 1e-9);
        assert!((Operation::KgQueryDepth1.target_p95_ms() - 100.0).abs() < 1e-9);
        assert!((Operation::KgQueryDepth3.target_p95_ms() - 100.0).abs() < 1e-9);
        assert!((Operation::KgQueryDepth5.target_p95_ms() - 250.0).abs() < 1e-9);
        assert!((Operation::KgTimeline.target_p95_ms() - 100.0).abs() < 1e-9);
    }

    #[test]
    fn effective_target_applies_macos_multiplier() {
        for op in [
            Operation::StoreNoEmbedding,
            Operation::SearchFts,
            Operation::RecallHot,
            Operation::KgQueryDepth1,
            Operation::KgQueryDepth3,
            Operation::KgQueryDepth5,
            Operation::KgTimeline,
        ] {
            let expected = op.target_p95_ms() * MACOS_BUDGET_MULT;
            assert!(
                (op.effective_target_p95_ms() - expected).abs() < 1e-9,
                "effective budget for {:?} = {} (expected {})",
                op,
                op.effective_target_p95_ms(),
                expected,
            );
        }
        #[cfg(target_os = "macos")]
        assert!((MACOS_BUDGET_MULT - 3.0).abs() < 1e-9);
        #[cfg(not(target_os = "macos"))]
        assert!((MACOS_BUDGET_MULT - 1.0).abs() < 1e-9);
    }

    #[test]
    fn operation_scale_targets_match_performance_md() {
        let at_gate_scale = Some(CI_SCALE_GATE_ROWS);
        assert!(
            (Operation::StoreNoEmbedding.target_p95_ms_at_scale(at_gate_scale) - 120.0).abs()
                < 1e-9
        );
        assert!((Operation::SearchFts.target_p95_ms_at_scale(at_gate_scale) - 60.0).abs() < 1e-9);
        assert!((Operation::RecallHot.target_p95_ms_at_scale(at_gate_scale) - 80.0).abs() < 1e-9);
        for op in [
            Operation::KgQueryDepth1,
            Operation::KgQueryDepth3,
            Operation::KgQueryDepth5,
            Operation::KgTimeline,
        ] {
            assert!(
                (op.target_p95_ms_at_scale(at_gate_scale) - op.target_p95_ms()).abs() < 1e-9,
                "{op:?} must keep its canonical budget at scale"
            );
        }
        assert!((Operation::RecallHot.target_p95_ms_at_scale(None) - 50.0).abs() < 1e-9);
    }

    #[test]
    fn issue_1579_b8_scale_budget_bucket_resolution() {
        assert_eq!(scale_budgets_for(500).scale, CI_SCALE_GATE_ROWS);
        assert_eq!(
            scale_budgets_for(CI_SCALE_GATE_ROWS).scale,
            CI_SCALE_GATE_ROWS
        );
        assert_eq!(scale_budgets_for(MAX_SCALE).scale, CI_SCALE_GATE_ROWS);
    }

    #[test]
    fn issue_1579_b8_scale_run_seeds_corpus_and_uses_scale_budgets() {
        let conn = fresh_conn();
        let ns = "bench-scale-test";
        let config = BenchConfig {
            iterations: 10,
            warmup: 2,
            namespace: ns.to_string(),
            scale: Some(300),
        };
        let results = run(&conn, &config).unwrap();
        assert_eq!(results.len(), 7);
        let seeded: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM memories WHERE namespace = ?1",
                [ns],
                |r| r.get(0),
            )
            .unwrap();
        assert!(
            seeded >= 300,
            "scale run must seed the scratch corpus; found {seeded} rows"
        );
        let store = &results[0];
        assert_eq!(store.operation, Operation::StoreNoEmbedding);
        assert!((store.target_p95_ms - 120.0).abs() < 1e-9);
        let search = &results[1];
        assert!((search.target_p95_ms - 60.0).abs() < 1e-9);
        let recall = &results[2];
        assert!((recall.target_p95_ms - 80.0).abs() < 1e-9);
        assert!((results[3].target_p95_ms - 100.0).abs() < 1e-9);
        assert!((results[5].target_p95_ms - 250.0).abs() < 1e-9);
    }

    #[test]
    fn seed_kg_chain_fixture_traverses_to_max_depth() {
        let conn = fresh_conn();
        let sources = seed_kg_chain_fixture(&conn, "kg-chain-fixture-test").unwrap();
        assert_eq!(sources.len(), KG_CHAIN_FIXTURE_CHAINS);
        for src in &sources {
            let depth5 =
                db::kg_query(&conn, src, KG_CHAIN_FIXTURE_HOPS, None, None, None, false).unwrap();
            assert_eq!(
                depth5.len(),
                KG_CHAIN_FIXTURE_HOPS,
                "depth={KG_CHAIN_FIXTURE_HOPS} on a {KG_CHAIN_FIXTURE_HOPS}-hop chain must reach every node"
            );
            let depth3 = db::kg_query(&conn, src, 3, None, None, None, false).unwrap();
            assert_eq!(
                depth3.len(),
                3,
                "depth=3 on a {KG_CHAIN_FIXTURE_HOPS}-hop chain must reach exactly 3 follow-on nodes"
            );
        }
    }

    #[test]
    fn seed_kg_fixture_populates_sources_and_links() {
        let conn = fresh_conn();
        let sources = seed_kg_fixture(&conn, "kg-fixture-test").unwrap();
        assert_eq!(sources.len(), KG_FIXTURE_SOURCES);
        for src in &sources {
            let nodes = db::kg_query(&conn, src, 1, None, None, None, false).unwrap();
            assert_eq!(nodes.len(), KG_FIXTURE_LINKS_PER_SOURCE);
            let timeline = db::kg_timeline(&conn, src, None, None, None).unwrap();
            assert_eq!(timeline.len(), KG_FIXTURE_LINKS_PER_SOURCE);
            for ev in &timeline {
                assert!(
                    !ev.valid_from.is_empty(),
                    "kg fixture must stamp valid_from on every link"
                );
            }
        }
    }

    /// Builds a passing result for `op` with the given measured p95.
    fn synthetic_result(op: Operation, p95: f64) -> OperationResult {
        OperationResult {
            operation: op,
            label: op.label(),
            target_p95_ms: op.target_p95_ms(),
            measured_p50_ms: p95 / 2.0,
            measured_p95_ms: p95,
            measured_p99_ms: p95 * 1.1,
            samples: 100,
            status: Status::Pass,
        }
    }

    /// Builds a baseline record for `op` with the given p95.
    fn synthetic_baseline(op: Operation, p95: f64) -> BaselineRecord {
        BaselineRecord {
            operation: op,
            measured_p95_ms: p95,
        }
    }

    #[test]
    fn baseline_compare_flags_above_threshold() {
        let current = vec![synthetic_result(Operation::StoreNoEmbedding, 11.2)];
        let baseline = vec![synthetic_baseline(Operation::StoreNoEmbedding, 10.0)];
        let rows = compare_against_baseline(&current, &baseline, 10.0);
        assert_eq!(rows.len(), 1);
        assert!(rows[0].regressed);
        assert!((rows[0].delta_pct - 12.0).abs() < 1e-9);
    }

    #[test]
    fn baseline_compare_passes_within_threshold() {
        let current = vec![synthetic_result(Operation::StoreNoEmbedding, 10.8)];
        let baseline = vec![synthetic_baseline(Operation::StoreNoEmbedding, 10.0)];
        let rows = compare_against_baseline(&current, &baseline, 10.0);
        assert_eq!(rows.len(), 1);
        assert!(!rows[0].regressed);
    }

    #[test]
    fn baseline_compare_speedup_is_negative_delta() {
        let current = vec![synthetic_result(Operation::SearchFts, 8.0)];
        let baseline = vec![synthetic_baseline(Operation::SearchFts, 10.0)];
        let rows = compare_against_baseline(&current, &baseline, 10.0);
        assert_eq!(rows.len(), 1);
        assert!(!rows[0].regressed);
        assert!((rows[0].delta_pct + 20.0).abs() < 1e-9);
    }

    #[test]
    fn baseline_compare_skips_ops_missing_in_baseline() {
        let current = vec![
            synthetic_result(Operation::StoreNoEmbedding, 10.0),
            synthetic_result(Operation::KgQueryDepth5, 200.0),
        ];
        let baseline = vec![synthetic_baseline(Operation::StoreNoEmbedding, 10.0)];
        let rows = compare_against_baseline(&current, &baseline, 10.0);
        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0].operation, Operation::StoreNoEmbedding);
    }

    #[test]
    fn baseline_compare_handles_zero_baseline() {
        let current = vec![synthetic_result(Operation::SearchFts, 5.0)];
        let baseline = vec![synthetic_baseline(Operation::SearchFts, 0.0)];
        let rows = compare_against_baseline(&current, &baseline, 10.0);
        assert_eq!(rows.len(), 1);
        assert!(!rows[0].regressed);
        assert!((rows[0].delta_pct - 0.0).abs() < 1e-9);
    }

    #[test]
    fn load_baseline_round_trips_json_payload() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("baseline.json");
        // Full `OperationResult`-shaped entries: the loader must ignore the
        // fields that `BaselineRecord` does not declare.
        let payload = serde_json::json!({
            "iterations": 200,
            "warmup": 20,
            "results": [
                {
                    "operation": "store_no_embedding",
                    "label": "memory_store (no embedding)",
                    "target_p95_ms": 20.0,
                    "measured_p50_ms": 4.0,
                    "measured_p95_ms": 9.0,
                    "measured_p99_ms": 11.0,
                    "samples": 200,
                    "status": "pass"
                },
                {
                    "operation": "search_fts",
                    "label": "memory_search (FTS5)",
                    "target_p95_ms": 100.0,
                    "measured_p50_ms": 12.0,
                    "measured_p95_ms": 31.0,
                    "measured_p99_ms": 45.0,
                    "samples": 200,
                    "status": "pass"
                }
            ]
        });
        std::fs::write(&path, serde_json::to_string_pretty(&payload).unwrap()).unwrap();
        let loaded = load_baseline(&path).unwrap();
        assert_eq!(loaded.len(), 2);
        assert_eq!(loaded[0].operation, Operation::StoreNoEmbedding);
        assert!((loaded[0].measured_p95_ms - 9.0).abs() < 1e-9);
        assert_eq!(loaded[1].operation, Operation::SearchFts);
        assert!((loaded[1].measured_p95_ms - 31.0).abs() < 1e-9);
    }

    #[test]
    fn render_regression_table_marks_regressions() {
        let rows = vec![
            Regression {
                operation: Operation::StoreNoEmbedding,
                label: Operation::StoreNoEmbedding.label(),
                baseline_p95_ms: 10.0,
                measured_p95_ms: 12.0,
                delta_pct: 20.0,
                threshold_pct: 10.0,
                regressed: true,
            },
            Regression {
                operation: Operation::SearchFts,
                label: Operation::SearchFts.label(),
                baseline_p95_ms: 30.0,
                measured_p95_ms: 31.0,
                delta_pct: 3.3,
                threshold_pct: 10.0,
                regressed: false,
            },
        ];
        let table = render_regression_table(&rows);
        assert!(table.contains("memory_store (no embedding)"));
        assert!(table.contains("memory_search (FTS5)"));
        assert!(table.contains("REGRESSION"));
        assert!(table.contains("OK"));
    }
}