1use crate::models::ConfidenceSource;
32use anyhow::{Context, Result};
33use rusqlite::Connection;
34use serde::{Deserialize, Serialize};
35use std::path::Path;
36use std::time::{Duration, Instant};
37
38use crate::db;
39use crate::models::{Memory, Tier};
40
/// Slack factor applied to the effective p95 budget before the pass/fail
/// comparison in `percentile_summary`: a measured p95 up to 10% above the
/// budget still counts as a pass.
pub const P95_TOLERANCE: f64 = 1.10;

/// Platform multiplier folded into every "effective" p95 budget.
/// This is the macOS value (budgets are tripled).
#[cfg(target_os = "macos")]
pub const MACOS_BUDGET_MULT: f64 = 3.0;
/// Platform multiplier folded into every "effective" p95 budget.
/// This is the value for every non-macOS target (budgets are used as-is).
#[cfg(not(target_os = "macos"))]
pub const MACOS_BUDGET_MULT: f64 = 1.0;

/// Namespace used by `BenchConfig::default()` for all synthetic memories.
pub const BENCH_NAMESPACE: &str = "ai-memory-bench";

/// Number of timed iterations per operation in `BenchConfig::default()`.
pub const DEFAULT_ITERATIONS: usize = 200;

/// Number of untimed warmup iterations per operation in `BenchConfig::default()`.
pub const DEFAULT_WARMUP: usize = 20;

/// Upper bound on the iteration count.
/// NOTE(review): not referenced by the non-test code visible here —
/// presumably enforced by whoever builds a `BenchConfig`; confirm.
pub const MAX_ITERATIONS: usize = 100_000;

/// Upper bound on the warmup count.
/// NOTE(review): not referenced by the non-test code visible here —
/// presumably enforced by whoever builds a `BenchConfig`; confirm.
pub const MAX_WARMUP: usize = 10_000;

/// Upper bound on a regression threshold, in percent.
/// NOTE(review): not referenced by the code visible here — presumably
/// validated by the caller of `compare_against_baseline`; confirm.
pub const MAX_REGRESSION_THRESHOLD_PCT: f64 = 1000.0;

/// Corpus size (in rows) of the single entry in `SCALE_BUDGETS`.
pub const CI_SCALE_GATE_ROWS: usize = 10_000;

/// Upper bound on the `scale` corpus size.
/// NOTE(review): only referenced from the tests visible here — presumably
/// enforced by whoever builds a `BenchConfig`; confirm.
pub const MAX_SCALE: usize = 1_000_000;
97
/// One row of the scale-budget table: the p95 budgets (in milliseconds) that
/// replace the canonical ones for the store, search and recall operations
/// when a benchmark runs against a seeded corpus.
#[derive(Debug, Clone, Copy)]
pub struct ScaleBudgets {
    /// Corpus size this row is keyed on; `scale_budgets_for` picks the first
    /// row whose `scale` is at least the requested size.
    pub scale: usize,
    /// p95 budget for `Operation::StoreNoEmbedding`, in milliseconds.
    pub store_no_embedding_ms: f64,
    /// p95 budget for `Operation::SearchFts`, in milliseconds.
    pub search_fts_ms: f64,
    /// p95 budget for `Operation::RecallHot`, in milliseconds.
    pub recall_hot_ms: f64,
}
117
/// Scale-budget table scanned front to back by `scale_budgets_for`.
/// It currently holds a single row keyed on `CI_SCALE_GATE_ROWS`, so every
/// requested scale resolves to that row. The table must stay non-empty:
/// `scale_budgets_for` panics otherwise.
pub const SCALE_BUDGETS: &[ScaleBudgets] = &[ScaleBudgets {
    scale: CI_SCALE_GATE_ROWS,
    store_no_embedding_ms: 120.0,
    search_fts_ms: 60.0,
    recall_hot_ms: 80.0,
}];
133
134#[must_use]
138pub fn scale_budgets_for(requested: usize) -> ScaleBudgets {
139 for row in SCALE_BUDGETS {
140 if row.scale >= requested {
141 return *row;
142 }
143 }
144 *SCALE_BUDGETS
145 .last()
146 .expect("SCALE_BUDGETS table must be non-empty")
147}
148
/// Regression threshold, in percent.
/// NOTE(review): not referenced by the code visible here — presumably the
/// default for the `threshold_pct` argument of `compare_against_baseline`;
/// confirm against the caller.
pub const DEFAULT_REGRESSION_THRESHOLD_PCT: f64 = 10.0;
156
/// The operations measured by `run`, in the order `run` reports them.
///
/// Serialized in `snake_case` (for example `store_no_embedding`,
/// `search_fts`), which is also the form `load_baseline` expects.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Operation {
    /// Timed `db::insert` of a synthetic memory.
    StoreNoEmbedding,
    /// Timed `db::search` with a limit of 10 inside the bench namespace.
    SearchFts,
    /// Timed `db::recall` with a limit of 10 inside the bench namespace.
    RecallHot,
    /// Timed `db::kg_query` with a maximum depth of 1 on the fan-out fixture.
    KgQueryDepth1,
    /// Timed `db::kg_query` with a maximum depth of 3 on the chain fixture.
    KgQueryDepth3,
    /// Timed `db::kg_query` with a maximum depth of 5 on the chain fixture.
    KgQueryDepth5,
    /// Timed `db::kg_timeline` on the fan-out fixture.
    KgTimeline,
}
182
183impl Operation {
184 #[must_use]
185 pub fn label(self) -> &'static str {
186 match self {
187 Self::StoreNoEmbedding => "memory_store (no embedding)",
188 Self::SearchFts => "memory_search (FTS5)",
189 Self::RecallHot => "memory_recall (hot, depth=1)",
190 Self::KgQueryDepth1 => "memory_kg_query (depth=1)",
191 Self::KgQueryDepth3 => "memory_kg_query (depth=3)",
192 Self::KgQueryDepth5 => "memory_kg_query (depth=5)",
193 Self::KgTimeline => crate::mcp::registry::tool_names::MEMORY_KG_TIMELINE,
194 }
195 }
196
197 #[must_use]
209 #[allow(clippy::match_same_arms)]
210 pub fn target_p95_ms(self) -> f64 {
211 match self {
212 Self::StoreNoEmbedding => 20.0,
213 Self::SearchFts => 100.0,
214 Self::RecallHot => 50.0,
215 Self::KgQueryDepth1 => 100.0,
216 Self::KgQueryDepth3 => 100.0,
217 Self::KgQueryDepth5 => 250.0,
218 Self::KgTimeline => 100.0,
219 }
220 }
221
222 #[must_use]
229 pub fn effective_target_p95_ms(self) -> f64 {
230 self.target_p95_ms() * MACOS_BUDGET_MULT
231 }
232
233 #[must_use]
241 pub fn target_p95_ms_at_scale(self, scale: Option<usize>) -> f64 {
242 let Some(rows) = scale else {
243 return self.target_p95_ms();
244 };
245 let budgets = scale_budgets_for(rows);
246 match self {
247 Self::StoreNoEmbedding => budgets.store_no_embedding_ms,
248 Self::SearchFts => budgets.search_fts_ms,
249 Self::RecallHot => budgets.recall_hot_ms,
250 Self::KgQueryDepth1 | Self::KgQueryDepth3 | Self::KgQueryDepth5 | Self::KgTimeline => {
251 self.target_p95_ms()
252 }
253 }
254 }
255
256 #[must_use]
260 pub fn effective_target_p95_ms_at_scale(self, scale: Option<usize>) -> f64 {
261 self.target_p95_ms_at_scale(scale) * MACOS_BUDGET_MULT
262 }
263}
264
/// Outcome of comparing a measured p95 against its budget.
/// Serialized in `snake_case` (`pass` / `fail`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Status {
    /// Measured p95 was within the effective budget times `P95_TOLERANCE`.
    Pass,
    /// Measured p95 exceeded the effective budget times `P95_TOLERANCE`.
    Fail,
}
271
/// Latency summary for one benchmarked operation, as built by
/// `percentile_summary`.
#[derive(Debug, Clone, Serialize)]
pub struct OperationResult {
    /// Which operation was measured.
    pub operation: Operation,
    /// Display name, taken from `Operation::label`.
    pub label: &'static str,
    /// Scale-aware p95 budget in milliseconds, *without* the
    /// `MACOS_BUDGET_MULT` platform multiplier; `status` is decided against
    /// the multiplied budget, not against this value.
    pub target_p95_ms: f64,
    /// Median of the timed samples, in milliseconds.
    pub measured_p50_ms: f64,
    /// 95th percentile of the timed samples, in milliseconds.
    pub measured_p95_ms: f64,
    /// 99th percentile of the timed samples, in milliseconds.
    pub measured_p99_ms: f64,
    /// Number of timed samples the percentiles were computed from.
    pub samples: usize,
    /// Pass/fail verdict for the measured p95.
    pub status: Status,
}
284
/// Knobs controlling a benchmark run.
#[derive(Debug, Clone)]
pub struct BenchConfig {
    /// Number of timed iterations recorded per operation.
    pub iterations: usize,
    /// Number of leading iterations per operation that run but are not recorded.
    pub warmup: usize,
    /// Namespace every synthetic memory is written to and queried from.
    pub namespace: String,
    /// When `Some(rows)`, `run` first seeds `rows` extra memories and the
    /// store/search/recall budgets come from the scale-budget table.
    pub scale: Option<usize>,
}
297
298impl Default for BenchConfig {
299 fn default() -> Self {
300 Self {
301 iterations: DEFAULT_ITERATIONS,
302 warmup: DEFAULT_WARMUP,
303 namespace: BENCH_NAMESPACE.to_string(),
304 scale: None,
305 }
306 }
307}
308
309pub fn run(conn: &Connection, config: &BenchConfig) -> Result<Vec<OperationResult>> {
320 if let Some(rows) = config.scale {
328 seed_corpus(conn, &config.namespace, "scale", rows)?;
329 }
330 let store = run_store_no_embedding(conn, config)?;
331 let search = run_search_fts(conn, config)?;
332 let recall = run_recall_hot(conn, config)?;
333 let kg_sources = seed_kg_fixture(conn, &config.namespace)?;
334 let kg_query = run_kg_query_depth1(conn, config, &kg_sources)?;
335 let kg_chain_sources = seed_kg_chain_fixture(conn, &config.namespace)?;
336 let kg_query_d3 =
337 run_kg_query_chain(conn, config, &kg_chain_sources, Operation::KgQueryDepth3, 3)?;
338 let kg_query_d5 =
339 run_kg_query_chain(conn, config, &kg_chain_sources, Operation::KgQueryDepth5, 5)?;
340 let kg_timeline = run_kg_timeline(conn, config, &kg_sources)?;
341 Ok(vec![
342 store,
343 search,
344 recall,
345 kg_query,
346 kg_query_d3,
347 kg_query_d5,
348 kg_timeline,
349 ])
350}
351
352fn run_store_no_embedding(conn: &Connection, config: &BenchConfig) -> Result<OperationResult> {
353 let total = config.warmup + config.iterations;
354 let mut samples = Vec::with_capacity(config.iterations);
355 for i in 0..total {
356 let mem = synth_memory(&config.namespace, i, "store");
357 let start = Instant::now();
358 db::insert(conn, &mem)?;
359 let elapsed = start.elapsed();
360 if i >= config.warmup {
361 samples.push(elapsed);
362 }
363 }
364 Ok(percentile_summary(
365 Operation::StoreNoEmbedding,
366 &samples,
367 config.scale,
368 ))
369}
370
371fn run_search_fts(conn: &Connection, config: &BenchConfig) -> Result<OperationResult> {
372 seed_corpus(conn, &config.namespace, "search", 200)?;
373 let total = config.warmup + config.iterations;
374 let mut samples = Vec::with_capacity(config.iterations);
375 for i in 0..total {
376 let query = format!("topic-{}", i % 50);
377 let start = Instant::now();
378 let _ = db::search(
379 conn,
380 &query,
381 Some(&config.namespace),
382 None,
383 10,
384 None,
385 None,
386 None,
387 None,
388 None,
389 None,
390 false,
391 )?;
392 let elapsed = start.elapsed();
393 if i >= config.warmup {
394 samples.push(elapsed);
395 }
396 }
397 Ok(percentile_summary(
398 Operation::SearchFts,
399 &samples,
400 config.scale,
401 ))
402}
403
404fn run_recall_hot(conn: &Connection, config: &BenchConfig) -> Result<OperationResult> {
405 seed_corpus(conn, &config.namespace, "recall", 200)?;
406 let warmup_query = "topic 0 category 0";
407 for _ in 0..config.warmup {
408 let _ = db::recall(
409 conn,
410 warmup_query,
411 Some(&config.namespace),
412 10,
413 None,
414 None,
415 None,
416 0,
417 0,
418 None,
419 None,
420 false,
421 None,
422 )?;
423 }
424 let mut samples = Vec::with_capacity(config.iterations);
425 for i in 0..config.iterations {
426 let query = format!("topic {} category {}", i % 50, i % 10);
427 let start = Instant::now();
428 let _ = db::recall(
429 conn,
430 &query,
431 Some(&config.namespace),
432 10,
433 None,
434 None,
435 None,
436 0,
437 0,
438 None,
439 None,
440 false,
441 None,
442 )?;
443 samples.push(start.elapsed());
444 }
445 Ok(percentile_summary(
446 Operation::RecallHot,
447 &samples,
448 config.scale,
449 ))
450}
451
/// Number of source memories `seed_kg_fixture` creates.
const KG_FIXTURE_SOURCES: usize = 50;
/// Number of target memories `seed_kg_fixture` links to each source.
const KG_FIXTURE_LINKS_PER_SOURCE: usize = 4;

/// Number of independent chains `seed_kg_chain_fixture` creates.
const KG_CHAIN_FIXTURE_CHAINS: usize = 50;
/// Number of linked follow-on nodes appended after each chain head.
const KG_CHAIN_FIXTURE_HOPS: usize = 5;
466
467fn run_kg_query_depth1(
468 conn: &Connection,
469 config: &BenchConfig,
470 sources: &[String],
471) -> Result<OperationResult> {
472 debug_assert!(
473 !sources.is_empty(),
474 "kg_query bench requires a seeded fixture"
475 );
476 let total = config.warmup + config.iterations;
477 let mut samples = Vec::with_capacity(config.iterations);
478 for i in 0..total {
479 let src = &sources[i % sources.len()];
480 let start = Instant::now();
481 let _ = db::kg_query(conn, src, 1, None, None, None, false)?;
482 let elapsed = start.elapsed();
483 if i >= config.warmup {
484 samples.push(elapsed);
485 }
486 }
487 Ok(percentile_summary(
488 Operation::KgQueryDepth1,
489 &samples,
490 config.scale,
491 ))
492}
493
494fn run_kg_query_chain(
495 conn: &Connection,
496 config: &BenchConfig,
497 sources: &[String],
498 operation: Operation,
499 max_depth: usize,
500) -> Result<OperationResult> {
501 debug_assert!(
502 !sources.is_empty(),
503 "kg_query chain bench requires a seeded fixture"
504 );
505 let total = config.warmup + config.iterations;
506 let mut samples = Vec::with_capacity(config.iterations);
507 for i in 0..total {
508 let src = &sources[i % sources.len()];
509 let start = Instant::now();
510 let _ = db::kg_query(conn, src, max_depth, None, None, None, false)?;
511 let elapsed = start.elapsed();
512 if i >= config.warmup {
513 samples.push(elapsed);
514 }
515 }
516 Ok(percentile_summary(operation, &samples, config.scale))
517}
518
519fn run_kg_timeline(
520 conn: &Connection,
521 config: &BenchConfig,
522 sources: &[String],
523) -> Result<OperationResult> {
524 debug_assert!(
525 !sources.is_empty(),
526 "kg_timeline bench requires a seeded fixture"
527 );
528 let total = config.warmup + config.iterations;
529 let mut samples = Vec::with_capacity(config.iterations);
530 for i in 0..total {
531 let src = &sources[i % sources.len()];
532 let start = Instant::now();
533 let _ = db::kg_timeline(conn, src, None, None, None)?;
534 let elapsed = start.elapsed();
535 if i >= config.warmup {
536 samples.push(elapsed);
537 }
538 }
539 Ok(percentile_summary(
540 Operation::KgTimeline,
541 &samples,
542 config.scale,
543 ))
544}
545
546fn seed_kg_fixture(conn: &Connection, namespace: &str) -> Result<Vec<String>> {
552 let mut sources = Vec::with_capacity(KG_FIXTURE_SOURCES);
553 for s in 0..KG_FIXTURE_SOURCES {
554 let src = synth_memory(namespace, s, "kg-src");
555 let src_id = db::insert(conn, &src)?;
560 for t in 0..KG_FIXTURE_LINKS_PER_SOURCE {
561 let target_idx = s * KG_FIXTURE_LINKS_PER_SOURCE + t;
562 let tgt = synth_memory(namespace, target_idx, "kg-tgt");
563 let tgt_id = db::insert(conn, &tgt)?;
564 db::create_link(
568 conn,
569 &src_id,
570 &tgt_id,
571 crate::models::MemoryLinkRelation::RelatedTo.as_str(),
572 )?;
573 }
574 sources.push(src_id);
575 }
576 Ok(sources)
577}
578
579fn seed_kg_chain_fixture(conn: &Connection, namespace: &str) -> Result<Vec<String>> {
587 let mut sources = Vec::with_capacity(KG_CHAIN_FIXTURE_CHAINS);
588 for c in 0..KG_CHAIN_FIXTURE_CHAINS {
589 let mut prev_id = {
590 let head = synth_memory(namespace, c, "kg-chain-src");
591 db::insert(conn, &head)?
592 };
593 let chain_head_id = prev_id.clone();
594 for h in 0..KG_CHAIN_FIXTURE_HOPS {
595 let node_idx = c * KG_CHAIN_FIXTURE_HOPS + h;
596 let next = synth_memory(namespace, node_idx, "kg-chain-node");
597 let next_id = db::insert(conn, &next)?;
598 db::create_link(
599 conn,
600 &prev_id,
601 &next_id,
602 crate::models::MemoryLinkRelation::RelatedTo.as_str(),
603 )?;
604 prev_id = next_id;
605 }
606 sources.push(chain_head_id);
607 }
608 Ok(sources)
609}
610
611fn seed_corpus(conn: &Connection, namespace: &str, prefix: &str, count: usize) -> Result<()> {
612 for i in 0..count {
613 let mem = synth_memory(namespace, i, prefix);
614 db::insert(conn, &mem)?;
615 }
616 Ok(())
617}
618
619fn synth_memory(namespace: &str, i: usize, prefix: &str) -> Memory {
620 let now = chrono::Utc::now().to_rfc3339();
621 Memory {
622 id: uuid::Uuid::new_v4().to_string(),
623 tier: Tier::Long,
624 namespace: namespace.to_string(),
625 title: format!("bench-{prefix}-{i}"),
626 content: format!(
627 "bench memory {i} content about topic {} category {} for {prefix} workload",
628 i % 50,
629 i % 10
630 ),
631 tags: vec![],
632 priority: i32::try_from((i % 9) + 1).unwrap_or(5),
633 confidence: 1.0,
634 source: "bench".to_string(),
635 access_count: 0,
636 created_at: now.clone(),
637 updated_at: now,
638 last_accessed_at: None,
639 expires_at: None,
640 metadata: serde_json::json!({"agent_id": "bench"}),
641 reflection_depth: 0,
642 memory_kind: crate::models::MemoryKind::Observation,
643 entity_id: None,
644 persona_version: None,
645 citations: Vec::new(),
646 source_uri: None,
647 source_span: None,
648 confidence_source: ConfidenceSource::CallerProvided,
649 confidence_signals: None,
650 confidence_decayed_at: None,
651 version: 1,
652 }
653}
654
655fn percentile_summary(
656 operation: Operation,
657 samples: &[Duration],
658 scale: Option<usize>,
660) -> OperationResult {
661 debug_assert!(
662 !samples.is_empty(),
663 "bench operation produced no samples; iterations must be > 0"
664 );
665 let mut sorted: Vec<f64> = samples.iter().map(duration_ms).collect();
666 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
667 let p50 = percentile(&sorted, 0.50);
668 let p95 = percentile(&sorted, 0.95);
669 let p99 = percentile(&sorted, 0.99);
670 let target = operation.target_p95_ms_at_scale(scale);
674 let effective_target = operation.effective_target_p95_ms_at_scale(scale);
679 let status = if p95 <= effective_target * P95_TOLERANCE {
680 Status::Pass
681 } else {
682 Status::Fail
683 };
684 OperationResult {
685 operation,
686 label: operation.label(),
687 target_p95_ms: target,
688 measured_p50_ms: p50,
689 measured_p95_ms: p95,
690 measured_p99_ms: p99,
691 samples: sorted.len(),
692 status,
693 }
694}
695
/// Converts a duration to fractional milliseconds.
///
/// Takes a reference so it can be passed directly to `Iterator::map` over a
/// slice of durations.
fn duration_ms(d: &Duration) -> f64 {
    d.as_secs_f64() * 1000.0
}
700
/// Returns the `q`-quantile of an ascending-sorted slice using linear
/// interpolation between the two closest ranks.
///
/// * An empty slice yields `0.0`; a single-element slice yields that element.
/// * `q` is clamped into `[0.0, 1.0]`, so an out-of-range quantile resolves
///   to the minimum or maximum instead of indexing past the end of the slice.
///
/// The slice must already be sorted in ascending order; this is not checked.
// The casts are between a rank bounded by `sorted.len() - 1` and `usize`,
// so no precision, sign or truncation issue can occur in practice.
#[allow(
    clippy::cast_precision_loss,
    clippy::cast_sign_loss,
    clippy::cast_possible_truncation
)]
fn percentile(sorted: &[f64], q: f64) -> f64 {
    match sorted {
        [] => return 0.0,
        [only] => return *only,
        _ => {}
    }
    // After clamping, `rank` lies in `[0, len - 1]`, so both `lo` and `hi`
    // are valid indices.
    let q = q.clamp(0.0, 1.0);
    let rank = q * (sorted.len() as f64 - 1.0);
    let lo = rank.floor() as usize;
    let hi = rank.ceil() as usize;
    if lo == hi {
        return sorted[lo];
    }
    let frac = rank - lo as f64;
    sorted[lo] + (sorted[hi] - sorted[lo]) * frac
}
722
723#[must_use]
726pub fn render_table(results: &[OperationResult]) -> String {
727 let mut out = String::new();
728 out.push_str(
729 "Operation Target (p95) Measured (p95) p50 p99 Status\n",
730 );
731 out.push_str(
732 "─────────────────────────────────────────────────────────────────────────────────────────\n",
733 );
734 for r in results {
735 let status_str = match r.status {
736 Status::Pass => "PASS",
737 Status::Fail => "FAIL",
738 };
739 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
744 let target_ms = r.target_p95_ms.round() as i64;
745 let line = format!(
746 "{:<30} < {:>4} ms {:>7.1} ms {:>5.1} {:>5.1} {}\n",
747 r.label, target_ms, r.measured_p95_ms, r.measured_p50_ms, r.measured_p99_ms, status_str
748 );
749 out.push_str(&line);
750 }
751 out
752}
753
/// One operation's entry from a baseline file. Only the two fields below are
/// read; any other keys present in the JSON object are ignored.
#[derive(Debug, Clone, Deserialize)]
pub struct BaselineRecord {
    /// Operation the baseline measurement belongs to.
    pub operation: Operation,
    /// Baseline p95 latency in milliseconds.
    pub measured_p95_ms: f64,
}
763
/// Top-level shape of a baseline JSON document as read by `load_baseline`:
/// an object with a `results` array. Other top-level keys are ignored.
#[derive(Debug, Clone, Deserialize)]
struct BaselineFile {
    /// Per-operation baseline entries.
    results: Vec<BaselineRecord>,
}
773
/// Comparison of one operation's measured p95 against its baseline, as
/// produced by `compare_against_baseline`.
#[derive(Debug, Clone, Serialize)]
pub struct Regression {
    /// Operation being compared.
    pub operation: Operation,
    /// Display name, taken from `Operation::label`.
    pub label: &'static str,
    /// Baseline p95 in milliseconds.
    pub baseline_p95_ms: f64,
    /// Current p95 in milliseconds.
    pub measured_p95_ms: f64,
    /// Relative change versus the baseline, in percent. Positive means
    /// slower; `0.0` when the baseline is not positive.
    pub delta_pct: f64,
    /// Threshold (percent) the delta was compared against.
    pub threshold_pct: f64,
    /// `true` when `delta_pct` is strictly greater than `threshold_pct`.
    pub regressed: bool,
}
787
788pub fn load_baseline(path: &Path) -> Result<Vec<BaselineRecord>> {
795 let raw = std::fs::read_to_string(path)
796 .with_context(|| format!("failed to read baseline file: {}", path.display()))?;
797 let file: BaselineFile = serde_json::from_str(&raw)
798 .with_context(|| format!("failed to parse baseline JSON: {}", path.display()))?;
799 Ok(file.results)
800}
801
802#[must_use]
807pub fn compare_against_baseline(
808 current: &[OperationResult],
809 baseline: &[BaselineRecord],
810 threshold_pct: f64,
811) -> Vec<Regression> {
812 let mut out = Vec::with_capacity(current.len());
813 for r in current {
814 let Some(b) = baseline.iter().find(|b| b.operation == r.operation) else {
815 continue;
816 };
817 let delta_pct = if b.measured_p95_ms > 0.0 {
823 (r.measured_p95_ms - b.measured_p95_ms) / b.measured_p95_ms * 100.0
824 } else {
825 0.0
826 };
827 let regressed = delta_pct > threshold_pct;
828 out.push(Regression {
829 operation: r.operation,
830 label: r.operation.label(),
831 baseline_p95_ms: b.measured_p95_ms,
832 measured_p95_ms: r.measured_p95_ms,
833 delta_pct,
834 threshold_pct,
835 regressed,
836 });
837 }
838 out
839}
840
841#[must_use]
844pub fn render_regression_table(rows: &[Regression]) -> String {
845 let mut out = String::new();
846 out.push_str(
847 "Operation Baseline (p95) Measured (p95) Delta Status\n",
848 );
849 out.push_str(
850 "─────────────────────────────────────────────────────────────────────────────────\n",
851 );
852 for r in rows {
853 let status_str = if r.regressed { "REGRESSION" } else { "OK" };
854 let line = format!(
855 "{:<30} {:>10.1} ms {:>10.1} ms {:>+6.1}% {}\n",
856 r.label, r.baseline_p95_ms, r.measured_p95_ms, r.delta_pct, status_str
857 );
858 out.push_str(&line);
859 }
860 out
861}
862
863pub fn append_history(
868 path: &std::path::Path,
869 captured_at: &str,
870 iterations: usize,
871 warmup: usize,
872 scale: Option<usize>,
876 results: &[OperationResult],
877) -> Result<()> {
878 use std::fs::OpenOptions;
879 use std::io::Write;
880
881 if let Some(parent) = path.parent()
883 && !parent.as_os_str().is_empty()
884 {
885 std::fs::create_dir_all(parent)?;
886 }
887
888 let entry = serde_json::json!({
889 "captured_at": captured_at,
890 "iterations": iterations,
891 "warmup": warmup,
892 "scale": scale,
893 "results": results,
894 });
895
896 let mut file = OpenOptions::new().create(true).append(true).open(path)?;
897
898 writeln!(file, "{}", serde_json::to_string(&entry)?)?;
899 Ok(())
900}
901
/// Unit tests for the bench harness. Compiled only for test builds.
#[cfg(test)]
#[allow(clippy::wildcard_imports)]
mod tests {
    use super::*;
    use crate::db;

    /// Opens a fresh in-memory database.
    fn fresh_conn() -> Connection {
        db::open(Path::new(":memory:")).unwrap()
    }

    /// Small configuration that keeps the full-run tests fast.
    fn small_config() -> BenchConfig {
        BenchConfig {
            iterations: 30,
            warmup: 5,
            namespace: "bench-test".to_string(),
            scale: None,
        }
    }

    #[test]
    fn percentile_interpolates() {
        let s = vec![1.0, 2.0, 3.0, 4.0];
        assert!((percentile(&s, 0.50) - 2.5).abs() < 1e-9);
        assert!((percentile(&s, 0.0) - 1.0).abs() < 1e-9);
        assert!((percentile(&s, 1.0) - 4.0).abs() < 1e-9);
    }

    #[test]
    fn percentile_handles_singleton_and_empty() {
        assert!((percentile(&[], 0.5) - 0.0).abs() < 1e-9);
        assert!((percentile(&[42.0], 0.99) - 42.0).abs() < 1e-9);
    }

    #[test]
    fn run_returns_all_seven_results() {
        let conn = fresh_conn();
        let results = run(&conn, &small_config()).unwrap();
        assert_eq!(results.len(), 7);
        assert_eq!(results[0].operation, Operation::StoreNoEmbedding);
        assert_eq!(results[1].operation, Operation::SearchFts);
        assert_eq!(results[2].operation, Operation::RecallHot);
        assert_eq!(results[3].operation, Operation::KgQueryDepth1);
        assert_eq!(results[4].operation, Operation::KgQueryDepth3);
        assert_eq!(results[5].operation, Operation::KgQueryDepth5);
        assert_eq!(results[6].operation, Operation::KgTimeline);
        for r in &results {
            assert_eq!(r.samples, 30);
            assert!(r.measured_p50_ms <= r.measured_p95_ms);
            assert!(r.measured_p95_ms <= r.measured_p99_ms);
            assert!(r.target_p95_ms > 0.0);
        }
    }

    #[test]
    fn status_is_fail_when_p95_over_tolerance() {
        let r = OperationResult {
            operation: Operation::StoreNoEmbedding,
            label: Operation::StoreNoEmbedding.label(),
            target_p95_ms: 20.0,
            measured_p50_ms: 5.0,
            measured_p95_ms: 25.0,
            measured_p99_ms: 30.0,
            samples: 100,
            status: Status::Fail,
        };
        assert_eq!(r.status, Status::Fail);
        let recomputed = if 25.0_f64 <= 20.0 * P95_TOLERANCE {
            Status::Pass
        } else {
            Status::Fail
        };
        assert_eq!(recomputed, Status::Fail);
    }

    #[test]
    fn status_is_pass_within_tolerance() {
        let recomputed = if 21.0_f64 <= 20.0 * P95_TOLERANCE {
            Status::Pass
        } else {
            Status::Fail
        };
        assert_eq!(recomputed, Status::Pass);
    }

    #[test]
    fn render_table_includes_all_operations() {
        let conn = fresh_conn();
        let results = run(&conn, &small_config()).unwrap();
        let table = render_table(&results);
        assert!(table.contains("memory_store (no embedding)"));
        assert!(table.contains("memory_search (FTS5)"));
        assert!(table.contains("memory_recall (hot, depth=1)"));
        assert!(table.contains("memory_kg_query (depth=1)"));
        assert!(table.contains("memory_kg_query (depth=3)"));
        assert!(table.contains("memory_kg_query (depth=5)"));
        assert!(table.contains("memory_kg_timeline"));
        assert!(table.contains("Status"));
    }

    #[test]
    fn operation_targets_match_performance_md() {
        assert!((Operation::StoreNoEmbedding.target_p95_ms() - 20.0).abs() < 1e-9);
        assert!((Operation::SearchFts.target_p95_ms() - 100.0).abs() < 1e-9);
        assert!((Operation::RecallHot.target_p95_ms() - 50.0).abs() < 1e-9);
        assert!((Operation::KgQueryDepth1.target_p95_ms() - 100.0).abs() < 1e-9);
        assert!((Operation::KgQueryDepth3.target_p95_ms() - 100.0).abs() < 1e-9);
        assert!((Operation::KgQueryDepth5.target_p95_ms() - 250.0).abs() < 1e-9);
        assert!((Operation::KgTimeline.target_p95_ms() - 100.0).abs() < 1e-9);
    }

    #[test]
    fn effective_target_applies_macos_multiplier() {
        for op in [
            Operation::StoreNoEmbedding,
            Operation::SearchFts,
            Operation::RecallHot,
            Operation::KgQueryDepth1,
            Operation::KgQueryDepth3,
            Operation::KgQueryDepth5,
            Operation::KgTimeline,
        ] {
            let expected = op.target_p95_ms() * MACOS_BUDGET_MULT;
            assert!(
                (op.effective_target_p95_ms() - expected).abs() < 1e-9,
                "effective budget for {:?} = {} (expected {})",
                op,
                op.effective_target_p95_ms(),
                expected,
            );
        }
        #[cfg(target_os = "macos")]
        assert!((MACOS_BUDGET_MULT - 3.0).abs() < 1e-9);
        #[cfg(not(target_os = "macos"))]
        assert!((MACOS_BUDGET_MULT - 1.0).abs() < 1e-9);
    }

    #[test]
    fn operation_scale_targets_match_performance_md() {
        let at_gate_scale = Some(CI_SCALE_GATE_ROWS);
        assert!(
            (Operation::StoreNoEmbedding.target_p95_ms_at_scale(at_gate_scale) - 120.0).abs()
                < 1e-9
        );
        assert!((Operation::SearchFts.target_p95_ms_at_scale(at_gate_scale) - 60.0).abs() < 1e-9);
        assert!((Operation::RecallHot.target_p95_ms_at_scale(at_gate_scale) - 80.0).abs() < 1e-9);
        // Knowledge-graph operations have no scale-specific budget.
        for op in [
            Operation::KgQueryDepth1,
            Operation::KgQueryDepth3,
            Operation::KgQueryDepth5,
            Operation::KgTimeline,
        ] {
            assert!(
                (op.target_p95_ms_at_scale(at_gate_scale) - op.target_p95_ms()).abs() < 1e-9,
                "{op:?} must keep its canonical budget at scale"
            );
        }
        assert!((Operation::RecallHot.target_p95_ms_at_scale(None) - 50.0).abs() < 1e-9);
    }

    #[test]
    fn issue_1579_b8_scale_budget_bucket_resolution() {
        assert_eq!(scale_budgets_for(500).scale, CI_SCALE_GATE_ROWS);
        assert_eq!(
            scale_budgets_for(CI_SCALE_GATE_ROWS).scale,
            CI_SCALE_GATE_ROWS
        );
        assert_eq!(scale_budgets_for(MAX_SCALE).scale, CI_SCALE_GATE_ROWS);
    }

    #[test]
    fn issue_1579_b8_scale_run_seeds_corpus_and_uses_scale_budgets() {
        let conn = fresh_conn();
        let ns = "bench-scale-test";
        let config = BenchConfig {
            iterations: 10,
            warmup: 2,
            namespace: ns.to_string(),
            scale: Some(300),
        };
        let results = run(&conn, &config).unwrap();
        assert_eq!(results.len(), 7);
        let seeded: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM memories WHERE namespace = ?1",
                [ns],
                |r| r.get(0),
            )
            .unwrap();
        assert!(
            seeded >= 300,
            "scale run must seed the scratch corpus; found {seeded} rows"
        );
        let store = &results[0];
        assert_eq!(store.operation, Operation::StoreNoEmbedding);
        assert!((store.target_p95_ms - 120.0).abs() < 1e-9);
        let search = &results[1];
        assert!((search.target_p95_ms - 60.0).abs() < 1e-9);
        let recall = &results[2];
        assert!((recall.target_p95_ms - 80.0).abs() < 1e-9);
        assert!((results[3].target_p95_ms - 100.0).abs() < 1e-9);
        assert!((results[5].target_p95_ms - 250.0).abs() < 1e-9);
    }

    #[test]
    fn seed_kg_chain_fixture_traverses_to_max_depth() {
        let conn = fresh_conn();
        let sources = seed_kg_chain_fixture(&conn, "kg-chain-fixture-test").unwrap();
        assert_eq!(sources.len(), KG_CHAIN_FIXTURE_CHAINS);
        for src in &sources {
            let depth5 =
                db::kg_query(&conn, src, KG_CHAIN_FIXTURE_HOPS, None, None, None, false).unwrap();
            assert_eq!(
                depth5.len(),
                KG_CHAIN_FIXTURE_HOPS,
                "depth={KG_CHAIN_FIXTURE_HOPS} on a {KG_CHAIN_FIXTURE_HOPS}-hop chain must reach every node"
            );
            let depth3 = db::kg_query(&conn, src, 3, None, None, None, false).unwrap();
            assert_eq!(
                depth3.len(),
                3,
                "depth=3 on a {KG_CHAIN_FIXTURE_HOPS}-hop chain must reach exactly 3 follow-on nodes"
            );
        }
    }

    #[test]
    fn seed_kg_fixture_populates_sources_and_links() {
        let conn = fresh_conn();
        let sources = seed_kg_fixture(&conn, "kg-fixture-test").unwrap();
        assert_eq!(sources.len(), KG_FIXTURE_SOURCES);
        for src in &sources {
            let nodes = db::kg_query(&conn, src, 1, None, None, None, false).unwrap();
            assert_eq!(nodes.len(), KG_FIXTURE_LINKS_PER_SOURCE);
            let timeline = db::kg_timeline(&conn, src, None, None, None).unwrap();
            assert_eq!(timeline.len(), KG_FIXTURE_LINKS_PER_SOURCE);
            for ev in &timeline {
                assert!(
                    !ev.valid_from.is_empty(),
                    "kg fixture must stamp valid_from on every link"
                );
            }
        }
    }

    /// Builds a passing result for `op` with the given measured p95.
    fn synthetic_result(op: Operation, p95: f64) -> OperationResult {
        OperationResult {
            operation: op,
            label: op.label(),
            target_p95_ms: op.target_p95_ms(),
            measured_p50_ms: p95 / 2.0,
            measured_p95_ms: p95,
            measured_p99_ms: p95 * 1.1,
            samples: 100,
            status: Status::Pass,
        }
    }

    /// Builds a baseline record for `op` with the given p95.
    fn synthetic_baseline(op: Operation, p95: f64) -> BaselineRecord {
        BaselineRecord {
            operation: op,
            measured_p95_ms: p95,
        }
    }

    #[test]
    fn baseline_compare_flags_above_threshold() {
        let current = vec![synthetic_result(Operation::StoreNoEmbedding, 11.2)];
        let baseline = vec![synthetic_baseline(Operation::StoreNoEmbedding, 10.0)];
        let rows = compare_against_baseline(&current, &baseline, 10.0);
        assert_eq!(rows.len(), 1);
        assert!(rows[0].regressed);
        assert!((rows[0].delta_pct - 12.0).abs() < 1e-9);
    }

    #[test]
    fn baseline_compare_passes_within_threshold() {
        let current = vec![synthetic_result(Operation::StoreNoEmbedding, 10.8)];
        let baseline = vec![synthetic_baseline(Operation::StoreNoEmbedding, 10.0)];
        let rows = compare_against_baseline(&current, &baseline, 10.0);
        assert_eq!(rows.len(), 1);
        assert!(!rows[0].regressed);
    }

    #[test]
    fn baseline_compare_speedup_is_negative_delta() {
        let current = vec![synthetic_result(Operation::SearchFts, 8.0)];
        let baseline = vec![synthetic_baseline(Operation::SearchFts, 10.0)];
        let rows = compare_against_baseline(&current, &baseline, 10.0);
        assert_eq!(rows.len(), 1);
        assert!(!rows[0].regressed);
        assert!((rows[0].delta_pct + 20.0).abs() < 1e-9);
    }

    #[test]
    fn baseline_compare_skips_ops_missing_in_baseline() {
        let current = vec![
            synthetic_result(Operation::StoreNoEmbedding, 10.0),
            synthetic_result(Operation::KgQueryDepth5, 200.0),
        ];
        let baseline = vec![synthetic_baseline(Operation::StoreNoEmbedding, 10.0)];
        let rows = compare_against_baseline(&current, &baseline, 10.0);
        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0].operation, Operation::StoreNoEmbedding);
    }

    #[test]
    fn baseline_compare_handles_zero_baseline() {
        let current = vec![synthetic_result(Operation::SearchFts, 5.0)];
        let baseline = vec![synthetic_baseline(Operation::SearchFts, 0.0)];
        let rows = compare_against_baseline(&current, &baseline, 10.0);
        assert_eq!(rows.len(), 1);
        assert!(!rows[0].regressed);
        assert!((rows[0].delta_pct - 0.0).abs() < 1e-9);
    }

    #[test]
    fn load_baseline_round_trips_json_payload() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("baseline.json");
        // Extra keys beyond `operation` / `measured_p95_ms` must be tolerated.
        let payload = serde_json::json!({
            "iterations": 200,
            "warmup": 20,
            "results": [
                {
                    "operation": "store_no_embedding",
                    "label": "memory_store (no embedding)",
                    "target_p95_ms": 20.0,
                    "measured_p50_ms": 4.0,
                    "measured_p95_ms": 9.0,
                    "measured_p99_ms": 11.0,
                    "samples": 200,
                    "status": "pass"
                },
                {
                    "operation": "search_fts",
                    "label": "memory_search (FTS5)",
                    "target_p95_ms": 100.0,
                    "measured_p50_ms": 12.0,
                    "measured_p95_ms": 31.0,
                    "measured_p99_ms": 45.0,
                    "samples": 200,
                    "status": "pass"
                }
            ]
        });
        std::fs::write(&path, serde_json::to_string_pretty(&payload).unwrap()).unwrap();
        let loaded = load_baseline(&path).unwrap();
        assert_eq!(loaded.len(), 2);
        assert_eq!(loaded[0].operation, Operation::StoreNoEmbedding);
        assert!((loaded[0].measured_p95_ms - 9.0).abs() < 1e-9);
        assert_eq!(loaded[1].operation, Operation::SearchFts);
        assert!((loaded[1].measured_p95_ms - 31.0).abs() < 1e-9);
    }

    #[test]
    fn render_regression_table_marks_regressions() {
        let rows = vec![
            Regression {
                operation: Operation::StoreNoEmbedding,
                label: Operation::StoreNoEmbedding.label(),
                baseline_p95_ms: 10.0,
                measured_p95_ms: 12.0,
                delta_pct: 20.0,
                threshold_pct: 10.0,
                regressed: true,
            },
            Regression {
                operation: Operation::SearchFts,
                label: Operation::SearchFts.label(),
                baseline_p95_ms: 30.0,
                measured_p95_ms: 31.0,
                delta_pct: 3.3,
                threshold_pct: 10.0,
                regressed: false,
            },
        ];
        let table = render_regression_table(&rows);
        assert!(table.contains("memory_store (no embedding)"));
        assert!(table.contains("memory_search (FTS5)"));
        assert!(table.contains("REGRESSION"));
        assert!(table.contains("OK"));
    }
}