1use anyhow::{Context, Result};
32use rusqlite::Connection;
33use serde::{Deserialize, Serialize};
34use std::path::Path;
35use std::time::{Duration, Instant};
36
37use crate::db;
38use crate::models::{Memory, Tier};
39
/// Multiplier applied to an operation's p95 target before it is compared with
/// the measured p95: a result passes while `measured <= target * P95_TOLERANCE`,
/// i.e. up to 10% over the target.
pub const P95_TOLERANCE: f64 = 1.10;

/// Namespace that `BenchConfig::default()` assigns to the synthetic bench rows.
pub const BENCH_NAMESPACE: &str = "ai-memory-bench";

/// Number of measured iterations per operation used by `BenchConfig::default()`.
pub const DEFAULT_ITERATIONS: usize = 200;

/// Number of warmup iterations per operation used by `BenchConfig::default()`;
/// warmup timings are not included in the reported samples.
pub const DEFAULT_WARMUP: usize = 20;

/// Default regression threshold, in percent.
///
/// NOTE(review): nothing in the visible code reads this constant; presumably
/// callers pass it as `threshold_pct` to `compare_against_baseline` — confirm.
pub const DEFAULT_REGRESSION_THRESHOLD_PCT: f64 = 10.0;
61
/// The operations measured by the bench harness; `run` reports one result per
/// variant.
///
/// Serialized in `snake_case` (for example `store_no_embedding`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Operation {
    /// Timed `db::insert` of a synthetic memory.
    StoreNoEmbedding,
    /// Timed `db::search` against a seeded corpus.
    SearchFts,
    /// Timed `db::recall` against a seeded corpus, after a warmup phase.
    RecallHot,
    /// Timed `db::kg_query` with depth 1 from the fan-out fixture sources.
    KgQueryDepth1,
    /// Timed `db::kg_query` with depth 3 from the chain fixture heads.
    KgQueryDepth3,
    /// Timed `db::kg_query` with depth 5 from the chain fixture heads.
    KgQueryDepth5,
    /// Timed `db::kg_timeline` from the fan-out fixture sources.
    KgTimeline,
}
87
88impl Operation {
89 #[must_use]
90 pub fn label(self) -> &'static str {
91 match self {
92 Self::StoreNoEmbedding => "memory_store (no embedding)",
93 Self::SearchFts => "memory_search (FTS5)",
94 Self::RecallHot => "memory_recall (hot, depth=1)",
95 Self::KgQueryDepth1 => "memory_kg_query (depth=1)",
96 Self::KgQueryDepth3 => "memory_kg_query (depth=3)",
97 Self::KgQueryDepth5 => "memory_kg_query (depth=5)",
98 Self::KgTimeline => "memory_kg_timeline",
99 }
100 }
101
102 #[must_use]
110 #[allow(clippy::match_same_arms)]
111 pub fn target_p95_ms(self) -> f64 {
112 match self {
113 Self::StoreNoEmbedding => 20.0,
114 Self::SearchFts => 100.0,
115 Self::RecallHot => 50.0,
116 Self::KgQueryDepth1 => 100.0,
117 Self::KgQueryDepth3 => 100.0,
118 Self::KgQueryDepth5 => 250.0,
119 Self::KgTimeline => 100.0,
120 }
121 }
122}
123
/// Outcome of comparing a measured p95 with its target.
///
/// Serialized in `snake_case` (`pass` / `fail`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Status {
    /// Measured p95 is at or below `target * P95_TOLERANCE`.
    Pass,
    /// Measured p95 is above `target * P95_TOLERANCE`.
    Fail,
}
130
/// Latency summary for one benchmarked operation.
#[derive(Debug, Clone, Serialize)]
pub struct OperationResult {
    /// Which operation was measured.
    pub operation: Operation,
    /// Display label of `operation` (the value of `Operation::label`).
    pub label: &'static str,
    /// p95 target for the operation, in milliseconds.
    pub target_p95_ms: f64,
    /// Measured 50th-percentile latency, in milliseconds.
    pub measured_p50_ms: f64,
    /// Measured 95th-percentile latency, in milliseconds.
    pub measured_p95_ms: f64,
    /// Measured 99th-percentile latency, in milliseconds.
    pub measured_p99_ms: f64,
    /// Number of timed samples the percentiles were computed from.
    pub samples: usize,
    /// Whether the measured p95 met the target within `P95_TOLERANCE`.
    pub status: Status,
}
143
/// Knobs for a benchmark run.
#[derive(Debug, Clone)]
pub struct BenchConfig {
    /// Number of timed iterations per operation (the sample count).
    pub iterations: usize,
    /// Number of iterations executed per operation before timings are kept.
    pub warmup: usize,
    /// Namespace under which the synthetic bench memories are stored and queried.
    pub namespace: String,
}
150
151impl Default for BenchConfig {
152 fn default() -> Self {
153 Self {
154 iterations: DEFAULT_ITERATIONS,
155 warmup: DEFAULT_WARMUP,
156 namespace: BENCH_NAMESPACE.to_string(),
157 }
158 }
159}
160
161pub fn run(conn: &Connection, config: &BenchConfig) -> Result<Vec<OperationResult>> {
172 let store = run_store_no_embedding(conn, config)?;
173 let search = run_search_fts(conn, config)?;
174 let recall = run_recall_hot(conn, config)?;
175 let kg_sources = seed_kg_fixture(conn, &config.namespace)?;
176 let kg_query = run_kg_query_depth1(conn, config, &kg_sources)?;
177 let kg_chain_sources = seed_kg_chain_fixture(conn, &config.namespace)?;
178 let kg_query_d3 =
179 run_kg_query_chain(conn, config, &kg_chain_sources, Operation::KgQueryDepth3, 3)?;
180 let kg_query_d5 =
181 run_kg_query_chain(conn, config, &kg_chain_sources, Operation::KgQueryDepth5, 5)?;
182 let kg_timeline = run_kg_timeline(conn, config, &kg_sources)?;
183 Ok(vec![
184 store,
185 search,
186 recall,
187 kg_query,
188 kg_query_d3,
189 kg_query_d5,
190 kg_timeline,
191 ])
192}
193
194fn run_store_no_embedding(conn: &Connection, config: &BenchConfig) -> Result<OperationResult> {
195 let total = config.warmup + config.iterations;
196 let mut samples = Vec::with_capacity(config.iterations);
197 for i in 0..total {
198 let mem = synth_memory(&config.namespace, i, "store");
199 let start = Instant::now();
200 db::insert(conn, &mem)?;
201 let elapsed = start.elapsed();
202 if i >= config.warmup {
203 samples.push(elapsed);
204 }
205 }
206 Ok(percentile_summary(Operation::StoreNoEmbedding, &samples))
207}
208
209fn run_search_fts(conn: &Connection, config: &BenchConfig) -> Result<OperationResult> {
210 seed_corpus(conn, &config.namespace, "search", 200)?;
211 let total = config.warmup + config.iterations;
212 let mut samples = Vec::with_capacity(config.iterations);
213 for i in 0..total {
214 let query = format!("topic-{}", i % 50);
215 let start = Instant::now();
216 let _ = db::search(
217 conn,
218 &query,
219 Some(&config.namespace),
220 None,
221 10,
222 None,
223 None,
224 None,
225 None,
226 None,
227 None,
228 )?;
229 let elapsed = start.elapsed();
230 if i >= config.warmup {
231 samples.push(elapsed);
232 }
233 }
234 Ok(percentile_summary(Operation::SearchFts, &samples))
235}
236
237fn run_recall_hot(conn: &Connection, config: &BenchConfig) -> Result<OperationResult> {
238 seed_corpus(conn, &config.namespace, "recall", 200)?;
239 let warmup_query = "topic 0 category 0";
240 for _ in 0..config.warmup {
241 let _ = db::recall(
242 conn,
243 warmup_query,
244 Some(&config.namespace),
245 10,
246 None,
247 None,
248 None,
249 0,
250 0,
251 None,
252 None,
253 )?;
254 }
255 let mut samples = Vec::with_capacity(config.iterations);
256 for i in 0..config.iterations {
257 let query = format!("topic {} category {}", i % 50, i % 10);
258 let start = Instant::now();
259 let _ = db::recall(
260 conn,
261 &query,
262 Some(&config.namespace),
263 10,
264 None,
265 None,
266 None,
267 0,
268 0,
269 None,
270 None,
271 )?;
272 samples.push(start.elapsed());
273 }
274 Ok(percentile_summary(Operation::RecallHot, &samples))
275}
276
/// Number of source memories inserted by `seed_kg_fixture`.
const KG_FIXTURE_SOURCES: usize = 50;
/// Number of target memories linked from each source by `seed_kg_fixture`.
const KG_FIXTURE_LINKS_PER_SOURCE: usize = 4;

/// Number of independent chains built by `seed_kg_chain_fixture`.
const KG_CHAIN_FIXTURE_CHAINS: usize = 50;
/// Number of nodes linked one after another behind each chain head by
/// `seed_kg_chain_fixture`.
const KG_CHAIN_FIXTURE_HOPS: usize = 5;
291
292fn run_kg_query_depth1(
293 conn: &Connection,
294 config: &BenchConfig,
295 sources: &[String],
296) -> Result<OperationResult> {
297 debug_assert!(
298 !sources.is_empty(),
299 "kg_query bench requires a seeded fixture"
300 );
301 let total = config.warmup + config.iterations;
302 let mut samples = Vec::with_capacity(config.iterations);
303 for i in 0..total {
304 let src = &sources[i % sources.len()];
305 let start = Instant::now();
306 let _ = db::kg_query(conn, src, 1, None, None, None)?;
307 let elapsed = start.elapsed();
308 if i >= config.warmup {
309 samples.push(elapsed);
310 }
311 }
312 Ok(percentile_summary(Operation::KgQueryDepth1, &samples))
313}
314
315fn run_kg_query_chain(
316 conn: &Connection,
317 config: &BenchConfig,
318 sources: &[String],
319 operation: Operation,
320 max_depth: usize,
321) -> Result<OperationResult> {
322 debug_assert!(
323 !sources.is_empty(),
324 "kg_query chain bench requires a seeded fixture"
325 );
326 let total = config.warmup + config.iterations;
327 let mut samples = Vec::with_capacity(config.iterations);
328 for i in 0..total {
329 let src = &sources[i % sources.len()];
330 let start = Instant::now();
331 let _ = db::kg_query(conn, src, max_depth, None, None, None)?;
332 let elapsed = start.elapsed();
333 if i >= config.warmup {
334 samples.push(elapsed);
335 }
336 }
337 Ok(percentile_summary(operation, &samples))
338}
339
340fn run_kg_timeline(
341 conn: &Connection,
342 config: &BenchConfig,
343 sources: &[String],
344) -> Result<OperationResult> {
345 debug_assert!(
346 !sources.is_empty(),
347 "kg_timeline bench requires a seeded fixture"
348 );
349 let total = config.warmup + config.iterations;
350 let mut samples = Vec::with_capacity(config.iterations);
351 for i in 0..total {
352 let src = &sources[i % sources.len()];
353 let start = Instant::now();
354 let _ = db::kg_timeline(conn, src, None, None, None)?;
355 let elapsed = start.elapsed();
356 if i >= config.warmup {
357 samples.push(elapsed);
358 }
359 }
360 Ok(percentile_summary(Operation::KgTimeline, &samples))
361}
362
363fn seed_kg_fixture(conn: &Connection, namespace: &str) -> Result<Vec<String>> {
369 let mut sources = Vec::with_capacity(KG_FIXTURE_SOURCES);
370 for s in 0..KG_FIXTURE_SOURCES {
371 let src = synth_memory(namespace, s, "kg-src");
372 let src_id = db::insert(conn, &src)?;
377 for t in 0..KG_FIXTURE_LINKS_PER_SOURCE {
378 let target_idx = s * KG_FIXTURE_LINKS_PER_SOURCE + t;
379 let tgt = synth_memory(namespace, target_idx, "kg-tgt");
380 let tgt_id = db::insert(conn, &tgt)?;
381 db::create_link(conn, &src_id, &tgt_id, "related_to")?;
385 }
386 sources.push(src_id);
387 }
388 Ok(sources)
389}
390
391fn seed_kg_chain_fixture(conn: &Connection, namespace: &str) -> Result<Vec<String>> {
399 let mut sources = Vec::with_capacity(KG_CHAIN_FIXTURE_CHAINS);
400 for c in 0..KG_CHAIN_FIXTURE_CHAINS {
401 let mut prev_id = {
402 let head = synth_memory(namespace, c, "kg-chain-src");
403 db::insert(conn, &head)?
404 };
405 let chain_head_id = prev_id.clone();
406 for h in 0..KG_CHAIN_FIXTURE_HOPS {
407 let node_idx = c * KG_CHAIN_FIXTURE_HOPS + h;
408 let next = synth_memory(namespace, node_idx, "kg-chain-node");
409 let next_id = db::insert(conn, &next)?;
410 db::create_link(conn, &prev_id, &next_id, "related_to")?;
411 prev_id = next_id;
412 }
413 sources.push(chain_head_id);
414 }
415 Ok(sources)
416}
417
418fn seed_corpus(conn: &Connection, namespace: &str, prefix: &str, count: usize) -> Result<()> {
419 for i in 0..count {
420 let mem = synth_memory(namespace, i, prefix);
421 db::insert(conn, &mem)?;
422 }
423 Ok(())
424}
425
426fn synth_memory(namespace: &str, i: usize, prefix: &str) -> Memory {
427 let now = chrono::Utc::now().to_rfc3339();
428 Memory {
429 id: uuid::Uuid::new_v4().to_string(),
430 tier: Tier::Long,
431 namespace: namespace.to_string(),
432 title: format!("bench-{prefix}-{i}"),
433 content: format!(
434 "bench memory {i} content about topic {} category {} for {prefix} workload",
435 i % 50,
436 i % 10
437 ),
438 tags: vec![],
439 priority: i32::try_from((i % 9) + 1).unwrap_or(5),
440 confidence: 1.0,
441 source: "bench".to_string(),
442 access_count: 0,
443 created_at: now.clone(),
444 updated_at: now,
445 last_accessed_at: None,
446 expires_at: None,
447 metadata: serde_json::json!({"agent_id": "bench"}),
448 }
449}
450
451fn percentile_summary(operation: Operation, samples: &[Duration]) -> OperationResult {
452 debug_assert!(
453 !samples.is_empty(),
454 "bench operation produced no samples; iterations must be > 0"
455 );
456 let mut sorted: Vec<f64> = samples.iter().map(duration_ms).collect();
457 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
458 let p50 = percentile(&sorted, 0.50);
459 let p95 = percentile(&sorted, 0.95);
460 let p99 = percentile(&sorted, 0.99);
461 let target = operation.target_p95_ms();
462 let status = if p95 <= target * P95_TOLERANCE {
463 Status::Pass
464 } else {
465 Status::Fail
466 };
467 OperationResult {
468 operation,
469 label: operation.label(),
470 target_p95_ms: target,
471 measured_p50_ms: p50,
472 measured_p95_ms: p95,
473 measured_p99_ms: p99,
474 samples: sorted.len(),
475 status,
476 }
477}
478
/// Converts a duration to fractional milliseconds.
fn duration_ms(d: &Duration) -> f64 {
    const MILLIS_PER_SECOND: f64 = 1000.0;
    d.as_secs_f64() * MILLIS_PER_SECOND
}
483
/// Returns the `q`-quantile of `sorted` using linear interpolation between
/// the two closest ranks.
///
/// `sorted` must already be in ascending order. `q` is a fraction, nominally
/// in `[0.0, 1.0]` (`0.95` for p95); values outside that range are clamped,
/// so `q > 1.0` yields the last element instead of indexing past the end of
/// the slice.
///
/// Returns `0.0` for an empty slice and the sole element for a one-element
/// slice.
// The casts convert between a slice length / index and `f64`. After clamping,
// `rank` lies in `[0, len - 1]`, so the float-to-usize casts cannot go out of
// range for any slice that fits in memory.
#[allow(
    clippy::cast_precision_loss,
    clippy::cast_sign_loss,
    clippy::cast_possible_truncation
)]
fn percentile(sorted: &[f64], q: f64) -> f64 {
    match sorted {
        [] => return 0.0,
        [only] => return *only,
        _ => {}
    }

    // Keep the rank inside the slice for out-of-range quantiles.
    let q = q.clamp(0.0, 1.0);
    let rank = q * (sorted.len() as f64 - 1.0);
    let lo = rank.floor() as usize;
    let hi = rank.ceil() as usize;
    if lo == hi {
        // The rank lands exactly on an element; nothing to interpolate.
        return sorted[lo];
    }
    let frac = rank - lo as f64;
    sorted[lo] + (sorted[hi] - sorted[lo]) * frac
}
505
506#[must_use]
509pub fn render_table(results: &[OperationResult]) -> String {
510 let mut out = String::new();
511 out.push_str(
512 "Operation Target (p95) Measured (p95) p50 p99 Status\n",
513 );
514 out.push_str(
515 "─────────────────────────────────────────────────────────────────────────────────────────\n",
516 );
517 for r in results {
518 let status_str = match r.status {
519 Status::Pass => "PASS",
520 Status::Fail => "FAIL",
521 };
522 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
527 let target_ms = r.target_p95_ms.round() as i64;
528 let line = format!(
529 "{:<30} < {:>4} ms {:>7.1} ms {:>5.1} {:>5.1} {}\n",
530 r.label, target_ms, r.measured_p95_ms, r.measured_p50_ms, r.measured_p99_ms, status_str
531 );
532 out.push_str(&line);
533 }
534 out
535}
536
/// One baseline measurement, as read from a baseline JSON file.
///
/// Only the two fields below are deserialized; the round-trip test in this
/// file feeds full result objects with additional keys and they are accepted.
#[derive(Debug, Clone, Deserialize)]
pub struct BaselineRecord {
    /// Operation the baseline applies to.
    pub operation: Operation,
    /// Baseline p95 latency, in milliseconds.
    pub measured_p95_ms: f64,
}
546
/// Top-level shape of a baseline JSON file: an object with a `results` array.
#[derive(Debug, Clone, Deserialize)]
struct BaselineFile {
    /// Baseline entries; returned as-is by `load_baseline`.
    results: Vec<BaselineRecord>,
}
556
/// Comparison of one operation's measured p95 against its baseline p95.
#[derive(Debug, Clone, Serialize)]
pub struct Regression {
    /// Operation being compared.
    pub operation: Operation,
    /// Display label of `operation`.
    pub label: &'static str,
    /// Baseline p95 latency, in milliseconds.
    pub baseline_p95_ms: f64,
    /// Measured p95 latency of the current run, in milliseconds.
    pub measured_p95_ms: f64,
    /// Relative change of the measured p95 versus the baseline, in percent;
    /// positive means slower. `0.0` when the baseline is not positive.
    pub delta_pct: f64,
    /// Threshold, in percent, that `delta_pct` was compared against.
    pub threshold_pct: f64,
    /// `true` when `delta_pct` is strictly greater than `threshold_pct`.
    pub regressed: bool,
}
570
571pub fn load_baseline(path: &Path) -> Result<Vec<BaselineRecord>> {
578 let raw = std::fs::read_to_string(path)
579 .with_context(|| format!("failed to read baseline file: {}", path.display()))?;
580 let file: BaselineFile = serde_json::from_str(&raw)
581 .with_context(|| format!("failed to parse baseline JSON: {}", path.display()))?;
582 Ok(file.results)
583}
584
585#[must_use]
590pub fn compare_against_baseline(
591 current: &[OperationResult],
592 baseline: &[BaselineRecord],
593 threshold_pct: f64,
594) -> Vec<Regression> {
595 let mut out = Vec::with_capacity(current.len());
596 for r in current {
597 let Some(b) = baseline.iter().find(|b| b.operation == r.operation) else {
598 continue;
599 };
600 let delta_pct = if b.measured_p95_ms > 0.0 {
606 (r.measured_p95_ms - b.measured_p95_ms) / b.measured_p95_ms * 100.0
607 } else {
608 0.0
609 };
610 let regressed = delta_pct > threshold_pct;
611 out.push(Regression {
612 operation: r.operation,
613 label: r.operation.label(),
614 baseline_p95_ms: b.measured_p95_ms,
615 measured_p95_ms: r.measured_p95_ms,
616 delta_pct,
617 threshold_pct,
618 regressed,
619 });
620 }
621 out
622}
623
624#[must_use]
627pub fn render_regression_table(rows: &[Regression]) -> String {
628 let mut out = String::new();
629 out.push_str(
630 "Operation Baseline (p95) Measured (p95) Delta Status\n",
631 );
632 out.push_str(
633 "─────────────────────────────────────────────────────────────────────────────────\n",
634 );
635 for r in rows {
636 let status_str = if r.regressed { "REGRESSION" } else { "OK" };
637 let line = format!(
638 "{:<30} {:>10.1} ms {:>10.1} ms {:>+6.1}% {}\n",
639 r.label, r.baseline_p95_ms, r.measured_p95_ms, r.delta_pct, status_str
640 );
641 out.push_str(&line);
642 }
643 out
644}
645
646pub fn append_history(
651 path: &std::path::Path,
652 captured_at: &str,
653 iterations: usize,
654 warmup: usize,
655 results: &[OperationResult],
656) -> Result<()> {
657 use std::fs::OpenOptions;
658 use std::io::Write;
659
660 if let Some(parent) = path.parent()
662 && !parent.as_os_str().is_empty()
663 {
664 std::fs::create_dir_all(parent)?;
665 }
666
667 let entry = serde_json::json!({
668 "captured_at": captured_at,
669 "iterations": iterations,
670 "warmup": warmup,
671 "results": results,
672 });
673
674 let mut file = OpenOptions::new().create(true).append(true).open(path)?;
675
676 writeln!(file, "{}", serde_json::to_string(&entry)?)?;
677 Ok(())
678}
679
// Unit tests for the bench harness. Gated on `cfg(test)` so the helpers and
// fixtures below are not compiled into non-test builds.
#[cfg(test)]
#[allow(clippy::wildcard_imports)]
mod tests {
    use super::*;
    use crate::db;

    /// Opens a fresh in-memory database through `db::open`.
    fn fresh_conn() -> Connection {
        db::open(Path::new(":memory:")).unwrap()
    }

    /// Small configuration that keeps the full-run tests fast.
    fn small_config() -> BenchConfig {
        BenchConfig {
            iterations: 30,
            warmup: 5,
            namespace: "bench-test".to_string(),
        }
    }

    #[test]
    fn percentile_interpolates() {
        let s = vec![1.0, 2.0, 3.0, 4.0];
        assert!((percentile(&s, 0.50) - 2.5).abs() < 1e-9);
        assert!((percentile(&s, 0.0) - 1.0).abs() < 1e-9);
        assert!((percentile(&s, 1.0) - 4.0).abs() < 1e-9);
    }

    #[test]
    fn percentile_handles_singleton_and_empty() {
        assert!((percentile(&[], 0.5) - 0.0).abs() < 1e-9);
        assert!((percentile(&[42.0], 0.99) - 42.0).abs() < 1e-9);
    }

    #[test]
    fn run_returns_all_seven_results() {
        let conn = fresh_conn();
        let results = run(&conn, &small_config()).unwrap();
        assert_eq!(results.len(), 7);
        assert_eq!(results[0].operation, Operation::StoreNoEmbedding);
        assert_eq!(results[1].operation, Operation::SearchFts);
        assert_eq!(results[2].operation, Operation::RecallHot);
        assert_eq!(results[3].operation, Operation::KgQueryDepth1);
        assert_eq!(results[4].operation, Operation::KgQueryDepth3);
        assert_eq!(results[5].operation, Operation::KgQueryDepth5);
        assert_eq!(results[6].operation, Operation::KgTimeline);
        for r in &results {
            assert_eq!(r.samples, 30);
            assert!(r.measured_p50_ms <= r.measured_p95_ms);
            assert!(r.measured_p95_ms <= r.measured_p99_ms);
            assert!(r.target_p95_ms > 0.0);
        }
    }

    #[test]
    fn status_is_fail_when_p95_over_tolerance() {
        let r = OperationResult {
            operation: Operation::StoreNoEmbedding,
            label: Operation::StoreNoEmbedding.label(),
            target_p95_ms: 20.0,
            measured_p50_ms: 5.0,
            measured_p95_ms: 25.0,
            measured_p99_ms: 30.0,
            samples: 100,
            status: Status::Fail,
        };
        assert_eq!(r.status, Status::Fail);
        // Re-derive the status with the same rule `percentile_summary` applies.
        let recomputed = if 25.0_f64 <= 20.0 * P95_TOLERANCE {
            Status::Pass
        } else {
            Status::Fail
        };
        assert_eq!(recomputed, Status::Fail);
    }

    #[test]
    fn status_is_pass_within_tolerance() {
        let recomputed = if 21.0_f64 <= 20.0 * P95_TOLERANCE {
            Status::Pass
        } else {
            Status::Fail
        };
        assert_eq!(recomputed, Status::Pass);
    }

    #[test]
    fn render_table_includes_all_operations() {
        let conn = fresh_conn();
        let results = run(&conn, &small_config()).unwrap();
        let table = render_table(&results);
        assert!(table.contains("memory_store (no embedding)"));
        assert!(table.contains("memory_search (FTS5)"));
        assert!(table.contains("memory_recall (hot, depth=1)"));
        assert!(table.contains("memory_kg_query (depth=1)"));
        assert!(table.contains("memory_kg_query (depth=3)"));
        assert!(table.contains("memory_kg_query (depth=5)"));
        assert!(table.contains("memory_kg_timeline"));
        assert!(table.contains("Status"));
    }

    #[test]
    fn operation_targets_match_performance_md() {
        assert!((Operation::StoreNoEmbedding.target_p95_ms() - 20.0).abs() < 1e-9);
        assert!((Operation::SearchFts.target_p95_ms() - 100.0).abs() < 1e-9);
        assert!((Operation::RecallHot.target_p95_ms() - 50.0).abs() < 1e-9);
        assert!((Operation::KgQueryDepth1.target_p95_ms() - 100.0).abs() < 1e-9);
        assert!((Operation::KgQueryDepth3.target_p95_ms() - 100.0).abs() < 1e-9);
        assert!((Operation::KgQueryDepth5.target_p95_ms() - 250.0).abs() < 1e-9);
        assert!((Operation::KgTimeline.target_p95_ms() - 100.0).abs() < 1e-9);
    }

    #[test]
    fn seed_kg_chain_fixture_traverses_to_max_depth() {
        let conn = fresh_conn();
        let sources = seed_kg_chain_fixture(&conn, "kg-chain-fixture-test").unwrap();
        assert_eq!(sources.len(), KG_CHAIN_FIXTURE_CHAINS);
        for src in &sources {
            let depth5 = db::kg_query(&conn, src, KG_CHAIN_FIXTURE_HOPS, None, None, None).unwrap();
            assert_eq!(
                depth5.len(),
                KG_CHAIN_FIXTURE_HOPS,
                "depth={KG_CHAIN_FIXTURE_HOPS} on a {KG_CHAIN_FIXTURE_HOPS}-hop chain must reach every node"
            );
            let depth3 = db::kg_query(&conn, src, 3, None, None, None).unwrap();
            assert_eq!(
                depth3.len(),
                3,
                "depth=3 on a {KG_CHAIN_FIXTURE_HOPS}-hop chain must reach exactly 3 follow-on nodes"
            );
        }
    }

    #[test]
    fn seed_kg_fixture_populates_sources_and_links() {
        let conn = fresh_conn();
        let sources = seed_kg_fixture(&conn, "kg-fixture-test").unwrap();
        assert_eq!(sources.len(), KG_FIXTURE_SOURCES);
        for src in &sources {
            let nodes = db::kg_query(&conn, src, 1, None, None, None).unwrap();
            assert_eq!(nodes.len(), KG_FIXTURE_LINKS_PER_SOURCE);
            let timeline = db::kg_timeline(&conn, src, None, None, None).unwrap();
            assert_eq!(timeline.len(), KG_FIXTURE_LINKS_PER_SOURCE);
            for ev in &timeline {
                assert!(
                    !ev.valid_from.is_empty(),
                    "kg fixture must stamp valid_from on every link"
                );
            }
        }
    }

    /// Builds a passing result for `op` whose measured p95 is `p95`.
    fn synthetic_result(op: Operation, p95: f64) -> OperationResult {
        OperationResult {
            operation: op,
            label: op.label(),
            target_p95_ms: op.target_p95_ms(),
            measured_p50_ms: p95 / 2.0,
            measured_p95_ms: p95,
            measured_p99_ms: p95 * 1.1,
            samples: 100,
            status: Status::Pass,
        }
    }

    /// Builds a baseline record for `op` with the given p95.
    fn synthetic_baseline(op: Operation, p95: f64) -> BaselineRecord {
        BaselineRecord {
            operation: op,
            measured_p95_ms: p95,
        }
    }

    #[test]
    fn baseline_compare_flags_above_threshold() {
        let current = vec![synthetic_result(Operation::StoreNoEmbedding, 11.2)];
        let baseline = vec![synthetic_baseline(Operation::StoreNoEmbedding, 10.0)];
        let rows = compare_against_baseline(&current, &baseline, 10.0);
        assert_eq!(rows.len(), 1);
        assert!(rows[0].regressed);
        assert!((rows[0].delta_pct - 12.0).abs() < 1e-9);
    }

    #[test]
    fn baseline_compare_passes_within_threshold() {
        let current = vec![synthetic_result(Operation::StoreNoEmbedding, 10.8)];
        let baseline = vec![synthetic_baseline(Operation::StoreNoEmbedding, 10.0)];
        let rows = compare_against_baseline(&current, &baseline, 10.0);
        assert_eq!(rows.len(), 1);
        assert!(!rows[0].regressed);
    }

    #[test]
    fn baseline_compare_speedup_is_negative_delta() {
        let current = vec![synthetic_result(Operation::SearchFts, 8.0)];
        let baseline = vec![synthetic_baseline(Operation::SearchFts, 10.0)];
        let rows = compare_against_baseline(&current, &baseline, 10.0);
        assert_eq!(rows.len(), 1);
        assert!(!rows[0].regressed);
        assert!((rows[0].delta_pct + 20.0).abs() < 1e-9);
    }

    #[test]
    fn baseline_compare_skips_ops_missing_in_baseline() {
        let current = vec![
            synthetic_result(Operation::StoreNoEmbedding, 10.0),
            synthetic_result(Operation::KgQueryDepth5, 200.0),
        ];
        let baseline = vec![synthetic_baseline(Operation::StoreNoEmbedding, 10.0)];
        let rows = compare_against_baseline(&current, &baseline, 10.0);
        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0].operation, Operation::StoreNoEmbedding);
    }

    #[test]
    fn baseline_compare_handles_zero_baseline() {
        let current = vec![synthetic_result(Operation::SearchFts, 5.0)];
        let baseline = vec![synthetic_baseline(Operation::SearchFts, 0.0)];
        let rows = compare_against_baseline(&current, &baseline, 10.0);
        assert_eq!(rows.len(), 1);
        assert!(!rows[0].regressed);
        assert!((rows[0].delta_pct - 0.0).abs() < 1e-9);
    }

    #[test]
    fn load_baseline_round_trips_json_payload() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("baseline.json");
        // Full result objects: `load_baseline` must accept the extra keys.
        let payload = serde_json::json!({
            "iterations": 200,
            "warmup": 20,
            "results": [
                {
                    "operation": "store_no_embedding",
                    "label": "memory_store (no embedding)",
                    "target_p95_ms": 20.0,
                    "measured_p50_ms": 4.0,
                    "measured_p95_ms": 9.0,
                    "measured_p99_ms": 11.0,
                    "samples": 200,
                    "status": "pass"
                },
                {
                    "operation": "search_fts",
                    "label": "memory_search (FTS5)",
                    "target_p95_ms": 100.0,
                    "measured_p50_ms": 12.0,
                    "measured_p95_ms": 31.0,
                    "measured_p99_ms": 45.0,
                    "samples": 200,
                    "status": "pass"
                }
            ]
        });
        std::fs::write(&path, serde_json::to_string_pretty(&payload).unwrap()).unwrap();
        let loaded = load_baseline(&path).unwrap();
        assert_eq!(loaded.len(), 2);
        assert_eq!(loaded[0].operation, Operation::StoreNoEmbedding);
        assert!((loaded[0].measured_p95_ms - 9.0).abs() < 1e-9);
        assert_eq!(loaded[1].operation, Operation::SearchFts);
        assert!((loaded[1].measured_p95_ms - 31.0).abs() < 1e-9);
    }

    #[test]
    fn render_regression_table_marks_regressions() {
        let rows = vec![
            Regression {
                operation: Operation::StoreNoEmbedding,
                label: Operation::StoreNoEmbedding.label(),
                baseline_p95_ms: 10.0,
                measured_p95_ms: 12.0,
                delta_pct: 20.0,
                threshold_pct: 10.0,
                regressed: true,
            },
            Regression {
                operation: Operation::SearchFts,
                label: Operation::SearchFts.label(),
                baseline_p95_ms: 30.0,
                measured_p95_ms: 31.0,
                delta_pct: 3.3,
                threshold_pct: 10.0,
                regressed: false,
            },
        ];
        let table = render_regression_table(&rows);
        assert!(table.contains("memory_store (no embedding)"));
        assert!(table.contains("memory_search (FTS5)"));
        assert!(table.contains("REGRESSION"));
        assert!(table.contains("OK"));
    }
}