use std::path::{Path, PathBuf};
use std::process::Command as StdCommand;
use std::time::{Duration, Instant};
use assert_cmd::cargo::CommandCargoExt;
// Per-operation time budgets checked by the perf guard test in this file.
// `assert_within_budget` multiplies each one by `BUDGET_SLACK` before comparing
// it with the measured median.

/// Budget for `dbmd fm query status=active --type company`.
const BUDGET_FM_QUERY: Duration = Duration::from_millis(300);
/// Budget for `dbmd search Kickoff --type email`.
const BUDGET_SEARCH_TYPED: Duration = Duration::from_millis(300);
/// Budget for `dbmd log tail 20`.
const BUDGET_LOG_TAIL: Duration = Duration::from_millis(50);
/// Budget shared by `dbmd graph backlinks` and `dbmd graph neighborhood --hops 1`.
const BUDGET_GRAPH_UNSCOPED: Duration = Duration::from_millis(200);
/// Budget for `dbmd validate --since …` on the copy whose log was grown.
const BUDGET_VALIDATE_WORKING: Duration = Duration::from_millis(1000);
/// Budget for `dbmd validate --all`.
const BUDGET_VALIDATE_ALL: Duration = Duration::from_secs(5);
/// Budget for `dbmd index rebuild`.
const BUDGET_INDEX_REBUILD: Duration = Duration::from_secs(10);
/// Budget for `dbmd stats`.
const BUDGET_STATS: Duration = Duration::from_secs(5);
/// Multiplier applied to every budget to obtain the asserted guard; the
/// failure message describes it as CI headroom.
const BUDGET_SLACK: u32 = 6;
/// `(warmup, timed)` iteration counts used for the sub-second operations.
const LOOP_ITERS: (usize, usize) = (2, 5);
/// `(warmup, timed)` iteration counts used for `validate --all`, `stats` and
/// `index rebuild`.
const SWEEP_ITERS: (usize, usize) = (1, 3);
/// Path of the scale-corpus generator source: `tests/gen-scale.rs`, two
/// directory levels above this crate's manifest directory.
///
/// The path is built lexically (the `..` components are not resolved) and is
/// not checked for existence here.
fn gen_scale_src() -> PathBuf {
    let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    for component in ["..", "..", "tests", "gen-scale.rs"] {
        path.push(component);
    }
    path
}
/// Builds the 10k scale corpus under `dir` and returns the store path
/// (`dir/corpus-d-scale`).
///
/// Steps, each of which panics with a descriptive message on failure:
/// 1. compile `tests/gen-scale.rs` with `rustc -O` into `dir`;
/// 2. run the resulting binary as `gen-scale 10k <store> --force`;
/// 3. sanity-check the output (at least 10,000 `.md` files, a `DB.md` marker,
///    a `sources/emails` folder);
/// 4. run `dbmd index rebuild --dir <store>` and then `dbmd validate --all <store>`,
///    requiring both to succeed.
fn build_scale_corpus(dir: &Path) -> PathBuf {
    // --- locate the generator source -------------------------------------
    let generator_src = gen_scale_src();
    assert!(
        generator_src.is_file(),
        "tests/gen-scale.rs must exist at {} — it is the scale-corpus generator this guard runs",
        generator_src.display()
    );

    // --- compile it into the scratch directory ---------------------------
    let exe_name = if cfg!(windows) {
        "gen-scale.exe"
    } else {
        "gen-scale"
    };
    let generator_bin = dir.join(exe_name);
    let mut rustc = StdCommand::new("rustc");
    rustc
        .arg("-O")
        .arg(&generator_src)
        .arg("-o")
        .arg(&generator_bin);
    let compiled = match rustc.status() {
        Ok(status) => status,
        Err(e) => panic!(
            "failed to spawn `rustc` to compile {}: {e}. \
             Set DBMD_SKIP_PERF=1 to skip the perf guard on a runner without rustc.",
            generator_src.display()
        ),
    };
    assert!(
        compiled.success(),
        "`rustc -O {}` failed (exit {:?}) — the scale generator must compile",
        generator_src.display(),
        compiled.code()
    );

    // --- generate the corpus ---------------------------------------------
    let store = dir.join("corpus-d-scale");
    let mut generator = StdCommand::new(&generator_bin);
    generator.arg("10k").arg(&store).arg("--force");
    let generated = match generator.status() {
        Ok(status) => status,
        Err(e) => panic!("failed to spawn the generated gen-scale binary: {e}"),
    };
    assert!(
        generated.success(),
        "gen-scale 10k failed (exit {:?}) — the scale corpus must generate cleanly",
        generated.code()
    );

    // --- sanity-check what was generated ---------------------------------
    let md_count = count_md_files(&store);
    assert!(
        md_count >= 10_000,
        "scale corpus has {md_count} .md files, expected ~10,021 (>= 10,000) — \
         a 10k-tier corpus is the premise of every budget below"
    );
    assert!(
        store.join("DB.md").is_file(),
        "generated store is missing its DB.md marker — not a valid db.md store"
    );
    assert!(
        store.join("sources/emails").is_dir(),
        "generated store is missing the sources/emails overflow folder"
    );

    // --- rebuild the index, then require a clean full validation ---------
    {
        // The CLI arguments are `&str`, so the path must be valid UTF-8.
        let store_arg = store.to_str().expect("store path is UTF-8");

        let rebuilt = dbmd_status(&["index", "rebuild", "--dir", store_arg]);
        assert!(
            rebuilt.success(),
            "`dbmd index rebuild` on the fresh scale corpus failed (exit {:?})",
            rebuilt.code()
        );

        let validated = dbmd_status(&["validate", "--all", store_arg]);
        assert!(
            validated.success(),
            "the scale corpus does not validate clean after `index rebuild` (exit {:?}) — \
             the perf guard requires a valid fixed-point store to time against",
            validated.code()
        );
    }

    store
}
/// Runs the `dbmd` test binary with `args`, discarding its stdout and stderr,
/// and returns the exit status.
///
/// # Panics
/// Panics if the binary cannot be resolved via `cargo_bin` or cannot be spawned.
fn dbmd_status(args: &[&str]) -> std::process::ExitStatus {
    let mut dbmd = StdCommand::cargo_bin("dbmd").expect("the `dbmd` binary builds for tests");
    dbmd.args(args);
    // Only the exit status matters here, so the output is discarded.
    dbmd.stdout(std::process::Stdio::null());
    dbmd.stderr(std::process::Stdio::null());
    dbmd.status().expect("spawn dbmd")
}
/// Counts the files with the exact extension `md` anywhere below `root`.
///
/// Directories that cannot be listed (including a missing `root`) and entries
/// that fail to read are skipped silently, so the result is a best-effort count.
fn count_md_files(root: &Path) -> usize {
    // Explicit worklist of directories still to be listed.
    let mut pending = vec![root.to_path_buf()];
    let mut total = 0usize;
    while let Some(dir) = pending.pop() {
        let listing = match std::fs::read_dir(&dir) {
            Ok(listing) => listing,
            Err(_) => continue,
        };
        for path in listing.flatten().map(|entry| entry.path()) {
            if path.is_dir() {
                pending.push(path);
                continue;
            }
            if matches!(path.extension().and_then(|ext| ext.to_str()), Some("md")) {
                total += 1;
            }
        }
    }
    total
}
/// Returns `records/companies/<slug>` for the lexicographically smallest
/// company record in `store`.
///
/// Candidates are the entries of `<store>/records/companies` whose extension
/// is `md`, excluding `index.md`; the slug is the file stem.
///
/// # Panics
/// Panics if the folder cannot be read or contains no candidate.
fn first_company_target(store: &Path) -> String {
    let companies = store.join("records").join("companies");
    let listing =
        std::fs::read_dir(&companies).expect("the 10k corpus has a records/companies folder");

    let mut slugs: Vec<String> = Vec::new();
    for entry in listing.flatten() {
        let path = entry.path();
        let is_markdown = matches!(path.extension().and_then(|x| x.to_str()), Some("md"));
        let is_index = matches!(path.file_name().and_then(|n| n.to_str()), Some("index.md"));
        if !is_markdown || is_index {
            continue;
        }
        if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
            slugs.push(stem.to_owned());
        }
    }

    // The smallest slug is the same element a full sort would put first.
    let slug = slugs
        .into_iter()
        .min()
        .expect("records/companies has at least one company record");
    format!("records/companies/{slug}")
}
/// Recursively copies the directory tree at `src` into `dst`, creating `dst`
/// (and any missing parents) first.
///
/// Entries whose own file type is a directory are recursed into; everything
/// else is copied with `std::fs::copy`.
///
/// # Panics
/// Panics on the first I/O error (create, list, stat or copy).
fn copy_dir_all(src: &Path, dst: &Path) {
    std::fs::create_dir_all(dst).expect("create dest dir");
    let listing = std::fs::read_dir(src).expect("read source dir");
    for item in listing {
        let item = item.expect("dir entry");
        let to = dst.join(item.file_name());
        let kind = item.file_type().expect("file type");
        if kind.is_dir() {
            copy_dir_all(&item.path(), &to);
            continue;
        }
        std::fs::copy(item.path(), &to).expect("copy file");
    }
}
/// Returns up to `limit` content targets from `store`, sorted ascending.
///
/// A target is the store-relative path of a `.md` file under `records/`,
/// `sources/` or `wiki/`, with `\` normalised to `/` and the `.md` suffix
/// removed. Files named `index.md`, `log.md` or `DB.md` are skipped, as are
/// directories that cannot be listed.
fn first_content_targets(store: &Path, limit: usize) -> Vec<String> {
    // Worklist of directories still to be listed, seeded with the three layers.
    let mut pending: Vec<_> = ["records", "sources", "wiki"]
        .iter()
        .map(|layer| store.join(layer))
        .collect();
    let mut targets: Vec<String> = Vec::new();

    while let Some(dir) = pending.pop() {
        let Ok(listing) = std::fs::read_dir(&dir) else {
            continue;
        };
        for path in listing.flatten().map(|entry| entry.path()) {
            if path.is_dir() {
                pending.push(path);
                continue;
            }
            if path.extension().and_then(|e| e.to_str()) != Some("md") {
                continue;
            }
            let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            if ["index.md", "log.md", "DB.md"].contains(&file_name) {
                continue;
            }
            let Ok(relative) = path.strip_prefix(store) else {
                continue;
            };
            let normalised = relative.to_string_lossy().replace('\\', "/");
            let bare = normalised.strip_suffix(".md").unwrap_or(&normalised);
            targets.push(bare.to_owned());
        }
    }

    // Sorting makes the selection independent of directory listing order.
    targets.sort();
    targets.truncate(limit);
    targets
}
/// Appends `count` `update` entries to `<store>/log.md`, one per content
/// target returned by `first_content_targets`.
///
/// The existing log is terminated with a newline if needed, followed by one
/// blank line, then the generated entries. Entry timestamps cycle through
/// days 1–28 of 2026-05 and minutes 0–59 of hour 11.
///
/// # Panics
/// Panics if the store has fewer than `count` content files, or if `log.md`
/// cannot be read or written.
fn grow_changed_set(store: &Path, count: usize) {
    let touched = first_content_targets(store, count);
    assert!(
        touched.len() >= count,
        "the 10k corpus must have at least {count} content files to grow the \
         changed set; found {}",
        touched.len()
    );

    let log_file = store.join("log.md");
    let mut contents = std::fs::read_to_string(&log_file).expect("read active log.md");
    if !contents.ends_with('\n') {
        contents.push('\n');
    }
    contents.push('\n');

    let appended: String = touched
        .iter()
        .enumerate()
        .map(|(i, bare)| {
            let day = (i % 28) + 1;
            let minute = i % 60;
            format!(
                "## [2026-05-{day:02} 11:{minute:02}] update | [[{bare}]]\nTouched for the perf changed-set.\n\n"
            )
        })
        .collect();
    contents.push_str(&appended);

    std::fs::write(&log_file, contents).expect("write grown log.md");
}
/// Unsized closure type that produces the argument vector for one `dbmd`
/// invocation; `median_time` calls it once before every run.
type ArgsFn<'a> = dyn Fn() -> Vec<String> + 'a;
/// Runs `dbmd <args…>` once with stdout/stderr discarded and returns the
/// wall-clock time of the `status()` call (spawn plus wait for exit).
///
/// # Panics
/// Panics if the binary cannot be resolved or spawned, or if it exits
/// unsuccessfully.
fn time_once(args: &[String]) -> Duration {
    let mut dbmd = StdCommand::cargo_bin("dbmd").expect("the `dbmd` binary builds for tests");
    dbmd.args(args);
    dbmd.stdout(std::process::Stdio::null());
    dbmd.stderr(std::process::Stdio::null());

    // The clock brackets only the child's run, not the command setup above.
    let started = Instant::now();
    let outcome = dbmd.status().expect("spawn dbmd");
    let wall = started.elapsed();

    if !outcome.success() {
        panic!(
            "`dbmd {}` exited {:?} — a perf op must also be a CORRECT op (a crash/hang is the \
             worst regression)",
            args.join(" "),
            outcome.code()
        );
    }
    wall
}
/// Returns the median wall-clock time of `dbmd` over several runs.
///
/// `iters` is `(warmup, timed)`: the first `warmup` runs are executed and
/// discarded, the next `timed` runs are sampled. `make_args` is called once
/// before every run to produce that run's argument vector. For an even number
/// of samples the upper of the two middle values is returned.
///
/// # Panics
/// Panics if `timed` is zero (there is no median of zero samples), or if any
/// run panics inside `time_once`.
fn median_time(iters: (usize, usize), make_args: &ArgsFn) -> Duration {
    let (warmup, timed) = iters;
    // Fail early with a clear message instead of an index-out-of-bounds
    // panic on the empty sample vector below.
    assert!(
        timed > 0,
        "median_time needs at least one timed iteration (warmup = {}, timed = {})",
        warmup,
        timed
    );

    for _ in 0..warmup {
        time_once(&make_args());
    }

    let mut samples: Vec<Duration> = (0..timed).map(|_| time_once(&make_args())).collect();
    samples.sort_unstable();
    samples[samples.len() / 2]
}
/// Asserts that `median` does not exceed `budget × BUDGET_SLACK`.
///
/// * `label`  – human-readable name of the timed operation, used in the message.
/// * `median` – measured median wall-clock time.
/// * `budget` – the plan budget for the operation, before slack is applied.
///
/// On failure the message also states which class of operation this is
/// supposed to be. That class is inferred from the budget: budgets strictly
/// above one second are the whole-store sweeps, anything up to and including
/// one second is a loop read. The comparison is strict so that the 1000 ms
/// working-set validate budget, which is timed with `LOOP_ITERS`, is not
/// described as a sweep.
///
/// # Panics
/// Panics with a `PERF REGRESSION` message when `median` exceeds the guard.
fn assert_within_budget(label: &str, median: Duration, budget: Duration) {
    let guard = budget * BUDGET_SLACK;
    let op_class = if budget > Duration::from_secs(1) {
        "an O(store) sweep that stays sub-budget"
    } else {
        "an O(changed) loop read that stays flat in store size"
    };
    assert!(
        median <= guard,
        "PERF REGRESSION: `{label}` median {median:?} exceeds the guard {guard:?} \
         (plan budget {budget:?} × {BUDGET_SLACK} CI headroom) at the 10k tier. \
         This op is supposed to be {}. See tests/PERF.md for the measured baseline.",
        op_class
    );
}
/// Reports whether the perf guard should be skipped: true whenever the
/// `DBMD_SKIP_PERF` environment variable is present, whatever its value.
fn skip_perf() -> bool {
    match std::env::var_os("DBMD_SKIP_PERF") {
        Some(_) => true,
        None => false,
    }
}
/// Perf guard: generates the 10k scale corpus in a temp dir, times a fixed
/// set of `dbmd` operations against it, and asserts every median stays within
/// its budget times `BUDGET_SLACK`.
///
/// Returns early without generating anything when `DBMD_SKIP_PERF` is set.
/// The working-set validate and the index rebuild each run against their own
/// copy of the generated store.
#[test]
fn budgets_hold_on_the_10k_scale_corpus() {
    /// Median time of `dbmd <argv…>` for `iters` = `(warmup, timed)` runs.
    fn measure(iters: (usize, usize), argv: &[&str]) -> Duration {
        let owned: Vec<String> = argv.iter().map(|part| (*part).to_owned()).collect();
        median_time(iters, &|| owned.clone())
    }

    if skip_perf() {
        eprintln!("[perf] DBMD_SKIP_PERF set — skipping the 10k budget guard");
        return;
    }

    let tmp = tempfile::TempDir::new().expect("create tempdir for the scale corpus");
    let store = build_scale_corpus(tmp.path());
    let store_str = store.to_str().expect("store path is UTF-8");
    eprintln!(
        "[perf] generated 10k scale corpus at {} ({} .md files)",
        store.display(),
        count_md_files(&store)
    );

    // ---- loop reads against the generated store --------------------------
    let median = measure(
        LOOP_ITERS,
        &[
            "fm",
            "query",
            "status=active",
            "--type",
            "company",
            "--dir",
            store_str,
        ],
    );
    eprintln!("[perf] fm query --type company: median {median:?} (budget {BUDGET_FM_QUERY:?})");
    assert_within_budget(
        "fm query status=active --type company",
        median,
        BUDGET_FM_QUERY,
    );

    let median = measure(
        LOOP_ITERS,
        &["search", "Kickoff", "--type", "email", "--dir", store_str],
    );
    eprintln!(
        "[perf] search Kickoff --type email: median {median:?} (budget {BUDGET_SEARCH_TYPED:?})"
    );
    assert_within_budget("search Kickoff --type email", median, BUDGET_SEARCH_TYPED);

    let median = measure(LOOP_ITERS, &["log", "tail", "20", "--dir", store_str]);
    eprintln!("[perf] log tail 20: median {median:?} (budget {BUDGET_LOG_TAIL:?})");
    assert_within_budget("log tail 20", median, BUDGET_LOG_TAIL);

    // ---- graph queries around the first company record -------------------
    let target = first_company_target(&store);

    let median = measure(
        LOOP_ITERS,
        &["graph", "backlinks", target.as_str(), "--dir", store_str],
    );
    eprintln!(
        "[perf] graph backlinks (unscoped): median {median:?} (budget {BUDGET_GRAPH_UNSCOPED:?})"
    );
    assert_within_budget("graph backlinks (unscoped)", median, BUDGET_GRAPH_UNSCOPED);

    let median = measure(
        LOOP_ITERS,
        &[
            "graph",
            "neighborhood",
            target.as_str(),
            "--hops",
            "1",
            "--dir",
            store_str,
        ],
    );
    eprintln!(
        "[perf] graph neighborhood --hops 1: median {median:?} (budget {BUDGET_GRAPH_UNSCOPED:?})"
    );
    assert_within_budget("graph neighborhood --hops 1", median, BUDGET_GRAPH_UNSCOPED);

    // ---- working-set validate on a copy with a grown log -----------------
    const GROWN_CHANGED: usize = 250;
    let validate_store = tmp.path().join("validate-working-target");
    copy_dir_all(&store, &validate_store);
    grow_changed_set(&validate_store, GROWN_CHANGED);
    let validate_str = validate_store
        .to_str()
        .expect("validate-working path is UTF-8");

    let median = measure(
        LOOP_ITERS,
        &["validate", "--since", "2020-01-01", validate_str],
    );
    eprintln!(
        "[perf] validate (working set, ~{GROWN_CHANGED} changed): median {median:?} \
         (budget {BUDGET_VALIDATE_WORKING:?})"
    );
    assert_within_budget(
        "validate (working set, grown changed set)",
        median,
        BUDGET_VALIDATE_WORKING,
    );

    // ---- whole-store sweeps ----------------------------------------------
    let median = measure(SWEEP_ITERS, &["validate", "--all", store_str]);
    eprintln!("[perf] validate --all: median {median:?} (budget {BUDGET_VALIDATE_ALL:?})");
    assert_within_budget("validate --all", median, BUDGET_VALIDATE_ALL);

    let median = measure(SWEEP_ITERS, &["stats", store_str]);
    eprintln!("[perf] stats: median {median:?} (budget {BUDGET_STATS:?})");
    assert_within_budget("stats", median, BUDGET_STATS);

    // The rebuild is timed on its own copy of the store.
    let rebuild_store = tmp.path().join("rebuild-target");
    copy_dir_all(&store, &rebuild_store);
    let rebuild_str = rebuild_store.to_str().expect("rebuild path is UTF-8");

    let median = measure(SWEEP_ITERS, &["index", "rebuild", "--dir", rebuild_str]);
    eprintln!("[perf] index rebuild: median {median:?} (budget {BUDGET_INDEX_REBUILD:?})");
    assert_within_budget("index rebuild", median, BUDGET_INDEX_REBUILD);
}