use crate::core::{MemoryBudget, WorkingSet};
#[derive(Debug, Clone)]
pub struct BuildMemoryModel {
pub components: Vec<(String, u64)>,
pub total_bytes: u64,
}
impl BuildMemoryModel {
pub fn from_reference_len(n: u64) -> Self {
const SA_ELEM: u64 = 4; const USIZE: u64 = 8; let lms = n / 2; let comps: Vec<(&str, u64)> = vec![
("text(u32)", n.saturating_mul(SA_ELEM)),
("types(1B)", n),
("lms_positions(usize)", lms.saturating_mul(USIZE)),
("induce-sort suffix array(i32)", n.saturating_mul(SA_ELEM)),
("lms_in_sa_order(usize)", lms.saturating_mul(USIZE)),
("lms_name(u32)", n.saturating_mul(SA_ELEM)),
("reduced string(u32)", lms.saturating_mul(SA_ELEM)),
("returned suffix array(u32)", n.saturating_mul(SA_ELEM)),
("fm-index (bwt + rank + C-table)", n.saturating_mul(3)),
];
let level0: u64 = comps
.iter()
.fold(0u64, |acc, (_, b)| acc.saturating_add(*b));
let recursion = n.saturating_mul(15);
let mut components: Vec<(String, u64)> =
comps.into_iter().map(|(s, b)| (s.to_string(), b)).collect();
components.push(("recursion (geometric tail)".to_string(), recursion));
let total_bytes = level0.saturating_add(recursion);
Self {
components,
total_bytes,
}
}
pub fn working_set(&self) -> WorkingSet {
WorkingSet {
bytes: self.total_bytes,
}
}
pub fn render(&self, total_bp: u64) -> String {
let mut out = String::from("build memory model (n-scale peak set):\n");
let denom = total_bp.max(1);
for (name, bytes) in &self.components {
out.push_str(&format!(
" {name:<34} {:>6} MiB ({} B/base)\n",
bytes / (1 << 20),
bytes / denom
));
}
out.push_str(&format!(
" {:-<34} {:>6} MiB ({} B/base)\n",
"total ",
self.total_bytes / (1 << 20),
self.total_bytes / denom
));
out
}
}
#[derive(Debug, Clone)]
pub struct IndexBuildReport {
pub index_path: String,
pub contigs: Vec<(String, u32)>,
pub total_bp: u64,
pub reference_blake3: [u8; 32],
pub index_bytes: u64,
}
impl IndexBuildReport {
pub fn render(&self) -> String {
let mut out = String::new();
out.push_str(&format!("index: {}\n", self.index_path));
out.push_str(&format!(
"contigs: {} ({} bp total)\n",
self.contigs.len(),
self.total_bp
));
for (name, length) in &self.contigs {
out.push_str(&format!(" {name}\t{length}\n"));
}
out.push_str(&format!(
"reference_blake3: {}\n",
hex32(&self.reference_blake3)
));
out.push_str(&format!("index_bytes: {}\n", self.index_bytes));
out
}
}
pub fn render_plan_line(estimate: WorkingSet, budget: MemoryBudget) -> String {
let verdict = if estimate.fits(budget) { "OK" } else { "OVER" };
format!(
"plan: est. build peak ~{} MiB / budget {} MiB [{}]",
estimate.bytes / (1 << 20),
budget.bytes / (1 << 20),
verdict
)
}
fn hex32(bytes: &[u8; 32]) -> String {
const HEX: &[u8; 16] = b"0123456789abcdef";
let mut out = vec![0u8; 64];
for (i, b) in bytes.iter().enumerate() {
out[2 * i] = HEX[(b >> 4) as usize];
out[2 * i + 1] = HEX[(b & 0x0f) as usize];
}
String::from_utf8(out).expect("hex digits are valid ASCII")
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn model_envelopes_the_d0_measured_realized_peak() {
let n = 100_000_000u64; let total = BuildMemoryModel::from_reference_len(n).total_bytes;
assert!(
total >= 41 * n,
"model {} B/base must envelope the D0-measured 41 B/base realized peak",
total / n
);
assert!(
total <= 50 * n,
"model {} B/base is implausibly large (formula drift?)",
total / n
);
assert_eq!(
BuildMemoryModel::from_reference_len(n).working_set().bytes,
total
);
}
#[test]
fn receipt_render_is_deterministic_and_contains_fields() {
let report = IndexBuildReport {
index_path: "ref.idx".to_string(),
contigs: vec![("chr1".to_string(), 100), ("chr2".to_string(), 50)],
total_bp: 150,
reference_blake3: [0xab; 32],
index_bytes: 4096,
};
let a = report.render();
assert_eq!(a, report.render(), "render must be deterministic");
assert!(a.contains("index: ref.idx"));
assert!(a.contains("contigs: 2 (150 bp total)"));
assert!(a.contains(" chr1\t100"));
assert!(a.contains(" chr2\t50"));
assert!(a.contains(&format!("reference_blake3: {}", "ab".repeat(32))));
assert!(a.contains("index_bytes: 4096"));
}
#[test]
fn plan_line_reports_ok_and_over() {
let budget = MemoryBudget::from_mb(100);
let under = WorkingSet {
bytes: 50 * (1 << 20),
};
let over = WorkingSet {
bytes: 200 * (1 << 20),
};
assert!(render_plan_line(under, budget).ends_with("[OK]"));
assert!(render_plan_line(over, budget).ends_with("[OVER]"));
assert!(render_plan_line(under, budget).contains("budget 100 MiB"));
}
#[test]
fn build_model_breaks_down_and_sums() {
let m = BuildMemoryModel::from_reference_len(1_000_000);
let sum: u64 = m.components.iter().map(|(_, b)| b).sum();
assert_eq!(sum, m.total_bytes, "components must sum to total");
let names: Vec<&str> = m.components.iter().map(|(n, _)| n.as_str()).collect();
for needed in ["text(u32)", "suffix array", "lms_name"] {
assert!(
names.iter().any(|n| n.contains(needed)),
"missing component {needed}: {names:?}"
);
}
assert!(
m.total_bytes >= 20_000_000,
"model must be ≥20 B/base (~{} B/base)",
m.total_bytes / 1_000_000
);
}
#[test]
fn build_model_is_monotonic_and_overflow_safe() {
assert!(
BuildMemoryModel::from_reference_len(2_000_000).total_bytes
> BuildMemoryModel::from_reference_len(1_000_000).total_bytes
);
let _ = BuildMemoryModel::from_reference_len(u64::MAX); }
}