use std::time::Instant;
use tari_ootle_transaction::args;
use tari_template_test_tooling::TemplateTest;
// Crate root directory, captured from Cargo at compile time; passed to `TemplateTest::new`.
const CRATE_PATH: &str = env!("CARGO_MANIFEST_DIR");
// Template path handed to `TemplateTest::new` alongside `CRATE_PATH`.
const TEMPLATE: &str = "tests/templates/metering_bench";
// Divisor that turns the per-round slope into a per-step figure.
// NOTE(review): presumably the number of steps the template runs per round — confirm against
// the metering_bench template source.
const INNER: f64 = 64.0;
// The two round counts each function is timed at. Only the slope between them is used, so any
// cost that is the same for both calls cancels out.
const R1: u64 = 35_000;
const R2: u64 = 70_000;
// Number of timed calls per measurement; the fastest one is kept.
const TRIALS: usize = 9;
/// One row of the benchmark table: which template function to time and how it is reported.
struct Bench {
    /// Text printed in the "op" column of the results table.
    label: &'static str,
    /// Name of the `MeteringBench` template function that is invoked and timed.
    func: &'static str,
    /// Value printed in the "current" column, next to the suggested one.
    current: u64,
    /// When `true`, the `bench_fnoop` baseline is subtracted from the measurement; otherwise
    /// the `bench_noop` baseline is used.
    float: bool,
}
/// Shorthand constructor for a [`Bench`] row, so the benchmark list reads as a compact table.
///
/// Each argument is stored unchanged in the field of the same name. Being a `const fn`, it can
/// also be used to build rows in constant contexts.
const fn b(label: &'static str, func: &'static str, current: u64, float: bool) -> Bench {
    // Fields are listed reporting data first, then what drives the measurement; every field is
    // a plain move of the matching argument.
    Bench {
        current,
        label,
        float,
        func,
    }
}
/// Times a set of `MeteringBench` template functions and prints, for each op, its per-op cost in
/// nanoseconds, that cost as a multiple of a "cheap op" baseline, the currently configured
/// price, and a suggested price (the multiple rounded, never below 1).
///
/// Progress and errors go to stderr; the results table and notes go to stdout.
///
/// Exits with status 1 if the cheap-op baseline does not come out as a positive, finite number.
/// Every ratio is divided by that baseline, so a zero or negative value (which timing noise can
/// produce) would otherwise yield `inf`/`NaN`/negative ratios and a meaningless "suggested"
/// column.
fn main() {
    let benches = [
        b("i64.and (ref)", "bench_and", 1, false),
        b("i64.add", "bench_add", 1, false),
        b("i64.sub", "bench_sub", 1, false),
        b("i64.or", "bench_or", 1, false),
        b("i64.xor", "bench_xor", 1, false),
        b("i64.shl", "bench_shl", 1, false),
        b("i64.mul", "bench_mul", 1, false),
        b("i64.div_u", "bench_div_u64", 1, false),
        b("i64.div_s", "bench_div_s64", 1, false),
        b("i64.rem_u", "bench_rem_u64", 1, false),
        b("i32.div_u", "bench_div_u32", 1, false),
        b("f64.add", "bench_fadd", 4, true),
        b("f64.mul", "bench_fmul", 4, true),
        b("f64.div", "bench_fdiv", 4, true),
    ];

    eprintln!("Compiling metering_bench template and warming up...");
    let mut test = TemplateTest::new(CRATE_PATH, [TEMPLATE]);

    // Nanoseconds per step of `func`: the slope of best-case wall time between R1 and R2 rounds,
    // divided by INNER. Using the slope rather than a single timing cancels any cost that is the
    // same for both round counts.
    let step_ns = |test: &mut TemplateTest, func: &str| -> f64 {
        // Best (minimum) wall time in nanoseconds over TRIALS calls with the given round count.
        let time = |test: &mut TemplateTest, rounds: u64| -> f64 {
            // One untimed call first, so the timed trials do not include first-call effects.
            let _: u64 = test.call_function("MeteringBench", func, args![rounds], vec![]);
            let mut best = f64::MAX;
            for _ in 0..TRIALS {
                let start = Instant::now();
                let _: u64 = test.call_function("MeteringBench", func, args![rounds], vec![]);
                best = best.min(start.elapsed().as_nanos() as f64);
            }
            best
        };
        let slope = (time(test, R2) - time(test, R1)) / (R2 - R1) as f64;
        slope / INNER
    };

    // Baselines subtracted from the integer and float benchmarks respectively.
    let noop = step_ns(&mut test, "bench_noop");
    let fnoop = step_ns(&mut test, "bench_fnoop");
    // NOTE(review): the division by 8 presumably matches eight cheap ops per step in
    // `bench_cheap8` — confirm against the template.
    let cheap_op_ns = (step_ns(&mut test, "bench_cheap8") - noop) / 8.0;

    // The baseline is the divisor for every ratio below. If noise pushed it to zero or below
    // (or it is not finite), no ratio computed from it is meaningful, so stop here instead of
    // printing a table of inf/NaN-derived suggestions.
    if !(cheap_op_ns.is_finite() && cheap_op_ns > 0.0) {
        eprintln!(
            "error: cheap-op baseline measured as {cheap_op_ns:.3} ns, which is not a positive \
             finite value; timings are too noisy to derive cost ratios. Re-run on a quiet machine."
        );
        std::process::exit(1);
    }

    println!();
    println!("Per-op cost (lower bound; dependent-chain latency). 1 cheap op ≈ {cheap_op_ns:.3} ns");
    println!();
    println!(
        "{:<14} {:>10} {:>10} {:>9} {:>10}",
        "op", "ns/op", "x cheap", "current", "suggested"
    );
    // 57 = the five column widths above plus the four separating spaces.
    println!("{}", "-".repeat(57));

    for bench in &benches {
        let step = step_ns(&mut test, bench.func);
        // Subtract the matching baseline; clamp at zero because noise can make a cheap op
        // measure below its baseline.
        let op_ns = (if bench.float { step - fnoop } else { step - noop }).max(0.0);
        let ratio = op_ns / cheap_op_ns;
        // Never suggest a price below 1.
        let suggested = ratio.round().max(1.0) as u64;
        println!(
            "{:<14} {:>10.3} {:>10.1} {:>9} {:>10}",
            bench.label, op_ns, ratio, bench.current, suggested
        );
    }

    println!();
    println!("Notes:");
    println!("- Ops at ~1x are below the single-op timing noise floor; they are priced at the 1-point");
    println!(" minimum. The signal is reliable for the expensive ops (div/rem/mul/float).");
    println!("- Run on your slowest validator class: division's ratio is much higher on x86 (~30-90");
    println!(" cycles) than on Apple Silicon (~2-9), so re-cost against the worst case.");
    println!("- Numbers are dependent-chain latency (worst case a busy loop can realise), the");
    println!(" conservative figure to price against.");
    println!("- f64.sqrt and memory/cache costs are not covered here and need separate measurement.");
}