mod bench_support;
use bench_support::{
BenchHost, Scenario, benchmark_program, benchmark_surface, linked_benchmark_program,
projected_bindings, seeded_state_for,
};
use lashlang::{
CompiledProcessCache, CompiledProgramCache, ExecutionEnvironment, ExecutionOutcome,
ExecutionScratch, InMemoryLashlangArtifactStore, LashlangArtifactStore, LinkedProgramCache,
ProjectedBindings, State, compile_linked, execute, prewarm,
};
use std::alloc::{GlobalAlloc, Layout, System};
use std::env;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::Instant;
/// Installs `CountingAllocator` as the global allocator so heap traffic in this
/// binary passes through its counting hooks.
#[global_allocator]
static ALLOCATOR: CountingAllocator = CountingAllocator;
/// Running sum of the byte amounts passed to `record_alloc` since the last
/// `reset_alloc_counters`.
static ALLOCATED_BYTES: AtomicU64 = AtomicU64::new(0);
/// Bytes currently considered live: raised by `record_alloc`, lowered (saturating
/// at zero) by `record_dealloc`.
static LIVE_BYTES: AtomicU64 = AtomicU64::new(0);
/// Highest value `LIVE_BYTES` has reached since the last `reset_alloc_counters`.
static PEAK_LIVE_BYTES: AtomicU64 = AtomicU64::new(0);
/// Number of `record_alloc` calls, i.e. successful allocations plus growing
/// reallocations.
static ALLOCATIONS: AtomicU64 = AtomicU64::new(0);
/// Number of `dealloc` calls seen by the allocator; shrinking reallocations are
/// not counted here.
static DEALLOCATIONS: AtomicU64 = AtomicU64::new(0);
/// Allocator that forwards every request to [`System`] and mirrors the traffic
/// into the module-level counters.
struct CountingAllocator;

// SAFETY: every method delegates the actual memory management to `System`
// without altering pointers, layouts or sizes; the bookkeeping only touches
// atomics and never allocates.
unsafe impl GlobalAlloc for CountingAllocator {
    /// Allocates through `System`; a successful allocation is recorded with its
    /// requested size, a failed one (null) is not recorded at all.
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        // SAFETY: the caller upholds the `GlobalAlloc::alloc` contract, which is
        // forwarded unchanged.
        let block = unsafe { System.alloc(layout) };
        if block.is_null() {
            return block;
        }
        record_alloc(layout.size() as u64);
        block
    }

    /// Frees through `System`, then counts one deallocation and releases the
    /// layout's size from the live-byte total.
    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        let released = layout.size() as u64;
        // SAFETY: the caller guarantees `ptr` was allocated by this allocator
        // with `layout`; this allocator only ever hands out `System` blocks.
        unsafe { System.dealloc(ptr, layout) };
        DEALLOCATIONS.fetch_add(1, Ordering::Relaxed);
        record_dealloc(released);
    }

    /// Resizes through `System`. Only the size delta is recorded: growth goes
    /// through `record_alloc` (and therefore counts as an allocation), while a
    /// shrink or same-size resize goes through `record_dealloc`. A failed
    /// resize (null) leaves the counters untouched.
    unsafe fn realloc(&self, ptr: *mut u8, old_layout: Layout, new_size: usize) -> *mut u8 {
        // SAFETY: the caller upholds the `GlobalAlloc::realloc` contract, which
        // is forwarded unchanged.
        let resized = unsafe { System.realloc(ptr, old_layout, new_size) };
        if !resized.is_null() {
            let before = old_layout.size() as u64;
            let after = new_size as u64;
            if after <= before {
                record_dealloc(before - after);
            } else {
                record_alloc(after - before);
            }
        }
        resized
    }
}
/// Accounts for `bytes` newly obtained from the allocator.
///
/// Bumps the allocation count, the cumulative byte total and the live-byte
/// gauge, then raises the recorded peak if the new live total exceeds it.
fn record_alloc(bytes: u64) {
    ALLOCATIONS.fetch_add(1, Ordering::Relaxed);
    ALLOCATED_BYTES.fetch_add(bytes, Ordering::Relaxed);
    // `fetch_add` returns the previous value, so add `bytes` back to obtain the
    // live total as seen right after this allocation.
    let live_now = LIVE_BYTES.fetch_add(bytes, Ordering::Relaxed) + bytes;
    // Atomic max: the peak only ever moves upwards.
    PEAK_LIVE_BYTES.fetch_max(live_now, Ordering::Relaxed);
}
fn record_dealloc(bytes: u64) {
let _ = LIVE_BYTES.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |live| {
Some(live.saturating_sub(bytes))
});
}
/// Which code path `run_perf` measures. Selected on the command line by the
/// snake_case names that `parse_mode` accepts.
#[derive(Clone, Copy, Debug)]
enum Mode {
/// Every iteration seeds fresh state and scratch, links and compiles the
/// program from source, then executes it.
OneShot,
/// Same per-iteration work as `OneShot`, but `prewarm()` is called first and
/// the timer and allocation counters are restarted after it.
PrewarmedOneShot,
/// Every iteration only links the program from source.
LinkArtifact,
/// Links and compiles once; every iteration seeds fresh state and executes,
/// reusing a single scratch across iterations.
CompiledExecute,
/// Like `CompiledExecute`, but every iteration first round-trips the seeded
/// state's snapshot through `serde_json` and rebuilds the state from it.
Snapshot,
/// Links once; every iteration puts the module artifact into an in-memory
/// artifact store and reads it back.
ArtifactRoundtrip,
/// Links once; every iteration calls `CompiledProcessCache::get_or_compile`
/// for the module's `echo` process.
CompiledProcessCache,
/// Every iteration calls `CompiledProgramCache::get_or_compile` with the
/// program source.
CompiledProgramCache,
/// Every iteration calls `LinkedProgramCache::get_or_compile` with the program
/// source and the benchmark surface.
LinkedProgramCache,
}
/// Command-line entry point of the perf harness.
///
/// Accepted invocations (all positional arguments are optional):
///
/// * `--list-scenarios` — print every scenario name, one per line, and exit.
/// * `[MODE [SCENARIO|all [ITERATIONS]]]` — run `MODE` (default `one_shot`)
///   against `SCENARIO` (default: the baseline scenario; `all` runs every
///   scenario, separated by blank lines) for `ITERATIONS` iterations
///   (default depends on the mode).
///
/// # Panics
///
/// Panics on an unknown mode or scenario, on an `ITERATIONS` argument that is
/// not a valid `usize`, or if the tokio runtime cannot be built.
fn main() {
    let mut args = env::args().skip(1);
    let first = args.next();
    if first.as_deref() == Some("--list-scenarios") {
        for scenario in Scenario::ALL {
            println!("{scenario}");
        }
        return;
    }

    let mode = first.as_deref().map(parse_mode).unwrap_or(Mode::OneShot);
    let scenario_arg = args.next();
    let iterations = match args.next() {
        // An explicit count must be usable: silently substituting the default
        // would make the benchmark measure something other than what was asked
        // for, so reject bad input the same way unknown modes/scenarios are.
        Some(raw) => raw.parse::<usize>().unwrap_or_else(|err| {
            panic!("invalid iteration count `{raw}`; expected a non-negative integer ({err})")
        }),
        None => match mode {
            Mode::OneShot | Mode::PrewarmedOneShot => 25_000,
            Mode::CompiledExecute | Mode::Snapshot | Mode::CompiledProcessCache => 100_000,
            Mode::LinkArtifact => 25_000,
            Mode::ArtifactRoundtrip => 10_000,
            Mode::CompiledProgramCache | Mode::LinkedProgramCache => 25_000,
        },
    };
    let scenarios = parse_scenarios(scenario_arg.as_deref());

    for (index, scenario) in scenarios.iter().copied().enumerate() {
        // Blank line between consecutive scenario reports.
        if index > 0 {
            println!();
        }
        // Each scenario gets its own single-threaded runtime.
        let rt = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .expect("tokio runtime");
        run_perf(&rt, mode, scenario, iterations);
    }
}
/// Runs one benchmark `mode` against one `scenario` for `iterations` iterations
/// and prints a `key: value` report to stdout.
///
/// The wall-clock timer and the allocation counters are started right before
/// the mode dispatch. Only `Mode::PrewarmedOneShot` restarts them after its
/// setup step; for every other mode, one-time work done inside the arm before
/// the loop (linking, compiling, cache or store construction) is part of the
/// reported totals.
///
/// The per-iteration figures divide by `iterations` as `f64`, so passing `0`
/// yields non-finite values in the report rather than a panic.
///
/// Panics if any benchmarked operation fails or an executed program does not
/// finish (see the `expect` calls and `expect_finished`).
fn run_perf(rt: &tokio::runtime::Runtime, mode: Mode, scenario: Scenario, iterations: usize) {
let source = benchmark_program(scenario);
let projected = projected_bindings(scenario);
let host = BenchHost;
// Shared scratch, reused across iterations by the modes that do not create
// their own per-iteration scratch.
let mut scratch = ExecutionScratch::new();
// Optional report sections; each is filled in only by the mode that produces it.
let mut process_cache_stats = None;
let mut program_cache_stats = None;
let mut linked_cache_stats = None;
let mut artifact_bytes = None;
// Start of the measured region (counters first, then the clock).
reset_alloc_counters();
let mut started = Instant::now();
match mode {
Mode::OneShot => {
for _ in 0..iterations {
let mut state = seeded_state_for(scenario);
// Fresh scratch per iteration, shadowing the shared one above.
let mut scratch = ExecutionScratch::new();
let linked = linked_benchmark_program(std::hint::black_box(source.as_str()));
let compiled = compile_linked(&linked);
let outcome =
execute_benchmark(rt, &compiled, &mut state, &host, &mut scratch, &projected);
expect_finished(outcome);
}
}
Mode::PrewarmedOneShot => {
prewarm();
// Exclude the prewarm call from both the allocation counters and the timing.
reset_alloc_counters();
started = Instant::now();
for _ in 0..iterations {
let mut state = seeded_state_for(scenario);
let mut scratch = ExecutionScratch::new();
let linked = linked_benchmark_program(std::hint::black_box(source.as_str()));
let compiled = compile_linked(&linked);
let outcome =
execute_benchmark(rt, &compiled, &mut state, &host, &mut scratch, &projected);
expect_finished(outcome);
}
}
Mode::LinkArtifact => {
for _ in 0..iterations {
let linked = linked_benchmark_program(std::hint::black_box(source.as_str()));
// Keep the link result observable so the work cannot be optimized away.
std::hint::black_box((&linked.module_ref, &linked.required_surface_ref));
}
}
Mode::CompiledExecute => {
// Link and compile once; this happens inside the measured region.
let linked = linked_benchmark_program(source.as_str());
let compiled = compile_linked(&linked);
for _ in 0..iterations {
let mut state = seeded_state_for(scenario);
let outcome =
execute_benchmark(rt, &compiled, &mut state, &host, &mut scratch, &projected);
expect_finished(outcome);
}
}
Mode::Snapshot => {
let linked = linked_benchmark_program(source.as_str());
let compiled = compile_linked(&linked);
for _ in 0..iterations {
let mut state = seeded_state_for(scenario);
// Round-trip the state through its JSON-encoded snapshot before executing.
let snapshot = state.snapshot();
let encoded = serde_json::to_vec(&snapshot).expect("snapshot encode");
let decoded = serde_json::from_slice(&encoded).expect("snapshot decode");
state = State::from_snapshot(decoded);
let outcome =
execute_benchmark(rt, &compiled, &mut state, &host, &mut scratch, &projected);
expect_finished(outcome);
}
}
Mode::ArtifactRoundtrip => {
let linked = linked_benchmark_program(source.as_str());
// Encoded artifact size, reported as `artifact_bytes`; computed once, before the loop.
artifact_bytes = Some(
linked
.artifact
.to_store_bytes()
.expect("artifact should encode")
.len(),
);
let store = InMemoryLashlangArtifactStore::new();
for _ in 0..iterations {
rt.block_on(store.put_module_artifact(&linked.artifact))
.expect("artifact store put should succeed");
let artifact = rt
.block_on(store.get_module_artifact(&linked.module_ref))
.expect("artifact store get should succeed")
.expect("artifact should exist");
std::hint::black_box(artifact);
}
}
Mode::CompiledProcessCache => {
let linked = linked_benchmark_program(source.as_str());
// The benchmark module is required to export a process named `echo`.
let process_ref = linked
.artifact
.process_ref("echo")
.expect("benchmark module should export echo process")
.clone();
let mut cache = CompiledProcessCache::new();
for _ in 0..iterations {
let compiled = cache
.get_or_compile(&linked.artifact, &process_ref, &linked.required_surface_ref)
.expect("process cache compile should succeed");
std::hint::black_box(compiled.compile_stats());
}
process_cache_stats = Some(cache.stats());
}
Mode::CompiledProgramCache => {
let mut cache = CompiledProgramCache::new();
for _ in 0..iterations {
let compiled = cache
.get_or_compile(std::hint::black_box(source.as_str()))
.expect("program cache compile should succeed");
std::hint::black_box(compiled.compile_stats());
}
program_cache_stats = Some(cache.stats());
}
Mode::LinkedProgramCache => {
let mut cache = LinkedProgramCache::new();
let surface = benchmark_surface();
for _ in 0..iterations {
let compiled = cache
.get_or_compile(std::hint::black_box(source.as_str()), surface)
.expect("linked program cache compile should succeed");
std::hint::black_box(compiled.compiled_program().compile_stats());
}
linked_cache_stats = Some(cache.stats());
}
}
// End of the measured region: capture time and counters before any report output.
let elapsed = started.elapsed();
let allocs = alloc_snapshot();
println!("lashlang perf");
println!("mode: {mode:?}");
println!("scenario: {scenario}");
println!("iterations: {iterations}");
println!("program_bytes: {}", source.len());
if let Some(bytes) = artifact_bytes {
println!("artifact_bytes: {bytes}");
}
println!("elapsed_ms: {:.3}", elapsed.as_secs_f64() * 1_000.0);
println!(
"ns_per_iter: {:.1}",
elapsed.as_nanos() as f64 / iterations as f64
);
println!("allocations: {}", allocs.allocations);
println!("deallocations: {}", allocs.deallocations);
println!("allocated_bytes: {}", allocs.allocated_bytes);
println!(
"allocations_per_iter: {:.3}",
allocs.allocations as f64 / iterations as f64
);
println!(
"allocated_bytes_per_iter: {:.1}",
allocs.allocated_bytes as f64 / iterations as f64
);
println!("peak_live_bytes: {}", allocs.peak_live_bytes);
// Cache statistics are printed only for the cache mode that was run.
if let Some(stats) = process_cache_stats {
println!("process_cache_hits: {}", stats.hits);
println!("process_cache_misses: {}", stats.misses);
println!("process_cache_evictions: {}", stats.evictions);
println!("process_cache_entries: {}", stats.entries);
}
if let Some(stats) = program_cache_stats {
println!("program_cache_hits: {}", stats.hits);
println!("program_cache_misses: {}", stats.misses);
println!("program_cache_evictions: {}", stats.evictions);
println!("program_cache_entries: {}", stats.entries);
}
if let Some(stats) = linked_cache_stats {
println!("linked_cache_hits: {}", stats.hits);
println!("linked_cache_misses: {}", stats.misses);
println!("linked_cache_evictions: {}", stats.evictions);
println!("linked_cache_entries: {}", stats.entries);
}
}
/// Executes `compiled` against `state` on `rt` and returns its outcome.
///
/// The caller's `scratch` is moved into the execution environment for the
/// duration of the run (a default value is left in its place) and is replaced
/// afterwards with whatever scratch the environment hands back, or a default
/// one if it hands back nothing. `projected` is cloned into the environment on
/// every call.
///
/// # Panics
///
/// Panics with "benchmark execution should succeed" if execution returns an
/// error.
fn execute_benchmark(
    rt: &tokio::runtime::Runtime,
    compiled: &lashlang::CompiledProgram,
    state: &mut State,
    host: &BenchHost,
    scratch: &mut ExecutionScratch,
    projected: &ProjectedBindings,
) -> ExecutionOutcome {
    let lent_scratch = std::mem::take(scratch);
    let environment = ExecutionEnvironment::new(host)
        .with_scratch(lent_scratch)
        .with_projected_bindings(projected.clone());

    let run = execute(compiled, state, &environment);
    let result = rt.block_on(run).expect("benchmark execution should succeed");

    // Hand the (possibly recycled) scratch back to the caller for the next run.
    *scratch = environment.take_recycled_scratch().unwrap_or_default();
    result
}
/// Resolves the optional scenario argument into the list of scenarios to run.
///
/// * no argument — just the baseline scenario;
/// * `"all"` — every scenario in `Scenario::ALL`;
/// * anything else — the single scenario `Scenario::parse` recognizes.
///
/// # Panics
///
/// Panics, listing the expected values, when the name is not recognized.
fn parse_scenarios(value: Option<&str>) -> Vec<Scenario> {
    let Some(value) = value else {
        return vec![Scenario::Baseline];
    };
    if value == "all" {
        return Scenario::ALL.to_vec();
    }
    match Scenario::parse(value) {
        Some(scenario) => vec![scenario],
        None => panic!(
            "unknown scenario `{value}`; expected {}",
            Scenario::expected_values()
        ),
    }
}
/// Maps a command-line mode name (snake_case, case-sensitive) to its [`Mode`].
///
/// # Panics
///
/// Panics with a message listing every accepted name when `value` is not one
/// of them.
fn parse_mode(value: &str) -> Mode {
    const KNOWN_MODES: [(&str, Mode); 9] = [
        ("one_shot", Mode::OneShot),
        ("prewarmed_one_shot", Mode::PrewarmedOneShot),
        ("link_artifact", Mode::LinkArtifact),
        ("compiled_execute", Mode::CompiledExecute),
        ("snapshot", Mode::Snapshot),
        ("artifact_roundtrip", Mode::ArtifactRoundtrip),
        ("compiled_process_cache", Mode::CompiledProcessCache),
        ("compiled_program_cache", Mode::CompiledProgramCache),
        ("linked_program_cache", Mode::LinkedProgramCache),
    ];

    let other = value;
    match KNOWN_MODES.iter().find(|(name, _)| *name == other) {
        Some(&(_, mode)) => mode,
        None => panic!(
            "unknown mode `{other}`; expected one_shot, prewarmed_one_shot, link_artifact, compiled_execute, snapshot, artifact_roundtrip, compiled_process_cache, compiled_program_cache, or linked_program_cache"
        ),
    }
}
/// Zeroes every allocation counter, marking the start of a measured region.
///
/// Memory that is already allocated at this point is no longer reflected in
/// the live-byte gauge afterwards.
fn reset_alloc_counters() {
    let counters = [
        &ALLOCATED_BYTES,
        &LIVE_BYTES,
        &PEAK_LIVE_BYTES,
        &ALLOCATIONS,
        &DEALLOCATIONS,
    ];
    for counter in counters.iter() {
        counter.store(0, Ordering::Relaxed);
    }
}
fn alloc_snapshot() -> AllocSnapshot {
AllocSnapshot {
allocated_bytes: ALLOCATED_BYTES.load(Ordering::Relaxed),
peak_live_bytes: PEAK_LIVE_BYTES.load(Ordering::Relaxed),
allocations: ALLOCATIONS.load(Ordering::Relaxed),
deallocations: DEALLOCATIONS.load(Ordering::Relaxed),
}
}
struct AllocSnapshot {
allocated_bytes: u64,
peak_live_bytes: u64,
allocations: u64,
deallocations: u64,
}
/// Asserts that a benchmark run ended in `ExecutionOutcome::Finished`.
///
/// The finished value is passed through `black_box` so the optimizer cannot
/// discard the work that produced it.
///
/// # Panics
///
/// Panics with "benchmark program must finish" for any other outcome.
fn expect_finished(outcome: ExecutionOutcome) {
    match outcome {
        ExecutionOutcome::Finished(value) => {
            std::hint::black_box(value);
        }
        _ => panic!("benchmark program must finish"),
    }
}