use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use seq_geom_parser::{parse_geometry, CompiledGeom};
/// Builds a deterministic synthetic read of `len` bases.
///
/// Position `i` holds the base at index `(i + seed) % 4` of `ACGT`, so the
/// output cycles through the four nucleotides starting at an offset chosen
/// by `seed`.
fn make_read(len: usize, seed: u8) -> Vec<u8> {
    const BASES: &[u8; 4] = b"ACGT";
    // 256 is a multiple of 4, so reducing both terms modulo 4 up front picks
    // the same base as a wrapping `u8` addition followed by `% 4`.
    let offset = usize::from(seed % 4);
    let mut read = Vec::with_capacity(len);
    for i in 0..len {
        read.push(BASES[(i % 4 + offset) % 4]);
    }
    read
}
/// Builds a synthetic R2 laid out as: 50 generated bases, 18 `N` bytes, the
/// 8-byte `sample_bc`, then 10 trailing `N` bytes.
///
/// The segment lengths mirror the `r[50]x[18]s[8]x:` read-2 geometry used by
/// the benchmarks in this file.
fn make_flex_v1_r2(sample_bc: &[u8; 8]) -> Vec<u8> {
    let mut read = make_read(50, 42);
    // 18 filler bytes between the generated prefix and the sample barcode.
    read.resize(read.len() + 18, b'N');
    read.extend_from_slice(sample_bc);
    // 10 filler bytes after the sample barcode.
    read.resize(read.len() + 10, b'N');
    read
}
/// Builds a synthetic R1 laid out as: 16 generated bases, 12 generated bases,
/// `gap_len` `N` bytes, `anchor`, the 10-byte `sample_bc`, then 20 trailing
/// `N` bytes.
///
/// The segment lengths mirror the `b[16]u[12]x[0-3]f[...]s[10]x:` read-1
/// geometry used by the benchmarks in this file.
fn make_flex_v2_r1(gap_len: usize, anchor: &[u8], sample_bc: &[u8; 10]) -> Vec<u8> {
    let mut read = make_read(16, 1);
    read.extend_from_slice(&make_read(12, 2));
    // Variable-length filler placed in front of the anchor.
    read.resize(read.len() + gap_len, b'N');
    read.extend_from_slice(anchor);
    read.extend_from_slice(sample_bc);
    // Fixed 20-byte filler tail.
    read.resize(read.len() + 20, b'N');
    read
}
/// Builds a synthetic R1 consisting of `prefix_len` generated bases followed
/// by `anchor` and then `barcode`, with nothing after the barcode.
fn make_boundary_resolved_r1(prefix_len: usize, anchor: &[u8], barcode: &[u8]) -> Vec<u8> {
    let mut read = make_read(prefix_len, 7);
    read.reserve(anchor.len() + barcode.len());
    for segment in [anchor, barcode] {
        read.extend_from_slice(segment);
    }
    read
}
/// Benchmarks `parse_geometry` on a fixed set of geometry strings.
///
/// Each case is registered in the `parse` group under the id
/// `parse/<label>`; a parse failure panics via `unwrap`.
fn bench_parse(c: &mut Criterion) {
    // (label, geometry string) pairs fed to the parser.
    let cases = [
        ("chromium_v3", "1{b[16]u[12]x:}2{r:}"),
        ("flex_v1", "1{b[16]u[12]x:}2{r[50]x[18]s[8]x:}"),
        ("flex_v2", "1{b[16]u[12]x[0-3]f[TTGCTAGGACCG]s[10]x:}2{r:}"),
        (
            "flex_v2_hamming",
            "1{b[16]u[12]x[0-3]hamming(f[TTGCTAGGACCG],1)s[10]x:}2{r:}",
        ),
    ];

    let mut group = c.benchmark_group("parse");
    for (label, spec) in &cases {
        let id = BenchmarkId::new("parse", label);
        group.bench_with_input(id, spec, |bencher, input| {
            bencher.iter(|| parse_geometry(black_box(input)).unwrap());
        });
    }
    group.finish();
}
/// Benchmarks `CompiledGeom::from_fragment_geom` on pre-parsed geometries.
///
/// Parsing happens once per case outside the timed closure, so only the
/// compile step is measured. Cases are registered in the `compile` group
/// under the id `compile/<label>`.
fn bench_compile(c: &mut Criterion) {
    // (label, geometry string) pairs; each is parsed before being timed.
    let cases = [
        ("chromium_v3", "1{b[16]u[12]x:}2{r:}"),
        ("flex_v1", "1{b[16]u[12]x:}2{r[50]x[18]s[8]x:}"),
        ("flex_v2", "1{b[16]u[12]x[0-3]f[TTGCTAGGACCG]s[10]x:}2{r:}"),
    ];

    let mut group = c.benchmark_group("compile");
    for (label, spec) in &cases {
        let parsed = parse_geometry(spec).unwrap();
        let id = BenchmarkId::new("compile", label);
        group.bench_with_input(id, &parsed, |bencher, fragment| {
            bencher.iter(|| CompiledGeom::from_fragment_geom(black_box(fragment)).unwrap());
        });
    }
    group.finish();
}
/// Benchmarks `CompiledGeom::extract` for two geometries made only of
/// fixed-length segments (`chromium_v3` and `flex_v1`).
///
/// Throughput is reported as one element per iteration.
fn bench_extract_fixed(c: &mut Criterion) {
    let mut group = c.benchmark_group("extract_fixed");
    group.throughput(Throughput::Elements(1));

    // Case 1: barcode + UMI on R1, whole R2 as the read.
    {
        let fragment = parse_geometry("1{b[16]u[12]x:}2{r:}").unwrap();
        let extractor = CompiledGeom::from_fragment_geom(&fragment).unwrap();
        let read1 = make_read(150, 1);
        let read2 = make_read(150, 2);
        group.bench_function("chromium_v3", |bencher| {
            bencher.iter(|| extractor.extract(black_box(&read1), black_box(&read2)));
        });
    }

    // Case 2: same R1 layout, with R2 carrying a sample barcode at a fixed offset.
    {
        let fragment = parse_geometry("1{b[16]u[12]x:}2{r[50]x[18]s[8]x:}").unwrap();
        let extractor = CompiledGeom::from_fragment_geom(&fragment).unwrap();
        let read1 = make_read(150, 1);
        let read2 = make_flex_v1_r2(b"ACTTTAGG");
        group.bench_function("flex_v1", |bencher| {
            bencher.iter(|| extractor.extract(black_box(&read1), black_box(&read2)));
        });
    }

    group.finish();
}
/// Benchmarks `CompiledGeom::extract` for geometries that contain the fixed
/// anchor `TTGCTAGGACCG` after a 0-3 base variable gap.
///
/// Registered cases, in order:
/// - `exact_gap/{0,1,2,3}`: plain anchor geometry, one input per gap length.
/// - `hamming1_exact`: `hamming(..., 1)` geometry, unmodified anchor in the read.
/// - `hamming1_mismatch`: same geometry, anchor with its last base substituted.
/// - `hamming1_early_reject`: same geometry, anchor with its first two bases
///   substituted.
/// - `anchor_not_found`: plain anchor geometry on a generated read built only
///   from the cycling `make_read` pattern.
fn bench_extract_anchor(c: &mut Criterion) {
    let mut group = c.benchmark_group("extract_anchor");
    group.throughput(Throughput::Elements(1));

    let anchor = b"TTGCTAGGACCG";
    let sample_bc = b"SAMPLEBC10";

    let exact_geom = parse_geometry("1{b[16]u[12]x[0-3]f[TTGCTAGGACCG]s[10]x:}2{r:}").unwrap();
    let exact = CompiledGeom::from_fragment_geom(&exact_geom).unwrap();
    let read2 = make_read(150, 2);

    // One benchmark per gap length permitted by `x[0-3]`.
    for gap in 0..=3usize {
        let read1 = make_flex_v2_r1(gap, anchor, sample_bc);
        group.bench_with_input(
            BenchmarkId::new("exact_gap", gap),
            &(read1, read2.clone()),
            |bencher, (r1, r2)| {
                bencher.iter(|| exact.extract(black_box(r1), black_box(r2)));
            },
        );
    }

    let hamming_geom =
        parse_geometry("1{b[16]u[12]x[0-3]hamming(f[TTGCTAGGACCG],1)s[10]x:}2{r:}").unwrap();
    let hamming = CompiledGeom::from_fragment_geom(&hamming_geom).unwrap();

    // Unmodified anchor.
    let r1_exact = make_flex_v2_r1(2, anchor, sample_bc);
    group.bench_function("hamming1_exact", |bencher| {
        bencher.iter(|| hamming.extract(black_box(&r1_exact), black_box(&read2)));
    });

    // A single substitution at the final anchor position.
    let mut one_mismatch = *anchor;
    one_mismatch[11] = b'A';
    let r1_mismatch = make_flex_v2_r1(2, &one_mismatch, sample_bc);
    group.bench_function("hamming1_mismatch", |bencher| {
        bencher.iter(|| hamming.extract(black_box(&r1_mismatch), black_box(&read2)));
    });

    // Two substitutions at the very start of the anchor.
    let mut two_mismatches = *anchor;
    two_mismatches[0] = b'C';
    two_mismatches[1] = b'A';
    let r1_early_reject = make_flex_v2_r1(2, &two_mismatches, sample_bc);
    group.bench_function("hamming1_early_reject", |bencher| {
        bencher.iter(|| hamming.extract(black_box(&r1_early_reject), black_box(&read2)));
    });

    // Generated read that is never given the anchor sequence.
    let r1_nofind = make_read(80, 99);
    group.bench_function("anchor_not_found", |bencher| {
        bencher.iter(|| exact.extract(black_box(&r1_nofind), black_box(&read2)));
    });

    group.finish();
}
/// Benchmarks `CompiledGeom::extract` for the geometry
/// `1{r:f[ACAGT]b[9-11]}2{u[12]x:}`.
///
/// Registered cases, in order:
/// - `single_anchor_suffix_bc/{9,10,11}`: one anchor followed by a barcode of
///   the given length at the end of R1.
/// - `repeated_anchor_tiebreak`: an R1 in which the anchor sequence appears
///   twice before a 9-byte barcode.
fn bench_extract_boundary_resolved(c: &mut Criterion) {
    let mut group = c.benchmark_group("extract_boundary_resolved");
    group.throughput(Throughput::Elements(1));

    let fragment = parse_geometry("1{r:f[ACAGT]b[9-11]}2{u[12]x:}").unwrap();
    let extractor = CompiledGeom::from_fragment_geom(&fragment).unwrap();

    let anchor = b"ACAGT";
    let umi_r2 = b"TTTTTTTTTTTT";
    // Every case uses the same R2: the 12-byte UMI followed by the bytes `tail`.
    let build_r2 = || {
        let mut read = umi_r2.to_vec();
        read.extend_from_slice(b"tail");
        read
    };

    for barcode_len in 9usize..=11 {
        let barcode = &b"BARCODE12345"[..barcode_len];
        let read1 = make_boundary_resolved_r1(24, anchor, barcode);
        group.bench_with_input(
            BenchmarkId::new("single_anchor_suffix_bc", barcode_len),
            &(read1, build_r2()),
            |bencher, (r1, r2)| {
                bencher.iter(|| extractor.extract(black_box(r1), black_box(r2)));
            },
        );
    }

    // prefix | anchor | "ACGT" | anchor | barcode
    let mut repeated_anchor_r1 = make_read(20, 11);
    repeated_anchor_r1.extend_from_slice(anchor);
    repeated_anchor_r1.extend_from_slice(b"ACGT");
    repeated_anchor_r1.extend_from_slice(anchor);
    repeated_anchor_r1.extend_from_slice(b"BARCODE09");

    let read2 = build_r2();
    group.bench_function("repeated_anchor_tiebreak", |bencher| {
        bencher.iter(|| extractor.extract(black_box(&repeated_anchor_r1), black_box(&read2)));
    });

    group.finish();
}
/// Hand-written baseline extraction with a 16-byte barcode and 12-byte UMI.
///
/// Returns `(barcode, umi, r2)`:
/// - `barcode` is `r1[..16]`, or `None` when `r1` has fewer than 16 bytes;
/// - `umi` is `r1[16..28]`, or `None` when `r1` has fewer than 28 bytes;
/// - `r2` is passed through untouched.
///
/// Marked `#[inline(never)]` so the call is not inlined into its callers.
#[inline(never)]
fn hardcoded_chromium_v3_extract<'a>(
    r1: &'a [u8],
    r2: &'a [u8],
) -> (Option<&'a [u8]>, Option<&'a [u8]>, &'a [u8]) {
    const BC_END: usize = 16;
    const UMI_END: usize = BC_END + 12;
    // `get` yields `None` exactly when the requested range runs past `r1`.
    (r1.get(..BC_END), r1.get(BC_END..UMI_END), r2)
}
/// Hand-written baseline extraction with a 16-byte barcode and 10-byte UMI.
///
/// Returns `(barcode, umi, r2)`:
/// - `barcode` is `r1[..16]`, or `None` when `r1` has fewer than 16 bytes;
/// - `umi` is `r1[16..26]`, or `None` when `r1` has fewer than 26 bytes;
/// - `r2` is passed through untouched.
///
/// Marked `#[inline(never)]` so the call is not inlined into its callers.
#[inline(never)]
fn hardcoded_chromium_v2_extract<'a>(
    r1: &'a [u8],
    r2: &'a [u8],
) -> (Option<&'a [u8]>, Option<&'a [u8]>, &'a [u8]) {
    const BC_END: usize = 16;
    const UMI_END: usize = BC_END + 10;
    // `get` yields `None` exactly when the requested range runs past `r1`.
    (r1.get(..BC_END), r1.get(BC_END..UMI_END), r2)
}
/// Compares, per read pair, the hand-written baseline extractors against the
/// compiled geometry for both the 12-base-UMI (v3) and 10-base-UMI (v2) layouts.
///
/// For each layout three benchmarks are registered, in order:
/// `hardcoded_*`, `compiled_*_dispatch` (calling `extract` on the
/// `CompiledGeom` enum) and `compiled_*_direct` (calling `extract` on the
/// payload of its `Simple` variant). The function panics if a compiled
/// geometry is not the `Simple` variant.
fn bench_hardcoded_vs_compiled(c: &mut Criterion) {
    let mut group = c.benchmark_group("hardcoded_vs_compiled");
    group.throughput(Throughput::Elements(1));

    let read1 = make_read(150, 1);
    let read2 = make_read(150, 2);

    // --- v3: 16-base barcode + 12-base UMI ---
    group.bench_function("hardcoded_v3", |bencher| {
        bencher.iter(|| hardcoded_chromium_v3_extract(black_box(&read1), black_box(&read2)));
    });

    let fragment_v3 = parse_geometry("1{b[16]u[12]x:}2{r:}").unwrap();
    let dispatch_v3 = CompiledGeom::from_fragment_geom(&fragment_v3).unwrap();
    group.bench_function("compiled_v3_dispatch", |bencher| {
        bencher.iter(|| dispatch_v3.extract(black_box(&read1), black_box(&read2)));
    });

    let direct_v3 = if let CompiledGeom::Simple(inner) = &dispatch_v3 {
        inner
    } else {
        panic!("expected Simple variant for v3")
    };
    group.bench_function("compiled_v3_direct", |bencher| {
        bencher.iter(|| direct_v3.extract(black_box(&read1), black_box(&read2)));
    });

    // --- v2: 16-base barcode + 10-base UMI ---
    group.bench_function("hardcoded_v2", |bencher| {
        bencher.iter(|| hardcoded_chromium_v2_extract(black_box(&read1), black_box(&read2)));
    });

    let fragment_v2 = parse_geometry("1{b[16]u[10]x:}2{r:}").unwrap();
    let dispatch_v2 = CompiledGeom::from_fragment_geom(&fragment_v2).unwrap();
    group.bench_function("compiled_v2_dispatch", |bencher| {
        bencher.iter(|| dispatch_v2.extract(black_box(&read1), black_box(&read2)));
    });

    let direct_v2 = if let CompiledGeom::Simple(inner) = &dispatch_v2 {
        inner
    } else {
        panic!("expected Simple variant for v2")
    };
    group.bench_function("compiled_v2_direct", |bencher| {
        bencher.iter(|| direct_v2.extract(black_box(&read1), black_box(&read2)));
    });

    group.finish();
}
/// Compares the hand-written v3 baseline against the compiled v3 geometry
/// over a batch of 100,000 pre-generated read pairs.
///
/// Throughput is reported in elements, with one element per read pair. Each
/// extraction result is passed through `black_box` inside the loop.
fn bench_hardcoded_vs_compiled_batch(c: &mut Criterion) {
    const BATCH_SIZE: usize = 100_000;

    let mut group = c.benchmark_group("hardcoded_vs_compiled_batch");
    group.throughput(Throughput::Elements(BATCH_SIZE as u64));

    // The seed is deliberately truncated to `u8`; `make_read` only needs a small offset.
    let mut read_pairs: Vec<(Vec<u8>, Vec<u8>)> = Vec::with_capacity(BATCH_SIZE);
    for i in 0..BATCH_SIZE {
        read_pairs.push((make_read(150, i as u8), make_read(150, (i + 1) as u8)));
    }

    group.bench_function("hardcoded_v3_100k", |bencher| {
        bencher.iter(|| {
            for (r1, r2) in &read_pairs {
                black_box(hardcoded_chromium_v3_extract(r1, r2));
            }
        });
    });

    let fragment = parse_geometry("1{b[16]u[12]x:}2{r:}").unwrap();
    let extractor = CompiledGeom::from_fragment_geom(&fragment).unwrap();
    group.bench_function("compiled_v3_100k", |bencher| {
        bencher.iter(|| {
            for (r1, r2) in &read_pairs {
                black_box(extractor.extract(r1, r2));
            }
        });
    });

    group.finish();
}
/// Measures batch extraction over 10,000 pre-generated read pairs for two
/// geometries: fixed-offset `chromium_v3` and anchor-based `flex_v2`.
///
/// Throughput is reported in elements, with one element per read pair. The
/// `flex_v2` inputs cycle through gap lengths 0-3 in front of the anchor.
fn bench_extract_throughput(c: &mut Criterion) {
    const BATCH_SIZE: usize = 10_000;

    let mut group = c.benchmark_group("throughput");
    group.throughput(Throughput::Elements(BATCH_SIZE as u64));

    // --- chromium_v3: fixed offsets only ---
    let fragment_v3 = parse_geometry("1{b[16]u[12]x:}2{r:}").unwrap();
    let extractor_v3 = CompiledGeom::from_fragment_geom(&fragment_v3).unwrap();
    let mut pairs_v3: Vec<(Vec<u8>, Vec<u8>)> = Vec::with_capacity(BATCH_SIZE);
    for i in 0..BATCH_SIZE {
        pairs_v3.push((make_read(150, i as u8), make_read(150, (i + 1) as u8)));
    }
    group.bench_function("chromium_v3_10k", |bencher| {
        bencher.iter(|| {
            for (r1, r2) in &pairs_v3 {
                black_box(extractor_v3.extract(r1, r2));
            }
        });
    });

    // --- flex_v2: anchor preceded by a 0-3 base gap ---
    let anchor = b"TTGCTAGGACCG";
    let sample_bc = b"SAMPLEBC10";
    let fragment_flex =
        parse_geometry("1{b[16]u[12]x[0-3]f[TTGCTAGGACCG]s[10]x:}2{r:}").unwrap();
    let extractor_flex = CompiledGeom::from_fragment_geom(&fragment_flex).unwrap();
    let mut pairs_flex: Vec<(Vec<u8>, Vec<u8>)> = Vec::with_capacity(BATCH_SIZE);
    for i in 0..BATCH_SIZE {
        let read1 = make_flex_v2_r1(i % 4, anchor, sample_bc);
        let read2 = make_read(150, (i + 1) as u8);
        pairs_flex.push((read1, read2));
    }
    group.bench_function("flex_v2_anchor_10k", |bencher| {
        bencher.iter(|| {
            for (r1, r2) in &pairs_flex {
                black_box(extractor_flex.extract(r1, r2));
            }
        });
    });

    group.finish();
}
// Registers every benchmark function in this file under the `benches` group;
// `criterion_main!` then generates the `main` entry point that runs that group.
criterion_group!(
benches,
bench_parse,
bench_compile,
bench_extract_fixed,
bench_extract_anchor,
bench_extract_boundary_resolved,
bench_hardcoded_vs_compiled,
bench_hardcoded_vs_compiled_batch,
bench_extract_throughput,
);
criterion_main!(benches);