use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
use memspan::{Needles, skip};
use std::hint::black_box;
const MICRO_LENGTHS: [usize; 8] = [15, 16, 17, 32, 64, 256, 4 * 1024, 64 * 1024];
const WORKLOAD_LENGTHS: [usize; 2] = [8 * 1024, 128 * 1024];
const DENSITY_GAPS: [usize; 5] = [4, 16, 64, 256, 1024];
fn input_with_match_at_end(len: usize, fill: u8, needle: u8) -> Vec<u8> {
let mut input = vec![fill; len];
if let Some(last) = input.last_mut() {
*last = needle;
}
input
}
fn input_with_periodic_match(len: usize, fill: u8, needle: u8, gap: usize) -> Vec<u8> {
assert!(gap >= 1, "gap must be at least 1");
let mut input = vec![fill; len];
let mut pos = gap.saturating_sub(1);
while pos < len {
input[pos] = needle;
pos += gap;
}
input
}
fn repeat_to_len(fragment: &[u8], target_len: usize) -> Vec<u8> {
let mut input = Vec::with_capacity(target_len + fragment.len());
while input.len() < target_len {
input.extend_from_slice(fragment);
}
input.truncate(target_len);
input
}
fn scalar_tail_find<Nd>(input: &[u8], needles: Nd) -> Option<usize>
where
Nd: Needles,
{
if needles.is_empty() {
return None;
}
needles.tail_find(input)
}
fn scan_all_dispatch<Nd>(input: &[u8], needles: Nd) -> usize
where
Nd: Needles + Copy,
{
let mut pos = 0;
let mut checksum = 0usize;
while pos < input.len() {
let Some(hit) = skip::skip_until(&input[pos..], needles) else {
break;
};
pos += hit;
checksum = checksum.wrapping_add(pos);
pos += 1;
}
checksum
}
fn scan_all_scalar<Nd>(input: &[u8], needles: Nd) -> usize
where
Nd: Needles + Copy,
{
let mut pos = 0;
let mut checksum = 0usize;
while pos < input.len() {
let Some(hit) = scalar_tail_find(&input[pos..], needles) else {
break;
};
pos += hit;
checksum = checksum.wrapping_add(pos);
pos += 1;
}
checksum
}
fn bench_micro_end_match(c: &mut Criterion) {
const NEEDLES: [u8; 5] = [b' ', b'\t', b'\r', b'\n', b','];
let mut group = c.benchmark_group("skip_until/micro/end_match");
for len in MICRO_LENGTHS {
let input = input_with_match_at_end(len, b'a', b'\n');
group.throughput(Throughput::Bytes(len as u64));
group.bench_with_input(
BenchmarkId::new("dispatch_fixed", len),
&input,
|b, input| {
b.iter(|| {
black_box(skip::skip_until(
black_box(input.as_slice()),
black_box(NEEDLES),
))
})
},
);
group.bench_with_input(
BenchmarkId::new("scalar_tail_find", len),
&input,
|b, input| {
b.iter(|| {
black_box(scalar_tail_find(
black_box(input.as_slice()),
black_box(NEEDLES),
))
})
},
);
}
group.finish();
}
fn bench_density_sweep(c: &mut Criterion) {
const NEEDLES: [u8; 5] = [b' ', b'\t', b'\r', b'\n', b','];
const LEN: usize = 64 * 1024;
let mut group = c.benchmark_group("skip_until/density_sweep");
for gap in DENSITY_GAPS {
let input = input_with_periodic_match(LEN, b'a', b'\n', gap);
group.throughput(Throughput::Bytes(LEN as u64));
group.bench_with_input(
BenchmarkId::new("dispatch_fixed", gap),
&input,
|b, input| {
b.iter(|| {
black_box(scan_all_dispatch(
black_box(input.as_slice()),
black_box(NEEDLES),
))
})
},
);
group.bench_with_input(
BenchmarkId::new("scalar_tail_find", gap),
&input,
|b, input| {
b.iter(|| {
black_box(scan_all_scalar(
black_box(input.as_slice()),
black_box(NEEDLES),
))
})
},
);
}
group.finish();
}
fn bench_graphql_ignored_workload(c: &mut Criterion) {
const NEEDLES: [u8; 5] = [b' ', b'\t', b'\r', b'\n', b','];
const FRAGMENT: &[u8] = br#"
query GetUser($id: ID!, $limit: Int = 10) {
user(id: $id) {
id
name
friends(first: $limit, after: null) {
nodes {
id
name
profile { avatarUrl(size: 64), bio }
}
}
}
}
fragment UserFields on User {
id
name
email
}
"#;
let mut group = c.benchmark_group("skip_until/workload/graphql_ignored_scan_all");
for len in WORKLOAD_LENGTHS {
let input = repeat_to_len(FRAGMENT, len);
let dynamic_needles: &[u8] = &NEEDLES;
group.throughput(Throughput::Bytes(len as u64));
group.bench_with_input(
BenchmarkId::new("dispatch_fixed", len),
&input,
|b, input| {
b.iter(|| {
black_box(scan_all_dispatch(
black_box(input.as_slice()),
black_box(NEEDLES),
))
})
},
);
group.bench_with_input(
BenchmarkId::new("dispatch_dynamic", len),
&input,
|b, input| {
b.iter(|| {
black_box(scan_all_dispatch(
black_box(input.as_slice()),
black_box(dynamic_needles),
))
})
},
);
group.bench_with_input(
BenchmarkId::new("scalar_tail_find", len),
&input,
|b, input| {
b.iter(|| {
black_box(scan_all_scalar(
black_box(input.as_slice()),
black_box(NEEDLES),
))
})
},
);
}
group.finish();
}
fn bench_quote_or_escape_workload(c: &mut Criterion) {
const NEEDLES: [u8; 2] = [b'"', b'\\'];
const FRAGMENT: &[u8] =
br#"name: "Ada Lovelace", bio: "first programmer \"notes\" \\ archive", city: "London"
"#;
let mut group = c.benchmark_group("skip_until/workload/quote_or_escape_scan_all");
for len in WORKLOAD_LENGTHS {
let input = repeat_to_len(FRAGMENT, len);
let dynamic_needles: &[u8] = &NEEDLES;
group.throughput(Throughput::Bytes(len as u64));
group.bench_with_input(
BenchmarkId::new("dispatch_fixed", len),
&input,
|b, input| {
b.iter(|| {
black_box(scan_all_dispatch(
black_box(input.as_slice()),
black_box(NEEDLES),
))
})
},
);
group.bench_with_input(
BenchmarkId::new("dispatch_dynamic", len),
&input,
|b, input| {
b.iter(|| {
black_box(scan_all_dispatch(
black_box(input.as_slice()),
black_box(dynamic_needles),
))
})
},
);
group.bench_with_input(
BenchmarkId::new("scalar_tail_find", len),
&input,
|b, input| {
b.iter(|| {
black_box(scan_all_scalar(
black_box(input.as_slice()),
black_box(NEEDLES),
))
})
},
);
group.bench_with_input(
BenchmarkId::new("memchr2_direct", len),
&input,
|b, input| {
b.iter(|| {
let buf: &[u8] = black_box(input.as_slice());
let mut pos = 0;
let mut checksum = 0usize;
while pos < buf.len() {
let Some(hit) = memchr::memchr2(b'"', b'\\', &buf[pos..]) else {
break;
};
pos += hit;
checksum = checksum.wrapping_add(pos);
pos += 1;
}
black_box(checksum)
})
},
);
}
group.finish();
}
fn bench_comment_newline_workload(c: &mut Criterion) {
const FRAGMENT: &[u8] = br#"# warm cache comment with enough text to scan before newline
# another GraphQL comment that ends at the next line terminator
# short
"#;
let mut group = c.benchmark_group("skip_until/workload/comment_newline_scan_all");
for len in WORKLOAD_LENGTHS {
let input = repeat_to_len(FRAGMENT, len);
let dynamic_needle: &[u8] = b"\n";
group.throughput(Throughput::Bytes(len as u64));
group.bench_with_input(BenchmarkId::new("dispatch_u8", len), &input, |b, input| {
b.iter(|| {
black_box(scan_all_dispatch(
black_box(input.as_slice()),
black_box(b'\n'),
))
})
});
group.bench_with_input(
BenchmarkId::new("dispatch_dynamic", len),
&input,
|b, input| {
b.iter(|| {
black_box(scan_all_dispatch(
black_box(input.as_slice()),
black_box(dynamic_needle),
))
})
},
);
group.bench_with_input(
BenchmarkId::new("scalar_tail_find", len),
&input,
|b, input| {
b.iter(|| {
black_box(scan_all_scalar(
black_box(input.as_slice()),
black_box(b'\n'),
))
})
},
);
group.bench_with_input(
BenchmarkId::new("memchr_direct", len),
&input,
|b, input| {
b.iter(|| {
let buf: &[u8] = black_box(input.as_slice());
let mut pos = 0;
let mut checksum = 0usize;
while pos < buf.len() {
let Some(hit) = memchr::memchr(b'\n', &buf[pos..]) else {
break;
};
pos += hit;
checksum = checksum.wrapping_add(pos);
pos += 1;
}
black_box(checksum)
})
},
);
}
group.finish();
}
criterion_group!(
benches,
bench_micro_end_match,
bench_density_sweep,
bench_graphql_ignored_workload,
bench_quote_or_escape_workload,
bench_comment_newline_workload
);
criterion_main!(benches);