use std::hint::black_box;
use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
use geo_types::Point;
use mlt_core::encoder::{
Codecs, Encoder, EncoderConfig, ExplicitEncoder, IntEncoder, LogicalEncoder, PhysicalEncoder,
Presence, StagedId, StagedLayer, StagedProperty, StagedSharedDict, StrEncoding,
};
use mlt_core::test_helpers::{dec, parser};
use mlt_core::{GeometryValues, LendingIterator, ParsedLayer01, PropValueRef};
use strum::IntoEnumIterator as _;
/// Input sizes `n` that the benchmark functions in this file iterate over.
///
/// This variant is selected when `debug_assertions` is enabled and contains a
/// single size of 1 (presumably so that unoptimized runs finish quickly).
#[cfg(debug_assertions)]
pub const BENCHMARKED_LENGTHS: [usize; 1] = [1];
/// Input sizes `n` that the benchmark functions in this file iterate over.
///
/// This variant is selected when `debug_assertions` is disabled and covers
/// sizes from 1 up to 2048.
#[cfg(not(debug_assertions))]
pub const BENCHMARKED_LENGTHS: [usize; 6] = [1, 20, 64, 256, 1024, 2048];
/// Caps an iterator depending on the build profile.
///
/// With `debug_assertions` enabled only the first item is yielded; otherwise
/// the iterator is passed through with an effectively unlimited cap
/// (`usize::MAX`). Both cases go through `take`, so the return type is the same.
fn limit<T>(values: impl Iterator<Item = T>) -> impl Iterator<Item = T> {
    let cap = if cfg!(debug_assertions) {
        1
    } else {
        usize::MAX
    };
    values.take(cap)
}
/// Builds `n` deterministic strings drawn from a fixed 16-word vocabulary.
///
/// Entry `i` uses the vocabulary word at `i % 16`. Every fourth entry
/// (`i` a multiple of 4) is the bare word; all others get a `_{i % 32}`
/// suffix. The result is passed through `black_box` before being returned.
fn make_strings(n: usize) -> Vec<String> {
    const VOCAB: &[&str] = &[
        "highway",
        "residential",
        "motorway",
        "primary",
        "secondary",
        "tertiary",
        "water",
        "forest",
        "park",
        "building",
        "amenity",
        "shop",
        "landuse",
        "natural",
        "place",
        "boundary",
    ];
    // Cycling the vocabulary and numbering the items yields the same word as
    // indexing with `i % VOCAB.len()`.
    let strings: Vec<String> = VOCAB
        .iter()
        .cycle()
        .take(n)
        .enumerate()
        .map(|(i, word)| {
            if i.is_multiple_of(4) {
                (*word).to_owned()
            } else {
                format!("{word}_{}", i % 32)
            }
        })
        .collect();
    black_box(strings)
}
/// Builds `n` optional strings from `make_strings(n)` where every fifth entry
/// (indices 0, 5, 10, ...) is `None` and all others are `Some`.
///
/// The result is passed through `black_box` before being returned.
fn make_nullable_strings(n: usize) -> Vec<Option<String>> {
    let mut values: Vec<Option<String>> = make_strings(n).into_iter().map(Some).collect();
    // Blank out the slots whose index is a multiple of 5.
    for slot in values.iter_mut().step_by(5) {
        *slot = None;
    }
    black_box(values)
}
/// Builds a `GeometryValues` holding `n` identical point geometries at `(0, 0)`.
fn make_geometry(n: usize) -> GeometryValues {
    // The same point is pushed every time, so build it once up front.
    let origin = geo_types::Geometry::<i32>::Point(Point::new(0, 0));
    let mut geometry = GeometryValues::default();
    for _ in 0..n {
        geometry.push_geom(&origin);
    }
    geometry
}
/// Encodes a layer named `"bench"` (extent 4096, no IDs) with `n` point
/// geometries and the given properties, returning the encoded layer bytes.
///
/// `cfg` is handed to `Encoder::with_explicit` together with the default
/// `EncoderConfig`.
///
/// # Panics
///
/// Panics with `"encode_layer failed"` or `"into_layer_bytes failed"` if the
/// corresponding step does not succeed.
fn encode_layer(n: usize, props: Vec<StagedProperty>, cfg: ExplicitEncoder) -> Vec<u8> {
    let mut codecs = Codecs::default();
    let staged = StagedLayer {
        name: "bench".into(),
        extent: 4096,
        id: StagedId::None,
        geometry: make_geometry(n),
        properties: props,
    };
    let encoder = Encoder::with_explicit(EncoderConfig::default(), cfg);
    let encoded = staged
        .encode_into(encoder, &mut codecs)
        .expect("encode_layer failed");
    encoded
        .into_layer_bytes()
        .expect("into_layer_bytes failed")
}
/// Walks every feature of `parsed` and returns the summed length of all
/// property values that are `PropValueRef::Str`; every other value counts as 0.
///
/// # Panics
///
/// Panics if any item yielded by `iter_features` fails to unwrap.
fn sum_str_lens(parsed: &ParsedLayer01<'_>) -> usize {
    let mut total = 0usize;
    let mut features = parsed.iter_features();
    // `iter_features` is driven manually via `next()`, so a `while let` loop
    // is used rather than `for`.
    while let Some(feature_res) = features.next() {
        let feature = feature_res.unwrap();
        for value in feature.iter_all_properties() {
            if let Some(PropValueRef::Str(text)) = value {
                total += text.len();
            }
        }
    }
    total
}
/// Benchmarks decoding of a single string column encoded via
/// `ExplicitEncoder::all`, for every logical × physical `IntEncoder`
/// combination (restricted by `limit`) and every benchmarked length.
///
/// Encoding happens once per combination outside the timed closure; the timed
/// part parses the bytes, decodes the layer and sums the string lengths.
fn bench_plain_length_encoding(c: &mut Criterion) {
    let mut group = c.benchmark_group("strings/plain/length_enc");
    for n in BENCHMARKED_LENGTHS {
        let strings = make_strings(n);
        group.throughput(Throughput::Elements(n as u64));
        for logical in limit(LogicalEncoder::iter()) {
            for physical in limit(PhysicalEncoder::iter()) {
                let id = BenchmarkId::new(format!("{logical:?}-{physical:?}"), n);
                let props = vec![StagedProperty::str("name", strings.clone())];
                let cfg = ExplicitEncoder::all(IntEncoder::new(logical, physical));
                let encoded = encode_layer(n, props, cfg);
                group.bench_with_input(id, &encoded, |bencher, input| {
                    bencher.iter(|| {
                        let layer = parser()
                            .parse_layers(input)
                            .expect("parse")
                            .remove(0)
                            .into_layer01()
                            .expect("layer01");
                        let parsed = layer.decode_all(&mut dec()).expect("decode_all");
                        black_box(sum_str_lens(&parsed))
                    });
                });
            }
        }
    }
    group.finish();
}
/// Benchmarks decoding of a single string column encoded via
/// `ExplicitEncoder::all_with_str(.., StrEncoding::Fsst)`, for every
/// logical × physical `IntEncoder` combination (restricted by `limit`) and
/// every benchmarked length.
///
/// Encoding happens once per combination outside the timed closure; the timed
/// part parses the bytes, decodes the layer and sums the string lengths.
fn bench_fsst_length_encoding(c: &mut Criterion) {
    let mut group = c.benchmark_group("strings/fsst/length_enc");
    for n in BENCHMARKED_LENGTHS {
        let strings = make_strings(n);
        group.throughput(Throughput::Elements(n as u64));
        for logical in limit(LogicalEncoder::iter()) {
            for physical in limit(PhysicalEncoder::iter()) {
                let id = BenchmarkId::new(format!("{logical:?}-{physical:?}"), n);
                let props = vec![StagedProperty::str("name", strings.clone())];
                let cfg = ExplicitEncoder::all_with_str(
                    IntEncoder::new(logical, physical),
                    StrEncoding::Fsst,
                );
                let encoded = encode_layer(n, props, cfg);
                group.bench_with_input(id, &encoded, |bencher, input| {
                    bencher.iter(|| {
                        let layer = parser()
                            .parse_layers(input)
                            .expect("parse")
                            .remove(0)
                            .into_layer01()
                            .expect("layer01");
                        let parsed = layer.decode_all(&mut dec()).expect("decode_all");
                        black_box(sum_str_lens(&parsed))
                    });
                });
            }
        }
    }
    group.finish();
}
/// Compares decoding of the same string column under two encoder
/// configurations, both built on `IntEncoder::plain()`:
///
/// * `"plain"` — `ExplicitEncoder::all`
/// * `"fsst"`  — `ExplicitEncoder::all_with_str(.., StrEncoding::Fsst)`
///
/// The variants are listed in a small table and benchmarked by one shared
/// loop body, so the timed decode path is written only once.
fn bench_encoding_type(c: &mut Criterion) {
    let mut group = c.benchmark_group("strings/encoding_type");
    let int_enc = IntEncoder::plain();
    for n in BENCHMARKED_LENGTHS {
        let col = make_strings(n);
        group.throughput(Throughput::Elements(n as u64));
        let variants = [
            ("plain", ExplicitEncoder::all(int_enc)),
            (
                "fsst",
                ExplicitEncoder::all_with_str(int_enc, StrEncoding::Fsst),
            ),
        ];
        for (label, cfg) in variants {
            // Encoding is setup work and stays outside the timed closure.
            let bytes = encode_layer(n, vec![StagedProperty::str("name", col.clone())], cfg);
            group.bench_with_input(BenchmarkId::new(label, n), &bytes, |b, bytes| {
                b.iter(|| {
                    let layer = parser()
                        .parse_layers(bytes)
                        .expect("parse")
                        .remove(0)
                        .into_layer01()
                        .expect("layer01");
                    let parsed = layer.decode_all(&mut dec()).expect("decode_all");
                    black_box(sum_str_lens(&parsed))
                });
            });
        }
    }
    group.finish();
}
/// Compares decoding of a string column without nulls (`"no_nulls"`, built
/// with `StagedProperty::str`) against one where every fifth value is `None`
/// (`"with_nulls"`, built with `StagedProperty::opt_str`).
///
/// Both variants are encoded with `ExplicitEncoder::all(IntEncoder::plain())`.
/// They are listed in a small table and benchmarked by one shared loop body,
/// so the timed decode path is written only once.
fn bench_presence(c: &mut Criterion) {
    let mut group = c.benchmark_group("strings/presence");
    let int_enc = IntEncoder::plain();
    for n in BENCHMARKED_LENGTHS {
        group.throughput(Throughput::Elements(n as u64));
        let variants = [
            ("no_nulls", StagedProperty::str("name", make_strings(n))),
            (
                "with_nulls",
                StagedProperty::opt_str("name", make_nullable_strings(n)),
            ),
        ];
        for (label, prop) in variants {
            // Encoding is setup work and stays outside the timed closure.
            let bytes = encode_layer(n, vec![prop], ExplicitEncoder::all(int_enc));
            group.bench_with_input(BenchmarkId::new(label, n), &bytes, |b, bytes| {
                b.iter(|| {
                    let layer = parser()
                        .parse_layers(bytes)
                        .expect("parse")
                        .remove(0)
                        .into_layer01()
                        .expect("layer01");
                    let parsed = layer.decode_all(&mut dec()).expect("decode_all");
                    black_box(sum_str_lens(&parsed))
                });
            });
        }
    }
    group.finish();
}
fn bench_vs_shared_dict(c: &mut Criterion) {
let mut group = c.benchmark_group("strings/vs_shared_dict");
let int_enc = IntEncoder::plain();
for n in BENCHMARKED_LENGTHS {
let total_entries = n * 2;
group.throughput(Throughput::Elements(total_entries as u64));
let col = make_strings(n);
let col_opt: Vec<Option<String>> = col.iter().map(|s| Some(s.clone())).collect();
let plain_x2_bytes = encode_layer(
n,
vec![
StagedProperty::str("col1", col.clone()),
StagedProperty::str("col2", col.clone()),
],
ExplicitEncoder::all(int_enc),
);
group.bench_with_input(
BenchmarkId::new("plain_x2", n),
&plain_x2_bytes,
|b, bytes| {
b.iter(|| {
let layer = parser()
.parse_layers(bytes)
.expect("parse")
.remove(0)
.into_layer01()
.expect("layer01");
let parsed = layer.decode_all(&mut dec()).expect("decode_all");
black_box(sum_str_lens(&parsed))
});
},
);
let col2: Vec<Option<String>> = col
.iter()
.enumerate()
.map(|(i, s)| if i % 3 == 0 { None } else { Some(s.clone()) })
.collect();
let make_sd = || {
StagedSharedDict::new(
"place:",
[
("type", col_opt.clone(), Presence::AllPresent),
("subtype", col2.clone(), Presence::Mixed),
],
)
.expect("StagedSharedDict::new failed")
};
let sd_plain_bytes = encode_layer(
n,
vec![StagedProperty::SharedDict(make_sd())],
ExplicitEncoder::all(int_enc),
);
group.bench_with_input(
BenchmarkId::new("shared_dict_plain", n),
&sd_plain_bytes,
|b, bytes| {
b.iter(|| {
let layer = parser()
.parse_layers(bytes)
.expect("parse")
.remove(0)
.into_layer01()
.expect("layer01");
let parsed = layer.decode_all(&mut dec()).expect("decode_all");
black_box(sum_str_lens(&parsed))
});
},
);
let sd_fsst_bytes = encode_layer(
n,
vec![StagedProperty::SharedDict(make_sd())],
ExplicitEncoder::all_with_str(int_enc, StrEncoding::Fsst),
);
group.bench_with_input(
BenchmarkId::new("shared_dict_fsst", n),
&sd_fsst_bytes,
|b, bytes| {
b.iter(|| {
let layer = parser()
.parse_layers(bytes)
.expect("parse")
.remove(0)
.into_layer01()
.expect("layer01");
let parsed = layer.decode_all(&mut dec()).expect("decode_all");
black_box(sum_str_lens(&parsed))
});
},
);
let fsst_x2_bytes = encode_layer(
n,
vec![
StagedProperty::str("col1", col.clone()),
StagedProperty::str("col2", col),
],
ExplicitEncoder::all_with_str(int_enc, StrEncoding::Fsst),
);
group.bench_with_input(
BenchmarkId::new("fsst_x2", n),
&fsst_x2_bytes,
|b, bytes| {
b.iter(|| {
let layer = parser()
.parse_layers(bytes)
.expect("parse")
.remove(0)
.into_layer01()
.expect("layer01");
let parsed = layer.decode_all(&mut dec()).expect("decode_all");
black_box(sum_str_lens(&parsed))
});
},
);
}
group.finish();
}
// Collect the benchmark functions above into the `benches` group and hand that
// group to `criterion_main!`.
criterion_group!(
benches,
bench_plain_length_encoding,
bench_fsst_length_encoding,
bench_encoding_type,
bench_presence,
bench_vs_shared_dict,
);
criterion_main!(benches);