#![allow(clippy::too_many_lines)]
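//! Criterion benchmarks for `bibtex-parser`, run against the bundled TUGboat
//! bibliography fixture: parser throughput compared with other BibTeX crates,
//! serialization, post-parse operations, input-size scaling, and multi-source
//! corpus parsing. On Linux the benchmark thread is pinned to the fastest
//! responding physical core to reduce scheduling noise.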
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
#[cfg(target_os = "linux")]
use std::collections::BTreeSet;
#[cfg(target_os = "linux")]
use std::sync::OnceLock;
use std::time::{Duration, Instant};
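/// TUGboat bibliography fixture shared by every benchmark in this file.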
static TUGBOAT_BIB: &str = include_str!("../tests/fixtures/tugboat.bib");
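/// Pins the current thread to the selected benchmark CPU on Linux; a no-op elsewhere.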
#[cfg(target_os = "linux")]
fn pin_benchmark_thread() {
set_thread_affinity(benchmark_cpu());
}
#[cfg(not(target_os = "linux"))]
fn pin_benchmark_thread() {}
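/// Returns the CPU used for benchmarking, detecting it once and caching the choice.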
#[cfg(target_os = "linux")]
fn benchmark_cpu() -> usize {
static BENCHMARK_CPU: OnceLock<usize> = OnceLock::new();
*BENCHMARK_CPU.get_or_init(detect_benchmark_cpu)
}
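/// Chooses the benchmark CPU: honours `BIBTEX_BENCH_CPU` if set, otherwise probes one
/// logical CPU per physical core with a few timed parses of the fixture and keeps the
/// fastest, falling back to CPU 0 when no candidates are found.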
#[cfg(target_os = "linux")]
fn detect_benchmark_cpu() -> usize {
use bibtex_parser::Library;
if let Ok(cpu) = std::env::var("BIBTEX_BENCH_CPU") {
if let Ok(cpu) = cpu.parse::<usize>() {
return cpu;
}
}
let candidate_cpus = collect_candidate_cpus();
let Some(&first_cpu) = candidate_cpus.first() else {
return 0;
};
let mut selected_cpu = first_cpu;
let mut selected_elapsed = Duration::MAX;
for cpu in candidate_cpus {
set_thread_affinity(cpu);
let mut shortest_probe = Duration::MAX;
for _ in 0..3 {
let start = Instant::now();
let library = Library::parser().parse(black_box(TUGBOAT_BIB)).unwrap();
black_box(&library);
shortest_probe = shortest_probe.min(start.elapsed());
}
if shortest_probe < selected_elapsed {
selected_elapsed = shortest_probe;
selected_cpu = cpu;
}
}
selected_cpu
}
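/// Enumerates CPUs under `/sys/devices/system/cpu`, keeping one logical CPU per physical
/// core by de-duplicating on `topology/thread_siblings_list`.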
#[cfg(target_os = "linux")]
fn collect_candidate_cpus() -> Vec<usize> {
let mut candidates = Vec::new();
let mut seen_siblings = BTreeSet::new();
if let Ok(entries) = std::fs::read_dir("/sys/devices/system/cpu") {
for entry in entries.flatten() {
let file_name = entry.file_name();
let name = file_name.to_string_lossy();
let Some(cpu_id) = name
.strip_prefix("cpu")
.and_then(|id| id.parse::<usize>().ok())
else {
continue;
};
let siblings_path = entry.path().join("topology/thread_siblings_list");
let Ok(siblings) = std::fs::read_to_string(siblings_path) else {
continue;
};
let siblings = siblings.trim().to_string();
if !seen_siblings.insert(siblings) {
continue;
}
candidates.push(cpu_id);
}
}
candidates.sort_unstable();
candidates
}
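/// Restricts the current thread to `cpu` via `sched_setaffinity`; errors are ignored.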
#[cfg(target_os = "linux")]
fn set_thread_affinity(cpu: usize) {
unsafe {
let mut cpu_set: libc::cpu_set_t = std::mem::zeroed();
libc::CPU_ZERO(&mut cpu_set);
libc::CPU_SET(cpu, &mut cpu_set);
let _ = libc::sched_setaffinity(0, std::mem::size_of::<libc::cpu_set_t>(), &cpu_set);
}
}
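/// Pins the thread and busy-parses the fixture for a few seconds so clocks and caches
/// settle before measurement starts.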
fn stabilize_system() {
use bibtex_parser::Library;
pin_benchmark_thread();
let deadline = Instant::now() + Duration::from_secs(3);
while Instant::now() < deadline {
let library = Library::parser().parse(black_box(TUGBOAT_BIB)).unwrap();
black_box(&library);
}
}
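/// Throughput comparison of this crate's parsing modes (strict, tolerant, source-preserving,
/// streaming) against serde_bibtex, nom-bibtex, and biblatex on the same fixture.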
fn bench_parser_comparison(c: &mut Criterion) {
use bibtex_parser::Library;
let mut group = c.benchmark_group("throughput");
group.measurement_time(Duration::from_secs(20));
group.warm_up_time(Duration::from_secs(12));
group.sample_size(200);
group.significance_level(0.01);
group.confidence_level(0.99);
group.noise_threshold(0.02);
let input_bytes = TUGBOAT_BIB.len() as u64;
group.throughput(Throughput::Bytes(input_bytes));
stabilize_system();
// Extra warm-up parses before sampling; keep each result observable so the work
// cannot be optimized away.
for _ in 0..50 {
let library = Library::parser().parse(TUGBOAT_BIB);
black_box(&library);
}
group.bench_function("bibtex-parser", |b| {
b.iter(|| {
let library = Library::parser().parse(black_box(TUGBOAT_BIB)).unwrap();
black_box(&library);
assert!(!library.entries().is_empty());
});
});
group.bench_function("bibtex-parser-tolerant", |b| {
b.iter(|| {
let library = Library::parser()
.tolerant()
.parse(black_box(TUGBOAT_BIB))
.unwrap();
black_box(&library);
assert!(!library.entries().is_empty());
});
});
group.bench_function("bibtex-parser-source-preserving", |b| {
let parser = Library::parser().preserve_raw();
b.iter(|| {
let document = parser.parse_document(black_box(TUGBOAT_BIB)).unwrap();
black_box(&document);
assert!(!document.entries().is_empty());
});
});
group.bench_function("bibtex-parser-streaming", |b| {
use bibtex_parser::{ParseEvent, ParseFlow};
let parser = Library::parser();
b.iter(|| {
let mut entries = 0usize;
let summary = parser
.parse_events(black_box(TUGBOAT_BIB), |event| {
if matches!(event, ParseEvent::Entry(_)) {
entries += 1;
}
Ok(ParseFlow::Continue)
})
.unwrap();
black_box(entries);
black_box(summary);
assert!(entries > 0);
});
});
bench_serde_bibtex_ignore(&mut group);
bench_serde_bibtex_borrow(&mut group);
bench_serde_bibtex_struct(&mut group);
bench_serde_bibtex_copy(&mut group);
bench_nom_bibtex(&mut group);
bench_biblatex(&mut group);
group.finish();
}
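/// Serialization benchmarks. The raw-document round trip is verified up front so that a
/// `document_to_string` regression fails the benchmark instead of silently skewing it.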
fn bench_writing(c: &mut Criterion) {
use bibtex_parser::{document_to_string, to_string, Library};
let library = Library::parser().parse(TUGBOAT_BIB).unwrap();
let document = Library::parser()
.preserve_raw()
.parse_document(TUGBOAT_BIB)
.unwrap();
let raw_document_output = document_to_string(&document).unwrap();
let raw_document_reparsed = Library::parser()
.preserve_raw()
.parse_document(&raw_document_output)
.unwrap();
assert_eq!(
raw_document_reparsed.entries().len(),
document.entries().len()
);
assert_eq!(
raw_document_reparsed.comments().len(),
document.comments().len()
);
assert_eq!(
raw_document_reparsed.preambles().len(),
document.preambles().len()
);
assert_eq!(
raw_document_reparsed.strings().len(),
document.strings().len()
);
let mut group = c.benchmark_group("writing");
group.measurement_time(Duration::from_secs(12));
group.warm_up_time(Duration::from_secs(5));
group.sample_size(120);
group.throughput(Throughput::Bytes(TUGBOAT_BIB.len() as u64));
stabilize_system();
group.bench_function("library_to_string", |b| {
b.iter(|| {
let output = to_string(black_box(&library)).unwrap();
black_box(&output);
assert!(!output.is_empty());
});
});
group.bench_function("raw_document_to_string", |b| {
b.iter(|| {
let output = document_to_string(black_box(&document)).unwrap();
black_box(&output);
assert_eq!(output.len(), raw_document_output.len());
});
});
group.finish();
}
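/// serde_bibtex baseline: deserialize into `IgnoredAny`, i.e. parse and discard.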
fn bench_serde_bibtex_ignore(
group: &mut criterion::BenchmarkGroup<criterion::measurement::WallTime>,
) {
use serde::de::IgnoredAny;
use serde::Deserialize;
use serde_bibtex::de::Deserializer;
for _ in 0..10 {
let _ = IgnoredAny::deserialize(&mut Deserializer::from_str(TUGBOAT_BIB));
}
group.bench_function("serde_bibtex-ignore", |b| {
b.iter(|| {
let result =
IgnoredAny::deserialize(&mut Deserializer::from_str(black_box(TUGBOAT_BIB)));
black_box(&result);
});
});
}
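/// serde_bibtex deserializing into borrowed (`BorrowEntry`) entries.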
fn bench_serde_bibtex_borrow(
group: &mut criterion::BenchmarkGroup<criterion::measurement::WallTime>,
) {
use serde::Deserialize;
use serde_bibtex::de::Deserializer;
use serde_bibtex::entry::BorrowEntry;
type RawBibliography<'r> = Vec<BorrowEntry<'r>>;
for _ in 0..10 {
let _ = RawBibliography::deserialize(&mut Deserializer::from_str(TUGBOAT_BIB));
}
group.bench_function("serde_bibtex-borrow", |b| {
b.iter(|| {
let result: Result<RawBibliography, _> =
RawBibliography::deserialize(&mut Deserializer::from_str(black_box(TUGBOAT_BIB)));
match result {
Ok(entries) => {
black_box(&entries);
assert!(!entries.is_empty());
}
Err(e) => panic!("serde_bibtex-borrow parsing failed: {}", e),
}
});
});
}
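/// serde_bibtex deserializing regular entries into a small typed struct that borrows the
/// author, title, and year fields.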
fn bench_serde_bibtex_struct(
group: &mut criterion::BenchmarkGroup<criterion::measurement::WallTime>,
) {
use serde::Deserialize;
use serde_bibtex::de::Deserializer;
use std::borrow::Cow;
#[derive(Debug, Deserialize)]
#[allow(dead_code)]
struct Fields<'r> {
#[serde(borrow)]
author: Option<Cow<'r, str>>,
#[serde(borrow)]
title: Option<Cow<'r, str>>,
#[serde(borrow)]
year: Option<Cow<'r, str>>,
}
#[derive(Debug, Deserialize)]
#[allow(dead_code)]
struct TugboatEntry<'r> {
entry_key: &'r str,
#[serde(borrow)]
fields: Fields<'r>,
}
for _ in 0..10 {
let de_iter = Deserializer::from_str(TUGBOAT_BIB).into_iter_regular_entry();
let _: Vec<Result<TugboatEntry, _>> = de_iter.collect();
}
group.bench_function("serde_bibtex-struct", |b| {
b.iter(|| {
let de_iter = Deserializer::from_str(black_box(TUGBOAT_BIB)).into_iter_regular_entry();
let result: Vec<Result<TugboatEntry, _>> = de_iter.collect();
black_box(&result);
});
});
}
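/// serde_bibtex deserializing into fully owned entries, with month macros registered via
/// `set_month_macros`.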
fn bench_serde_bibtex_copy(
group: &mut criterion::BenchmarkGroup<criterion::measurement::WallTime>,
) {
use serde::Deserialize;
use serde_bibtex::de::Deserializer;
use serde_bibtex::entry::Entry;
use serde_bibtex::MacroDictionary;
type OwnedBibliography = Vec<Entry>;
for _ in 0..10 {
let mut macros = MacroDictionary::default();
macros.set_month_macros();
let _ = OwnedBibliography::deserialize(&mut Deserializer::from_str_with_macros(
TUGBOAT_BIB,
macros,
));
}
group.bench_function("serde_bibtex-copy", |b| {
b.iter(|| {
let mut macros = MacroDictionary::default();
macros.set_month_macros();
let result = OwnedBibliography::deserialize(&mut Deserializer::from_str_with_macros(
black_box(TUGBOAT_BIB),
macros,
));
match result {
Ok(entries) => {
black_box(&entries);
assert!(!entries.is_empty());
}
Err(e) => panic!("serde_bibtex-copy parsing failed: {}", e),
}
});
});
}
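/// nom-bibtex parsing the full fixture.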
fn bench_nom_bibtex(group: &mut criterion::BenchmarkGroup<criterion::measurement::WallTime>) {
use nom_bibtex::Bibtex;
for _ in 0..10 {
let _ = Bibtex::parse(TUGBOAT_BIB);
}
group.bench_function("nom-bibtex", |b| {
b.iter(|| {
let result = Bibtex::parse(black_box(TUGBOAT_BIB));
match result {
Ok(bib) => {
black_box(&bib);
assert!(!bib.bibliographies().is_empty());
}
Err(e) => panic!("nom-bibtex parsing failed: {:?}", e),
}
});
});
}
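/// biblatex parsing the fixture into its `RawBibliography` representation.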
fn bench_biblatex(group: &mut criterion::BenchmarkGroup<criterion::measurement::WallTime>) {
use biblatex::RawBibliography;
for _ in 0..10 {
let _ = RawBibliography::parse(TUGBOAT_BIB);
}
group.bench_function("biblatex", |b| {
b.iter(|| {
let result = RawBibliography::parse(black_box(TUGBOAT_BIB));
match result {
Ok(bib) => {
black_box(&bib);
assert!(!bib.entries.is_empty());
}
Err(e) => panic!("biblatex parsing failed: {:?}", e),
}
});
});
}
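/// Operations on an already-parsed `Library`: entry iteration, field lookup, and
/// entry-type filtering.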
fn bench_critical_operations(c: &mut Criterion) {
use bibtex_parser::Library;
let mut group = c.benchmark_group("operations");
group.measurement_time(Duration::from_secs(15));
group.warm_up_time(Duration::from_secs(6));
group.sample_size(150);
let library = Library::parser().parse(TUGBOAT_BIB).unwrap();
stabilize_system();
group.bench_function("entry_iteration", |b| {
b.iter(|| {
let mut count = 0;
for entry in library.entries() {
if !entry.key().is_empty() {
count += 1;
}
}
black_box(count);
});
});
group.bench_function("field_access", |b| {
b.iter(|| {
let mut total_len = 0;
for entry in library.entries().iter().take(1000) {
if let Some(author) = entry.get("author") {
total_len += author.len();
}
if let Some(title) = entry.get("title") {
total_len += title.len();
}
}
black_box(total_len);
});
});
group.bench_function("type_filtering", |b| {
use bibtex_parser::EntryType;
let entries: Vec<_> = library.entries().iter().collect();
b.iter(|| {
let mut articles = 0;
for entry in &entries {
if matches!(entry.entry_type(), EntryType::Article) {
articles += 1;
}
}
black_box(articles);
});
});
group.finish();
}
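/// Parse throughput at several input sizes cut from the fixture with `extract_entries`.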
fn bench_memory_efficiency(c: &mut Criterion) {
use bibtex_parser::Library;
let mut group = c.benchmark_group("memory");
group.measurement_time(Duration::from_secs(10));
group.warm_up_time(Duration::from_secs(4));
stabilize_system();
let sizes = [
("100_entries", extract_entries(TUGBOAT_BIB, 100)),
("500_entries", extract_entries(TUGBOAT_BIB, 500)),
("1000_entries", extract_entries(TUGBOAT_BIB, 1000)),
];
for (name, input) in &sizes {
let input_bytes = input.len() as u64;
group.throughput(Throughput::Bytes(input_bytes));
group.bench_with_input(
BenchmarkId::new("parse", name),
input.as_str(),
|b, input| {
b.iter(|| {
let library = Library::parser().parse(black_box(input)).unwrap();
assert!(!library.entries().is_empty());
black_box(&library);
});
},
);
}
group.finish();
}
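/// Multi-file parsing through `parse_sources`, using four synthetic sources of increasing
/// size cut from the fixture.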
fn bench_corpus_parsing(c: &mut Criterion) {
use bibtex_parser::{CorpusSource, Library};
let source_texts = [
extract_entries(TUGBOAT_BIB, 250),
extract_entries(TUGBOAT_BIB, 500),
extract_entries(TUGBOAT_BIB, 750),
extract_entries(TUGBOAT_BIB, 1000),
];
let source_names = ["a.bib", "b.bib", "c.bib", "d.bib"];
let sources = source_names
.iter()
.zip(source_texts.iter())
.map(|(name, input)| CorpusSource::new(name, input))
.collect::<Vec<_>>();
let total_bytes = source_texts.iter().map(String::len).sum::<usize>() as u64;
let mut group = c.benchmark_group("corpus");
group.measurement_time(Duration::from_secs(15));
group.warm_up_time(Duration::from_secs(6));
group.sample_size(120);
group.throughput(Throughput::Bytes(total_bytes));
stabilize_system();
let parser = Library::parser();
group.bench_function("parse_sources", |b| {
b.iter(|| {
let corpus = parser.parse_sources(black_box(&sources)).unwrap();
black_box(&corpus);
assert!(!corpus.documents().is_empty());
});
});
group.finish();
}
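/// Returns a prefix of `input` containing at most `max_entries` regular entries, tracking
/// brace depth line by line. Lines starting an `@comment`, `@preamble`, or `@string` block
/// are copied through without counting toward the limit (assumed single-line here).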
fn extract_entries(input: &str, max_entries: usize) -> String {
let mut result = String::with_capacity(input.len() / 10);
let mut entry_count = 0;
let mut depth = 0;
let mut in_entry = false;
for line in input.lines() {
let trimmed = line.trim_start();
if !in_entry && trimmed.starts_with('@') {
let entry_type = trimmed
.split_once(|c: char| c == '{' || c.is_whitespace())
.map(|(t, _)| t.to_lowercase())
.unwrap_or_default();
if entry_type == "@comment" || entry_type == "@preamble" || entry_type == "@string" {
result.push_str(line);
result.push('\n');
continue;
}
if entry_count >= max_entries {
break;
}
in_entry = true;
}
if in_entry {
result.push_str(line);
result.push('\n');
for ch in line.chars() {
match ch {
'{' => depth += 1,
'}' => {
depth -= 1;
if depth == 0 {
in_entry = false;
entry_count += 1;
break;
}
}
_ => {}
}
}
} else {
result.push_str(line);
result.push('\n');
}
}
result
}
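// Suite-wide Criterion defaults; individual benchmark groups above override these where needed.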
criterion_group! {
name = benches;
config = Criterion::default()
.warm_up_time(Duration::from_secs(10))
.measurement_time(Duration::from_secs(20))
.sample_size(100)
.significance_level(0.02)
.confidence_level(0.98)
.noise_threshold(0.03);
targets = bench_parser_comparison, bench_critical_operations, bench_memory_efficiency, bench_corpus_parsing, bench_writing
}
criterion_main!(benches);