use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use std::io::Cursor;
fn create_test_docx(paragraph_count: usize) -> Vec<u8> {
use std::io::Write;
use zip::write::SimpleFileOptions;
use zip::ZipWriter;
let mut buffer = Vec::new();
let mut zip = ZipWriter::new(Cursor::new(&mut buffer));
let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored);
zip.start_file("[Content_Types].xml", options).unwrap();
zip.write_all(
br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
<Default Extension="xml" ContentType="application/xml"/>
<Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
</Types>"#,
)
.unwrap();
zip.start_file("_rels/.rels", options).unwrap();
zip.write_all(
br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/>
</Relationships>"#,
)
.unwrap();
zip.start_file("word/_rels/document.xml.rels", options)
.unwrap();
zip.write_all(
br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
</Relationships>"#,
)
.unwrap();
let mut content = String::from(
r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>"#,
);
for i in 0..paragraph_count {
content.push_str(&format!(
r#"
<w:p>
<w:r>
<w:t>This is paragraph {} with some test content for benchmarking purposes.</w:t>
</w:r>
</w:p>"#,
i
));
}
content.push_str(
r#"
</w:body>
</w:document>"#,
);
zip.start_file("word/document.xml", options).unwrap();
zip.write_all(content.as_bytes()).unwrap();
zip.finish().unwrap();
buffer
}
fn bench_docx_parsing(c: &mut Criterion) {
let mut group = c.benchmark_group("docx_parsing");
for para_count in [10, 100, 500, 1000].iter() {
let data = create_test_docx(*para_count);
let size = data.len() as u64;
group.throughput(Throughput::Bytes(size));
group.bench_with_input(
BenchmarkId::new("paragraphs", para_count),
&data,
|b, data| {
b.iter(|| {
let _ = undoc::parse_bytes(black_box(data));
});
},
);
}
group.finish();
}
fn bench_markdown_rendering(c: &mut Criterion) {
let mut group = c.benchmark_group("markdown_rendering");
for para_count in [10, 100, 500].iter() {
let data = create_test_docx(*para_count);
let document = undoc::parse_bytes(&data).unwrap();
group.bench_with_input(
BenchmarkId::new("paragraphs", para_count),
&document,
|b, doc| {
b.iter(|| {
let options = undoc::render::RenderOptions::default();
let _ = undoc::render::to_markdown(black_box(doc), &options);
});
},
);
}
group.finish();
}
fn bench_text_extraction(c: &mut Criterion) {
let mut group = c.benchmark_group("text_extraction");
for para_count in [10, 100, 500, 1000].iter() {
let data = create_test_docx(*para_count);
let document = undoc::parse_bytes(&data).unwrap();
group.bench_with_input(
BenchmarkId::new("paragraphs", para_count),
&document,
|b, doc| {
b.iter(|| {
let _ = black_box(doc).plain_text();
});
},
);
}
group.finish();
}
criterion_group!(
benches,
bench_docx_parsing,
bench_markdown_rendering,
bench_text_extraction,
);
criterion_main!(benches);