diff_core 0.1.3

Semantic PDF comparison engine for matching document blocks and reporting meaningful changes.
Documentation
use criterion::{Criterion, criterion_group, criterion_main};
use diff_core::{DiffConfig, diff_semantic_documents};
use pdf_semantic::{SemanticAnchor, SemanticDocument, SemanticNode, SemanticNodeKind};
use spdfdiff_types::Provenance;

/// Builds a synthetic semantic document of 50 pages with four paragraph nodes per page.
///
/// Every node is a `SemanticNodeKind::Paragraph` with no bounding box, no table, an
/// unknown anchor, a single unknown provenance entry and a confidence of `1.0`. Node ids
/// follow the pattern `n{page_index:02}_{paragraph_index:02}`, and nodes are emitted in
/// page order, then paragraph order.
///
/// `fingerprint` is copied into the returned document. When `changed` is `true`, the node
/// at page index 25, paragraph index 2 carries a fixed "revised" sentence instead of the
/// generated page/block text; all other nodes are unaffected by the flag.
fn fifty_page_semantic_document(fingerprint: &str, changed: bool) -> SemanticDocument {
    let nodes = (0..50)
        .flat_map(|page_index| (0..4).map(move |paragraph_index| (page_index, paragraph_index)))
        .map(|(page_index, paragraph_index)| {
            // Exactly one paragraph is rewritten when a changed document is requested.
            let is_revised = changed && page_index == 25 && paragraph_index == 2;
            let normalized_text = if is_revised {
                String::from("Benchmark paragraph with revised content")
            } else {
                format!("Benchmark paragraph page {page_index} block {paragraph_index}")
            };
            SemanticNode {
                id: format!("n{page_index:02}_{paragraph_index:02}"),
                kind: SemanticNodeKind::Paragraph,
                page_index,
                bbox: None,
                normalized_text: Some(normalized_text),
                table: None,
                anchor: SemanticAnchor::unknown(),
                source: vec![Provenance::unknown()],
                confidence: 1.0,
            }
        })
        .collect::<Vec<_>>();

    SemanticDocument {
        fingerprint: fingerprint.to_owned(),
        nodes,
        diagnostics: Vec::new(),
        tagged_structure: None,
    }
}

/// Criterion benchmark that diffs two synthetic 50-page semantic documents.
///
/// The "old" and "new" documents are built once, outside the timed closure, so each
/// iteration measures only the `diff_semantic_documents` call (including construction of
/// the default `DiffConfig` passed to it). The "new" document is built with
/// `changed = true`, so one paragraph's text differs from the "old" document.
fn bench_fifty_page_diff(c: &mut Criterion) {
    let old = fifty_page_semantic_document("old", false);
    let new = fifty_page_semantic_document("new", true);
    c.bench_function("diff_50_page_semantic_documents", |b| {
        // Pass the inputs through `black_box` so the optimizer cannot specialize the
        // measured call on these fixed benchmark documents. The closure's return value is
        // handed back to `Bencher::iter`, which keeps the result from being discarded.
        b.iter(|| {
            diff_semantic_documents(
                std::hint::black_box(&old),
                std::hint::black_box(&new),
                DiffConfig::default(),
            )
        });
    });
}

// Register `bench_fifty_page_diff` in the `benches` group and let Criterion generate the
// benchmark binary's entry point for that group.
criterion_group!(benches, bench_fifty_page_diff);
criterion_main!(benches);