use std::hint::black_box;
use criterion::{Criterion, Throughput, criterion_group, criterion_main};
use laurus::analysis::analyzer::analyzer::Analyzer;
use laurus::analysis::analyzer::standard::StandardAnalyzer;
use laurus::spelling::corrector::SpellingCorrector;
use laurus::vector::core::distance::DistanceMetric;
use laurus::vector::core::vector::Vector;
use laurus::vector::index::config::HnswIndexConfig;
use laurus::vector::index::hnsw::writer::HnswIndexWriter;
use laurus::vector::writer::VectorIndexWriter;
/// Builds `count` synthetic documents from a fixed 32-word vocabulary.
///
/// Document `i` contains `50 + (i % 100)` words chosen by the deterministic
/// pattern `(i * 7 + j * 13) % 32`, so repeated benchmark runs see
/// identical input.
fn generate_test_documents(count: usize) -> Vec<String> {
    const WORDS: [&str; 32] = [
        "search", "engine", "full", "text", "index", "query", "document",
        "field", "term", "phrase", "boolean", "vector", "similarity",
        "relevance", "score", "analysis", "tokenization", "stemming",
        "normalization", "clustering", "machine", "learning", "algorithm",
        "data", "structure", "performance", "optimization", "memory",
        "storage", "retrieval", "ranking", "filtering",
    ];
    (0..count)
        .map(|i| {
            let doc_length = 50 + (i % 100);
            let chosen: Vec<&str> = (0..doc_length)
                .map(|j| WORDS[(i * 7 + j * 13) % WORDS.len()])
                .collect();
            chosen.join(" ")
        })
        .collect()
}
/// Produces `count` deterministic pseudo-random vectors of the given
/// `dimension`. Each component is derived from a sine wave over the
/// (index, component) pair and scaled into [-1.0, 1.0].
fn generate_test_vectors(count: usize, dimension: usize) -> Vec<Vector> {
    (0..count)
        .map(|i| {
            // Same arithmetic as before, expressed as an iterator chain;
            // evaluation order of the float expression is unchanged.
            let data: Vec<f32> = (0..dimension)
                .map(|j| ((i as f32 * 0.1 + j as f32 * 0.01).sin() * 0.5 + 0.5) * 2.0 - 1.0)
                .collect();
            Vector::new(data)
        })
        .collect()
}
/// Benchmarks the standard text analyzer: single-document latency and
/// throughput over a 100-document batch.
fn bench_text_analysis(c: &mut Criterion) {
    let mut group = c.benchmark_group("text_analysis");
    let analyzer = StandardAnalyzer::new().unwrap();
    let texts = generate_test_documents(1000);

    // Latency of analyzing one document.
    group.bench_function("analyze_single_document", |b| {
        b.iter(|| black_box(analyzer.analyze(black_box(&texts[0]))))
    });

    // Throughput across the first 100 generated documents.
    group.throughput(Throughput::Elements(100));
    group.bench_function("analyze_batch_documents", |b| {
        b.iter(|| {
            for text in texts.iter().take(100) {
                let _ = black_box(analyzer.analyze(black_box(text)));
            }
        })
    });
    group.finish();
}
/// Benchmarks vector-search primitives: HNSW index construction,
/// brute-force scoring with a sort, batched cosine distance, and
/// in-place normalization.
fn bench_vector_search(c: &mut Criterion) {
    let mut group = c.benchmark_group("vector_search");
    // Index construction is expensive; keep the sample count low.
    group.sample_size(20);
    let dimension = 128;
    let vectors = generate_test_vectors(1000, dimension);

    // 100 vectors are inserted per iteration.
    group.throughput(Throughput::Elements(100));
    group.bench_function("hnsw_index_construction", |b| {
        b.iter_with_setup(
            || {
                // Fresh writer per iteration; setup runs outside the timing.
                let index_config = HnswIndexConfig {
                    dimension,
                    distance_metric: DistanceMetric::Cosine,
                    m: 16,
                    ef_construction: 200,
                    ..Default::default()
                };
                let writer_config = laurus::vector::writer::VectorIndexWriterConfig::default();
                HnswIndexWriter::new(index_config, writer_config, "bench_vectors").unwrap()
            },
            |mut builder| {
                let field_name = "default".to_string();
                let indexed_vectors: Vec<(u64, String, Vector)> = vectors
                    .iter()
                    .take(100)
                    .enumerate()
                    .map(|(i, v)| (i as u64, field_name.clone(), v.clone()))
                    .collect();
                let _ = builder.build(indexed_vectors);
                black_box(builder);
            },
        )
    });

    // Fix: this benchmark scores only 50 vectors per iteration, but it
    // previously inherited Throughput::Elements(100) from the group state
    // set above, inflating the reported elements/sec by 2x.
    group.throughput(Throughput::Elements(50));
    group.bench_function("vector_operations_basic", |b| {
        let query_vector = vectors[0].clone();
        b.iter(|| {
            let mut results = Vec::with_capacity(50);
            for (i, vector) in vectors.iter().take(50).enumerate() {
                let distance = DistanceMetric::Cosine
                    .distance(black_box(&query_vector.data), black_box(&vector.data))
                    .unwrap();
                results.push((i as u64, distance));
            }
            results.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
            black_box(results)
        })
    });

    // Restore the 100-element throughput for the 100-target batch below.
    group.throughput(Throughput::Elements(100));
    group.bench_function("cosine_distance_batch", |b| {
        let query = &vectors[0];
        let targets = &vectors[1..101];
        b.iter(|| {
            for target in targets {
                let distance = DistanceMetric::Cosine
                    .distance(black_box(&query.data), black_box(&target.data))
                    .unwrap();
                black_box(distance);
            }
        })
    });

    // 100 vectors normalized per iteration; the clone in setup is untimed.
    group.throughput(Throughput::Elements(100));
    group.bench_function("vector_normalization", |b| {
        b.iter_with_setup(
            || vectors[0..100].to_vec(),
            |mut test_vectors| {
                for vector in &mut test_vectors {
                    vector.normalize();
                }
                black_box(test_vectors);
            },
        )
    });
    group.finish();
}
/// Benchmarks spelling correction: a single hard-coded misspelling and a
/// batch of five misspelled words.
fn bench_spell_correction(c: &mut Criterion) {
    let mut group = c.benchmark_group("spell_correction");
    group.sample_size(20);
    // NOTE(review): declared `mut` — presumably `correct` takes `&mut self`;
    // confirm against SpellingCorrector's API.
    let mut corrector = SpellingCorrector::new();
    let misspellings = vec!["searc", "engin", "documnet", "qurey", "algortihm"];

    group.bench_function("correct_single_word", |b| {
        b.iter(|| black_box(corrector.correct(black_box("searc"))))
    });

    group.throughput(Throughput::Elements(misspellings.len() as u64));
    group.bench_function("correct_batch_words", |b| {
        b.iter(|| {
            for word in &misspellings {
                black_box(corrector.correct(black_box(word)));
            }
        })
    });
    group.finish();
}
/// Compares rayon-parallel vs sequential cosine-distance computation over
/// 500 vectors, plus parallel in-place normalization.
fn bench_parallel_operations(c: &mut Criterion) {
    use rayon::prelude::*;

    let mut group = c.benchmark_group("parallel_operations");
    let vectors = generate_test_vectors(1000, 128);
    let query_vector = &vectors[0];

    group.throughput(Throughput::Elements(500));
    group.bench_function("parallel_distance_calculation", |b| {
        b.iter(|| {
            let distances: Vec<_> = vectors[1..501]
                .par_iter()
                .map(|v| {
                    DistanceMetric::Cosine
                        .distance(&query_vector.data, &v.data)
                        .unwrap()
                })
                .collect();
            black_box(distances);
        })
    });

    // Sequential baseline over the same 500-vector slice.
    group.bench_function("sequential_distance_calculation", |b| {
        b.iter(|| {
            let mut distances = Vec::with_capacity(500);
            for v in &vectors[1..501] {
                distances.push(
                    DistanceMetric::Cosine
                        .distance(&query_vector.data, &v.data)
                        .unwrap(),
                );
            }
            black_box(distances);
        })
    });

    group.throughput(Throughput::Elements(500));
    group.bench_function("parallel_vector_normalization", |b| {
        b.iter_with_setup(
            // Clone the 500-vector batch outside the timed section.
            || vectors[0..500].to_vec(),
            |mut batch| {
                batch.par_iter_mut().for_each(|v| v.normalize());
                black_box(batch);
            },
        )
    });
    group.finish();
}
/// Benchmarks allocating 1000 fresh 128-dimensional vectors per iteration.
fn bench_memory_operations(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_operations");
    group.throughput(Throughput::Elements(1000));
    group.bench_function("vector_allocation", |b| {
        b.iter(|| {
            // Starts from an empty Vec, so growth/reallocation is included
            // in what this benchmark measures.
            let mut allocated = Vec::new();
            for i in 0..1000 {
                let data: Vec<f32> = (0..128).map(|j| (i + j) as f32).collect();
                allocated.push(Vector::new(data));
            }
            black_box(allocated);
        })
    });
    group.finish();
}
/// Measures HNSW index construction time at two dataset sizes (100 and
/// 200 vectors) to observe scaling behavior.
fn bench_scalability(c: &mut Criterion) {
    let mut group = c.benchmark_group("scalability");
    group.sample_size(10);
    for size in [100, 200].iter() {
        group.bench_with_input(
            format!("vector_index_{size}_vectors"),
            size,
            |b, &vector_count| {
                let vectors = generate_test_vectors(vector_count, 128);
                b.iter_with_setup(
                    // Create a fresh writer in the untimed setup phase.
                    || {
                        let index_config = HnswIndexConfig {
                            dimension: 128,
                            distance_metric: DistanceMetric::Cosine,
                            m: 16,
                            ef_construction: 200,
                            ..Default::default()
                        };
                        let writer_config =
                            laurus::vector::writer::VectorIndexWriterConfig::default();
                        HnswIndexWriter::new(
                            index_config,
                            writer_config,
                            "bench_scalability_vectors",
                        )
                        .unwrap()
                    },
                    // Timed phase: build the index from every generated vector.
                    |mut writer| {
                        let field = String::from("default");
                        let rows: Vec<(u64, String, Vector)> = vectors
                            .iter()
                            .enumerate()
                            .map(|(i, v)| (i as u64, field.clone(), v.clone()))
                            .collect();
                        let _ = writer.build(rows);
                        black_box(writer);
                    },
                )
            },
        );
    }
    group.finish();
}
/// Benchmarks synonym-dictionary lookups at three dictionary sizes,
/// a 100-key batch lookup, and dictionary construction.
fn bench_synonym_dictionary(c: &mut Criterion) {
    let mut group = c.benchmark_group("synonym_dictionary");
    let small_dict = create_test_dictionary(100);
    let medium_dict = create_test_dictionary(1000);
    let large_dict = create_test_dictionary(10000);

    group.bench_function("lookup_small_100", |b| {
        b.iter(|| {
            let result = small_dict.get_synonyms(black_box("term_50"));
            black_box(result)
        })
    });
    group.bench_function("lookup_medium_1k", |b| {
        b.iter(|| {
            let result = medium_dict.get_synonyms(black_box("term_500"));
            black_box(result)
        })
    });
    group.bench_function("lookup_large_10k", |b| {
        b.iter(|| {
            let result = large_dict.get_synonyms(black_box("term_5000"));
            black_box(result)
        })
    });

    group.throughput(Throughput::Elements(100));
    group.bench_function("batch_lookup_100", |b| {
        // Fix: build the 100 lookup keys once, outside the timed closure.
        // Previously each iteration allocated the keys via `format!` inside
        // `b.iter`, so the measurement mixed String allocation cost into the
        // reported lookup throughput.
        let terms: Vec<String> = (0..100).map(|i| format!("term_{}", i)).collect();
        b.iter(|| {
            for term in &terms {
                let result = large_dict.get_synonyms(black_box(term));
                black_box(result);
            }
        })
    });

    group.bench_function("build_dict_1k", |b| {
        b.iter(|| {
            let dict = create_test_dictionary(1000);
            black_box(dict)
        })
    });
    group.finish();
}
/// Builds a synonym dictionary with `num_groups` groups, each containing
/// three deterministic terms: `term_i`, `synonym_a_i`, `synonym_b_i`.
fn create_test_dictionary(
    num_groups: usize,
) -> laurus::analysis::synonym::dictionary::SynonymDictionary {
    use laurus::analysis::synonym::dictionary::SynonymDictionary;
    let mut dict = SynonymDictionary::new(None).unwrap();
    // Add each group directly instead of staging them in an intermediate Vec;
    // the groups are inserted in the same order as before.
    for i in 0..num_groups {
        dict.add_synonym_group(vec![
            format!("term_{}", i),
            format!("synonym_a_{}", i),
            format!("synonym_b_{}", i),
        ]);
    }
    dict
}
// Main benchmark groups: text analysis, vector search, parallelism,
// allocation, and synonym-dictionary lookups.
criterion_group!(
    benches,
    bench_text_analysis,
    bench_vector_search,
    bench_parallel_operations,
    bench_memory_operations,
    bench_synonym_dictionary
);
// Slower groups (spell correction and multi-size index construction) are
// kept in a separate group; both groups run under the same harness.
criterion_group!(slow_benches, bench_spell_correction, bench_scalability);
criterion_main!(benches, slow_benches);