#![allow(clippy::unwrap_used)]
use vicinity::hnsw::HNSWIndex;
/// Dimensionality of every embedding vector produced by [`embed`].
const DIM: usize = 64;
/// Deterministic toy embedding: hash each whitespace-separated token and
/// spread it across `dim` sinusoidal features, then L2-normalize the result.
///
/// This is a stand-in for a real embedding model — it is cheap and fully
/// deterministic, so identical texts always map to identical vectors.
fn embed(text: &str, dim: usize) -> Vec<f32> {
    let mut features = vec![0.0_f32; dim];
    for token in text.split_whitespace() {
        // Position-weighted byte sum: a cheap, order-sensitive token hash.
        let mut hash: u64 = 0;
        for (pos, byte) in token.bytes().enumerate() {
            hash += (pos as u64 + 1) * byte as u64;
        }
        // Project the hash onto every dimension through a scaled sine wave,
        // accumulating contributions from all tokens.
        for (dim_idx, slot) in features.iter_mut().enumerate() {
            let phase = hash.wrapping_mul(dim_idx as u64 + 1) as f32 * 0.01;
            *slot += phase.sin();
        }
    }
    normalize(&features)
}
/// Scale `v` to unit Euclidean (L2) length.
///
/// Vectors whose norm is below `f32::EPSILON` (including the empty slice)
/// are returned as an unscaled copy to avoid dividing by zero.
fn normalize(v: &[f32]) -> Vec<f32> {
    let squared_sum: f32 = v.iter().map(|x| x * x).sum();
    let norm = squared_sum.sqrt();
    if norm < f32::EPSILON {
        // Degenerate input: pass it through untouched.
        v.to_vec()
    } else {
        v.iter().map(|x| x / norm).collect()
    }
}
/// Demo entry point: embed a small corpus, build an HNSW index over it, and
/// run a few natural-language queries, printing the ranked matches.
fn main() -> vicinity::Result<()> {
    // Corpus: fifteen short technical sentences spanning several topics.
    let documents = [
        "Rust is a systems programming language focused on safety and performance",
        "Python is popular for data science and machine learning workflows",
        "JavaScript powers interactive web applications and server-side code",
        "The Rust borrow checker prevents data races at compile time",
        "Neural networks learn representations from large training datasets",
        "PostgreSQL is a relational database with strong ACID guarantees",
        "Transformers revolutionized natural language processing tasks",
        "WebAssembly enables near-native performance in the browser",
        "Gradient descent optimizes model parameters during training",
        "Redis provides in-memory key-value storage for caching",
        "HNSW indexes enable fast approximate nearest neighbor search",
        "Kubernetes orchestrates containerized application deployments",
        "Attention mechanisms let models focus on relevant input tokens",
        "Rust async/await enables efficient concurrent IO without threads",
        "Vector databases store embeddings for semantic retrieval",
    ];

    // Embed everything up front, then load the vectors into the index.
    let embeddings: Vec<Vec<f32>> = documents.iter().map(|doc| embed(doc, DIM)).collect();
    // NOTE(review): 16 and 32 look like HNSW construction parameters (e.g.
    // neighbors per node and ef_construction) — confirm against HNSWIndex::new.
    let mut index = HNSWIndex::new(DIM, 16, 32)?;
    for (doc_id, vector) in embeddings.iter().enumerate() {
        index.add_slice(doc_id as u32, vector)?;
    }
    index.build()?;
    println!("Indexed {} documents (dim={})\n", documents.len(), DIM);

    let queries = [
        "safe systems programming with compile-time checks",
        "machine learning model training",
        "fast similarity search over vectors",
    ];
    for query in &queries {
        let needle = embed(query, DIM);
        // Top-5 hits; 50 is presumably the search beam width (ef) — verify.
        let results = index.search(&needle, 5, 50)?;
        println!("Query: \"{query}\"");
        for (rank, (doc_id, dist)) in results.iter().enumerate() {
            // Distance → similarity; assumes the index returns a distance in
            // [0, 1] (e.g. cosine distance) — confirm against the crate docs.
            let sim = 1.0 - dist;
            println!("  {}. [{:.3}] {}", rank + 1, sim, documents[*doc_id as usize]);
        }
        println!();
    }
    Ok(())
}