use manifoldb::{Database, Error, Value};
/// Runs the ManifoldDB vector search example.
///
/// Stores a handful of documents with embeddings, then performs several
/// similarity searches over them and prints the results to stdout.
///
/// # Errors
///
/// Any error raised by a database call is propagated to the caller via `?`.
fn main() -> Result<(), Error> {
// The whole example works against a database created with `Database::in_memory()`.
let db = Database::in_memory()?;
// Banner for the example output.
println!("ManifoldDB Vector Search Example");
println!("=================================\n");
// Seed the database: every sample document is written inside one transaction,
// and each entity's id is kept together with its title for the searches below.
let docs = {
    let mut tx = db.begin()?;

    // (title, 4-component embedding, category label)
    let samples = [
        ("Introduction to Machine Learning", vec![0.9f32, 0.8, 0.7, 0.1], "technology"),
        ("Deep Learning Fundamentals", vec![0.85f32, 0.82, 0.75, 0.15], "technology"),
        ("Neural Networks in Practice", vec![0.88f32, 0.79, 0.72, 0.12], "technology"),
        ("Cooking Italian Pasta", vec![0.1f32, 0.2, 0.9, 0.85], "food"),
        ("French Cuisine Basics", vec![0.12f32, 0.18, 0.88, 0.82], "food"),
        ("Baking Sourdough Bread", vec![0.15f32, 0.22, 0.92, 0.78], "food"),
        ("History of Rome", vec![0.3f32, 0.1, 0.2, 0.95], "history"),
        ("Ancient Greece", vec![0.28f32, 0.12, 0.18, 0.92], "history"),
    ];

    let mut stored = Vec::with_capacity(samples.len());
    for (title, embedding, category) in samples {
        // Each document carries the shared "Document" label plus its category label.
        let entity = tx
            .create_entity()?
            .with_label("Document")
            .with_label(category)
            .with_property("title", title)
            .with_property("embedding", embedding);
        stored.push((entity.id, title.to_owned()));
        tx.put_entity(&entity)?;
    }

    tx.commit()?;
    println!("Created {} documents with embeddings", stored.len());
    stored
};
/// Cosine similarity between two vectors: their dot product divided by the
/// product of their Euclidean norms.
///
/// Returns `0.0` when either vector has a norm of exactly zero (this includes
/// empty slices), which avoids dividing by zero.
///
/// If the slices differ in length, the dot product only covers the shared
/// prefix, while each norm is still computed over its whole slice.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    // Euclidean (L2) norm of a single vector.
    let magnitude = |v: &[f32]| -> f32 { v.iter().map(|c| c * c).sum::<f32>().sqrt() };

    let (len_a, len_b) = (magnitude(a), magnitude(b));
    if len_a == 0.0 || len_b == 0.0 {
        return 0.0;
    }

    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    dot / (len_a * len_b)
}
/// Euclidean (straight-line) distance between two vectors: the square root of
/// the sum of squared component differences.
///
/// Components are paired positionally; if the slices differ in length, only
/// the shared prefix contributes to the result.
fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
    let squared_total: f32 = a
        .iter()
        .zip(b)
        .map(|(x, y)| {
            let delta = x - y;
            delta.powi(2)
        })
        .sum();
    squared_total.sqrt()
}
// --- Similarity search: score every stored document against one query vector ---
println!("\n--- Similarity Search ---\n");
let query_embedding = vec![0.87f32, 0.81, 0.73, 0.13];
println!("Query: 'machine learning related topics'");
println!("Query embedding: {:?}\n", query_embedding);
{
    // Exhaustive scan: each document is loaded by id and scored in application code.
    let tx = db.begin_read()?;
    // One (title, cosine similarity, Euclidean distance) entry per scored document.
    let mut results: Vec<(String, f32, f32)> = Vec::with_capacity(docs.len());
    for (id, title) in &docs {
        if let Some(entity) = tx.get_entity(*id)? {
            // Entities without a vector-valued "embedding" property are skipped.
            if let Some(Value::Vector(embedding)) = entity.get_property("embedding") {
                let cosine = cosine_similarity(&query_embedding, embedding);
                let distance = euclidean_distance(&query_embedding, embedding);
                results.push((title.clone(), cosine, distance));
            }
        }
    }
    // Highest cosine similarity first. `f32::total_cmp` is a total order, so the
    // comparator stays consistent even if a score were NaN; mapping the `None`
    // from `partial_cmp` to `Equal` would not give `sort_by` the total order it
    // requires.
    results.sort_by(|a, b| b.1.total_cmp(&a.1));
    println!("Results sorted by cosine similarity:");
    for (title, cosine, distance) in &results {
        println!(" {:.4} (dist: {:.4}) - {}", cosine, distance, title);
    }
    println!("\nTop 3 most similar documents:");
    for (i, (title, cosine, _)) in results.iter().take(3).enumerate() {
        println!(" {}. {} (similarity: {:.4})", i + 1, title, cosine);
    }
}
// --- Second query: a vector chosen to resemble the food documents' embeddings ---
println!("\n--- Category-based Vector Search ---\n");
let food_query = vec![0.11f32, 0.19, 0.89, 0.83];
println!("Query: 'cooking and food'");
{
    // No label filter is applied here: every document is scored, and the
    // ranking alone decides which ones reach the top three.
    let tx = db.begin_read()?;
    // One (title, cosine similarity) entry per scored document.
    let mut results: Vec<(String, f32)> = Vec::with_capacity(docs.len());
    for (id, title) in &docs {
        if let Some(entity) = tx.get_entity(*id)? {
            // Entities without a vector-valued "embedding" property are skipped.
            if let Some(Value::Vector(embedding)) = entity.get_property("embedding") {
                let similarity = cosine_similarity(&food_query, embedding);
                results.push((title.clone(), similarity));
            }
        }
    }
    // Highest similarity first. `f32::total_cmp` is a total order, so the
    // comparator stays consistent even if a score were NaN; mapping the `None`
    // from `partial_cmp` to `Equal` would not give `sort_by` the total order it
    // requires.
    results.sort_by(|a, b| b.1.total_cmp(&a.1));
    println!("Top 3 results for food query:");
    for (i, (title, similarity)) in results.iter().take(3).enumerate() {
        println!(" {}. {} ({:.4})", i + 1, title, similarity);
    }
}
// --- "More like this": use a stored document's own embedding as the query ---
println!("\n--- Find Similar Documents ---\n");
{
    let tx = db.begin_read()?;
    // The first stored document serves as the reference.
    let reference_id = docs[0].0;
    let reference_title = &docs[0].1;
    if let Some(reference) = tx.get_entity(reference_id)? {
        // Nothing is printed if the reference has no vector-valued "embedding".
        if let Some(Value::Vector(reference_embedding)) = reference.get_property("embedding") {
            println!("Finding documents similar to: '{}'", reference_title);
            // One (title, cosine similarity) entry per candidate document.
            let mut similar: Vec<(String, f32)> = Vec::with_capacity(docs.len());
            for (id, title) in &docs {
                // The reference document is not compared against itself.
                if *id == reference_id {
                    continue;
                }
                if let Some(entity) = tx.get_entity(*id)? {
                    if let Some(Value::Vector(embedding)) = entity.get_property("embedding") {
                        let similarity = cosine_similarity(reference_embedding, embedding);
                        similar.push((title.clone(), similarity));
                    }
                }
            }
            // Highest similarity first. `f32::total_cmp` is a total order, so the
            // comparator stays consistent even if a score were NaN; mapping the
            // `None` from `partial_cmp` to `Equal` would not give `sort_by` the
            // total order it requires.
            similar.sort_by(|a, b| b.1.total_cmp(&a.1));
            println!("\nMost similar documents:");
            for (title, similarity) in similar.iter().take(3) {
                println!(" {:.4} - {}", similarity, title);
            }
        }
    }
}
// --- Filtered search: only documents carrying the "food" label are scored ---
println!("\n--- Filtered Vector Search ---\n");
{
    let tx = db.begin_read()?;
    let query = vec![0.3f32, 0.1, 0.2, 0.9];
    println!("Query embedding: {:?}", query);
    println!("Filter: only 'food' labeled documents\n");
    // One (title, cosine similarity) entry per document that passes the filter.
    let mut results: Vec<(String, f32)> = Vec::new();
    for (id, title) in &docs {
        if let Some(entity) = tx.get_entity(*id)? {
            // The label check runs first, so documents outside the category
            // are never scored.
            if entity.has_label("food") {
                if let Some(Value::Vector(embedding)) = entity.get_property("embedding") {
                    let similarity = cosine_similarity(&query, embedding);
                    results.push((title.clone(), similarity));
                }
            }
        }
    }
    // Highest similarity first. `f32::total_cmp` is a total order, so the
    // comparator stays consistent even if a score were NaN; mapping the `None`
    // from `partial_cmp` to `Equal` would not give `sort_by` the total order it
    // requires.
    results.sort_by(|a, b| b.1.total_cmp(&a.1));
    println!("Results (food documents only):");
    for (title, similarity) in &results {
        println!(" {:.4} - {}", similarity, title);
    }
}
println!("\nVector search example complete!");
// Closing hint for the reader: the searches in this example are computed by
// looping over the documents in application code; these two lines only print
// a pointer to the indexed (HNSW) alternative and run no query themselves.
println!("\nNote: Full vector indexing (HNSW) is available in the query engine.");
println!("Use SQL with ORDER BY embedding <-> $query for indexed searches.");
// Every fallible step above returned early via `?`, so reaching here means success.
Ok(())
}