// manifoldb 0.1.4
//
// A multi-paradigm embedded database for graph, vector, and relational data.
// Documentation
//! Vector search example for ManifoldDB.
//!
//! This example demonstrates:
//! - Storing vector embeddings as entity properties
//! - Computing similarity between vectors
//! - Organizing documents with embeddings
//!
//! Run with: `cargo run --example vector_search`

use manifoldb::{Database, Error, Value};

fn main() -> Result<(), Error> {
    let db = Database::in_memory()?;

    println!("ManifoldDB Vector Search Example");
    println!("=================================\n");

    // Seed the database with a handful of documents. Each one carries a title,
    // a category label, and a 4-dimensional embedding. The embeddings are
    // hand-written stand-ins for what an embedding model would produce in a
    // real application.
    let docs = {
        let mut txn = db.begin()?;

        // (title, embedding, category) triples to insert.
        let seed_data = vec![
            ("Introduction to Machine Learning", vec![0.9f32, 0.8, 0.7, 0.1], "technology"),
            ("Deep Learning Fundamentals", vec![0.85f32, 0.82, 0.75, 0.15], "technology"),
            ("Neural Networks in Practice", vec![0.88f32, 0.79, 0.72, 0.12], "technology"),
            ("Cooking Italian Pasta", vec![0.1f32, 0.2, 0.9, 0.85], "food"),
            ("French Cuisine Basics", vec![0.12f32, 0.18, 0.88, 0.82], "food"),
            ("Baking Sourdough Bread", vec![0.15f32, 0.22, 0.92, 0.78], "food"),
            ("History of Rome", vec![0.3f32, 0.1, 0.2, 0.95], "history"),
            ("Ancient Greece", vec![0.28f32, 0.12, 0.18, 0.92], "history"),
        ];

        // Each entity's id is kept next to its title so the entities can be
        // fetched back by id once this write transaction has committed.
        let mut created = Vec::new();

        for (title, embedding, category) in seed_data {
            // Every document gets the shared "Document" label plus its category.
            let entity = txn
                .create_entity()?
                .with_label("Document")
                .with_label(category)
                .with_property("title", title)
                .with_property("embedding", embedding);

            txn.put_entity(&entity)?;
            created.push((entity.id, String::from(title)));
        }

        txn.commit()?;
        println!("Created {} documents with embeddings", created.len());
        created
    };

    // Cosine similarity of `a` and `b`: their dot product divided by the
    // product of their Euclidean norms.
    //
    // Returns 0.0 when either vector has zero norm (an empty slice included),
    // which avoids dividing by zero. The dot product runs over zipped pairs, so
    // when the lengths differ the surplus components of the longer slice count
    // towards its norm but not towards the dot product; callers are expected to
    // pass vectors of equal dimension.
    fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
        let magnitude = |v: &[f32]| v.iter().map(|c| c * c).sum::<f32>().sqrt();
        let (mag_a, mag_b) = (magnitude(a), magnitude(b));

        if mag_a != 0.0 && mag_b != 0.0 {
            let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
            dot / (mag_a * mag_b)
        } else {
            0.0
        }
    }

    // Euclidean (L2) distance between `a` and `b`: the square root of the sum
    // of squared component differences.
    //
    // Components are paired with `zip`, so when the lengths differ only the
    // common prefix is compared; two empty slices are at distance zero.
    fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
        let squared_sum: f32 = a
            .iter()
            .zip(b)
            .map(|(p, q)| {
                let delta = p - q;
                delta.powi(2)
            })
            .sum();

        squared_sum.sqrt()
    }

    // Brute-force similarity search: score every stored document against one
    // query vector and rank the documents by cosine similarity.
    println!("\n--- Similarity Search ---\n");

    // Stand-in for the embedding of a machine-learning query: like the
    // "technology" documents it is high in the first three dimensions and low
    // in the last one.
    let query_embedding = vec![0.87f32, 0.81, 0.73, 0.13];
    println!("Query: 'machine learning related topics'");
    println!("Query embedding: {:?}\n", query_embedding);

    // The read transaction lives only for the duration of this scope.
    {
        let reader = db.begin_read()?;

        // One (title, cosine similarity, Euclidean distance) row per document.
        let mut scored: Vec<(String, f32, f32)> = Vec::new();

        for (id, title) in docs.iter() {
            // Ids that no longer resolve to an entity are skipped.
            let entity = match reader.get_entity(*id)? {
                Some(found) => found,
                None => continue,
            };

            // Only properties holding a vector value can be scored.
            if let Some(Value::Vector(embedding)) = entity.get_property("embedding") {
                let cosine = cosine_similarity(&query_embedding, embedding);
                let distance = euclidean_distance(&query_embedding, embedding);
                scored.push((title.clone(), cosine, distance));
            }
        }

        // Highest cosine similarity first; incomparable scores are treated as equal.
        scored.sort_by(|lhs, rhs| {
            rhs.1.partial_cmp(&lhs.1).unwrap_or(std::cmp::Ordering::Equal)
        });

        println!("Results sorted by cosine similarity:");
        for row in scored.iter() {
            println!("  {:.4} (dist: {:.4}) - {}", row.1, row.2, row.0);
        }

        // The three best matches, numbered from 1.
        println!("\nTop 3 most similar documents:");
        for (place, (title, cosine, _)) in (1..=3).zip(scored.iter()) {
            println!("  {}. {} (similarity: {:.4})", place, title, cosine);
        }
    }

    // Second query, this time with an embedding resembling the "food"
    // documents. No label filter is applied here: the ranking relies on
    // embedding similarity alone.
    println!("\n--- Category-based Vector Search ---\n");

    let food_query = vec![0.11f32, 0.19, 0.89, 0.83];
    println!("Query: 'cooking and food'");

    // The read transaction lives only for the duration of this scope.
    {
        let reader = db.begin_read()?;

        // (title, cosine similarity) for every document that has a vector embedding.
        let mut ranked: Vec<(String, f32)> = Vec::new();

        for (id, title) in docs.iter() {
            let entity = match reader.get_entity(*id)? {
                Some(found) => found,
                None => continue,
            };

            if let Some(Value::Vector(embedding)) = entity.get_property("embedding") {
                ranked.push((title.clone(), cosine_similarity(&food_query, embedding)));
            }
        }

        // Best match first; incomparable scores are treated as equal.
        ranked.sort_by(|lhs, rhs| {
            rhs.1.partial_cmp(&lhs.1).unwrap_or(std::cmp::Ordering::Equal)
        });

        println!("Top 3 results for food query:");
        for (place, (title, similarity)) in (1..=3).zip(ranked.iter()) {
            println!("  {}. {} ({:.4})", place, title, similarity);
        }
    }

    // "More like this": use the stored embedding of one document as the query
    // and rank all the other documents against it.
    println!("\n--- Find Similar Documents ---\n");

    // The read transaction lives only for the duration of this scope.
    {
        let reader = db.begin_read()?;

        // The first inserted document ("Introduction to Machine Learning") is
        // the reference point.
        let (reference_id, reference_title) = (docs[0].0, &docs[0].1);

        // Nothing is printed if the reference entity or its vector embedding is missing.
        if let Some(reference) = reader.get_entity(reference_id)? {
            if let Some(Value::Vector(reference_embedding)) = reference.get_property("embedding") {
                println!("Finding documents similar to: '{}'", reference_title);

                let mut neighbours: Vec<(String, f32)> = Vec::new();

                for (id, title) in docs.iter() {
                    // The reference document is never compared with itself.
                    if *id != reference_id {
                        if let Some(candidate) = reader.get_entity(*id)? {
                            if let Some(Value::Vector(embedding)) =
                                candidate.get_property("embedding")
                            {
                                let score = cosine_similarity(reference_embedding, embedding);
                                neighbours.push((title.clone(), score));
                            }
                        }
                    }
                }

                // Closest neighbour first; incomparable scores are treated as equal.
                neighbours.sort_by(|lhs, rhs| {
                    rhs.1.partial_cmp(&lhs.1).unwrap_or(std::cmp::Ordering::Equal)
                });

                println!("\nMost similar documents:");
                for neighbour in neighbours.iter().take(3) {
                    println!("  {:.4} - {}", neighbour.1, neighbour.0);
                }
            }
        }
    }

    // Combine a label filter with similarity ranking: only documents labelled
    // "food" are scored against the query vector.
    println!("\n--- Filtered Vector Search ---\n");
    // The read transaction lives only for the duration of this scope.
    {
        let reader = db.begin_read()?;

        let query = vec![0.3f32, 0.1, 0.2, 0.9];
        println!("Query embedding: {:?}", query);
        println!("Filter: only 'food' labeled documents\n");

        let mut matches: Vec<(String, f32)> = Vec::new();

        for (id, title) in docs.iter() {
            let entity = match reader.get_entity(*id)? {
                Some(found) => found,
                None => continue,
            };

            // Apply the label filter before doing any vector arithmetic.
            if !entity.has_label("food") {
                continue;
            }

            if let Some(Value::Vector(embedding)) = entity.get_property("embedding") {
                matches.push((title.clone(), cosine_similarity(&query, embedding)));
            }
        }

        // Best match first; incomparable scores are treated as equal.
        matches.sort_by(|lhs, rhs| {
            rhs.1.partial_cmp(&lhs.1).unwrap_or(std::cmp::Ordering::Equal)
        });

        println!("Results (food documents only):");
        for hit in matches.iter() {
            println!("  {:.4} - {}", hit.1, hit.0);
        }
    }

    println!("\nVector search example complete!");
    println!("\nNote: Full vector indexing (HNSW) is available in the query engine.");
    println!("Use SQL with ORDER BY embedding <-> $query for indexed searches.");

    Ok(())
}