#![cfg_attr(not(feature = "embeddings"), allow(dead_code, unused_imports))]
#[cfg(feature = "embeddings")]
use vecstore::{
embeddings::{EmbeddingCollection, SimpleEmbedder, TextEmbedder},
Metadata, VecDatabase,
};
#[cfg(feature = "embeddings")]
use std::collections::HashMap;
/// Scratch directory backing the database used by sections 3-5 and 9 of the demo.
#[cfg(feature = "embeddings")]
const DEMO_DB_DIR: &str = "./demo_embedding";

/// Scratch directory backing the database used by the batch-insert section.
#[cfg(feature = "embeddings")]
const BATCH_DB_DIR: &str = "./demo_embedding_batch";

/// Best-effort removal of the demo's scratch directories.
///
/// Errors are deliberately ignored: a directory may simply not exist (first
/// run, or a run that failed before creating it), and a failed cleanup must
/// not mask the demo's own result.
#[cfg(feature = "embeddings")]
fn remove_demo_dirs() {
    for dir in [DEMO_DB_DIR, BATCH_DB_DIR] {
        std::fs::remove_dir_all(dir).ok();
    }
}

/// Entry point: runs the embedding demo and always cleans up afterwards.
///
/// The scratch directories are removed both before and after the run:
/// - before, so that leftovers from an aborted earlier run cannot influence
///   this one (NOTE(review): how `VecDatabase::open` treats an existing
///   directory is not visible from this file);
/// - after, on success *and* on failure. Because the demo body lives in
///   [`run_demo`], every database/collection handle it created has already
///   been dropped by the time the directories are deleted.
///
/// # Errors
///
/// Returns the first error produced by any demo step.
#[cfg(feature = "embeddings")]
fn main() -> anyhow::Result<()> {
    remove_demo_dirs();
    let outcome = run_demo();
    remove_demo_dirs();
    outcome
}

/// Walks through the nine demo sections, printing the results to stdout.
///
/// Creates databases under [`DEMO_DB_DIR`] and [`BATCH_DB_DIR`]; the caller is
/// responsible for removing those directories once this function has returned.
///
/// # Errors
///
/// Propagates any error returned by the embedders, the databases or the
/// collections.
#[cfg(feature = "embeddings")]
fn run_demo() -> anyhow::Result<()> {
    println!("=== VecStore Embedding Integration Demo ===\n");

    // --- 1. Embed a single text with the concrete SimpleEmbedder ---
    println!("1. SimpleEmbedder - No ONNX Required\n");
    let simple_embedder = SimpleEmbedder::new(128);
    let text = "Rust is a systems programming language";
    let embedding = simple_embedder.embed(text)?;
    println!(" Text: \"{}\"", text);
    println!(" Embedding dimension: {}", embedding.len());
    // Clamp the preview so a shorter-than-expected embedding cannot panic.
    let preview_len = embedding.len().min(5);
    println!(" First 5 values: {:?}\n", &embedding[..preview_len]);

    // --- 2. Same embedder, used through the trait object ---
    println!("2. TextEmbedder Trait Abstraction\n");
    let embedder: Box<dyn TextEmbedder> = Box::new(SimpleEmbedder::new(128));
    let texts = vec!["First text", "Second text", "Third text"];
    let batch_embeddings = embedder.embed_batch(&texts)?;
    println!(" Batch embedded {} texts", batch_embeddings.len());
    println!(
        " Each embedding has {} dimensions\n",
        embedder.dimension()?
    );

    // --- 3. Wrap a collection so it accepts text directly ---
    println!("3. EmbeddingCollection - Seamless Text-to-Vector\n");
    let mut db = VecDatabase::open(DEMO_DB_DIR)?;
    let collection = db.create_collection("documents")?;
    let collection_embedder = SimpleEmbedder::new(128);
    let mut emb_collection = EmbeddingCollection::new(collection, Box::new(collection_embedder));
    println!(" Created EmbeddingCollection with SimpleEmbedder\n");

    // --- 4. Insert documents one at a time; each row is (id, text, category, tag) ---
    println!("4. Inserting Text Documents (Auto-Embedding)\n");
    let docs = [
        (
            "rust_doc",
            "Rust is a fast and memory-safe systems programming language",
            "programming",
            "rust",
        ),
        (
            "python_doc",
            "Python is a high-level interpreted programming language",
            "programming",
            "python",
        ),
        (
            "javascript_doc",
            "JavaScript is the programming language of the web",
            "programming",
            "javascript",
        ),
        (
            "database_doc",
            "Databases store and organize data efficiently",
            "data",
            "database",
        ),
        (
            "ml_doc",
            "Machine learning enables computers to learn from data",
            "ai",
            "machine-learning",
        ),
    ];
    for (id, text, category, tag) in &docs {
        let mut meta = Metadata {
            fields: HashMap::new(),
        };
        // The original text is stored as metadata so section 5 can print it back.
        meta.fields.insert("text".into(), serde_json::json!(text));
        meta.fields
            .insert("category".into(), serde_json::json!(category));
        meta.fields.insert("tag".into(), serde_json::json!(tag));
        emb_collection.upsert_text(*id, text, meta)?;
        println!(" ✓ Inserted: {} ({})", id, category);
    }
    println!("\n Total documents: {}\n", emb_collection.count()?);

    // --- 5. Query by text and show the top 3 hits per query ---
    println!("5. Querying with Text (Auto-Embedding)\n");
    let queries = [
        "programming language",
        "data storage",
        "artificial intelligence",
    ];
    for query in &queries {
        println!(" Query: \"{}\"", query);
        let results = emb_collection.query_text(query, 3, None)?;
        for (rank, result) in results.iter().enumerate() {
            let stored_text = result
                .metadata
                .fields
                .get("text")
                .and_then(|v| v.as_str())
                .unwrap_or("N/A");
            // Truncate by characters (not bytes) to stay on UTF-8 boundaries.
            let preview: String = stored_text.chars().take(50).collect();
            println!(" {}. {} (score: {:.4})", rank + 1, result.id, result.score);
            println!(" \"{}\"", preview);
        }
        println!();
    }

    // --- 6. Insert several documents with a single call ---
    println!("6. Batch Text Insert (Efficient Bulk Loading)\n");
    let mut new_db = VecDatabase::open(BATCH_DB_DIR)?;
    let new_collection = new_db.create_collection("batch_docs")?;
    let new_embedder = SimpleEmbedder::new(128);
    let mut batch_collection = EmbeddingCollection::new(new_collection, Box::new(new_embedder));
    // Every batch document carries the same metadata: {"batch": 1}.
    let batch_docs: Vec<_> = [
        ("doc1", "First batch document"),
        ("doc2", "Second batch document"),
        ("doc3", "Third batch document"),
    ]
    .iter()
    .map(|&(id, text)| {
        let mut meta = Metadata {
            fields: HashMap::new(),
        };
        meta.fields.insert("batch".into(), serde_json::json!(1));
        (id.to_string(), text.to_string(), meta)
    })
    .collect();
    batch_collection.batch_upsert_text(batch_docs)?;
    println!(
        " ✓ Batch inserted {} documents",
        batch_collection.count()?
    );
    println!(" (More efficient than individual upserts)\n");

    // --- 7. A hand-written TextEmbedder implementation ---
    println!("7. Custom Embedder Implementation\n");

    /// Toy embedder: a deterministic, L2-normalised vector derived from the
    /// text's byte length and its first few characters.
    struct FixedEmbedder;

    impl FixedEmbedder {
        /// Length of every vector produced by this embedder.
        const DIMENSION: usize = 128;
        /// Number of leading characters that overwrite the start of the vector.
        const PREFIX_CHARS: usize = 10;
    }

    impl TextEmbedder for FixedEmbedder {
        /// Builds the vector for `text`.
        ///
        /// Every component starts as `byte_len / 100`; the first
        /// `PREFIX_CHARS` components are then replaced by `code_point / 1000`
        /// of the corresponding character, and the result is scaled to unit
        /// length unless its magnitude is zero.
        fn embed(&self, text: &str) -> anyhow::Result<Vec<f32>> {
            let fill = (text.len() as f32) / 100.0;
            let mut values = vec![fill; Self::DIMENSION];
            // `zip` stops at the shorter side, so this can never write out of bounds.
            for (slot, c) in values
                .iter_mut()
                .zip(text.chars().take(Self::PREFIX_CHARS))
            {
                *slot = (c as u32 as f32) / 1000.0;
            }
            let magnitude: f32 = values.iter().map(|x| x * x).sum::<f32>().sqrt();
            // Skip normalisation for the all-zero vector (empty text) to avoid NaNs.
            if magnitude > 0.0 {
                for v in &mut values {
                    *v /= magnitude;
                }
            }
            Ok(values)
        }

        /// Reports the fixed vector length.
        fn dimension(&self) -> anyhow::Result<usize> {
            Ok(Self::DIMENSION)
        }
    }

    let custom_embedder: Box<dyn TextEmbedder> = Box::new(FixedEmbedder);
    let custom_embedding = custom_embedder.embed("Test custom embedder")?;
    println!(
        " Custom embedder dimension: {}",
        custom_embedder.dimension()?
    );
    println!(
        " Generated embedding with {} values",
        custom_embedding.len()
    );
    println!(" Can be used with EmbeddingCollection!\n");

    // --- 8. Explanatory text only ---
    println!("8. Comparison: Manual vs Automatic Embedding\n");
    println!(" Manual approach:");
    println!(" 1. text → embedder.embed(text) → vector");
    println!(" 2. collection.upsert(id, vector, metadata)");
    println!();
    println!(" With EmbeddingCollection:");
    println!(" 1. emb_collection.upsert_text(id, text, metadata)");
    println!(" (Embedding happens automatically!)\n");

    // --- 9. Statistics of the collection filled in section 4 ---
    println!("9. Collection Statistics\n");
    let stats = emb_collection.stats()?;
    println!(" Total vectors: {}", stats.vector_count);
    println!(" Active vectors: {}", stats.active_count);

    println!("\n=== Summary ===");
    println!();
    println!("Phase 5 Embedding Integration provides:");
    println!(" ✓ TextEmbedder trait - Pluggable embedding models");
    println!(" ✓ SimpleEmbedder - Testing without ONNX Runtime");
    println!(" ✓ EmbeddingCollection - Text-based APIs for collections");
    println!(" ✓ Batch processing - Efficient bulk operations");
    println!(" ✓ Custom embedders - Implement your own embedding logic");
    println!();
    println!("Perfect for:");
    println!(" • RAG applications with automatic embedding");
    println!(" • Testing without external dependencies");
    println!(" • Custom embedding model integration");
    println!(" • Simplified text-to-vector workflows");
    println!();
    println!("Next steps:");
    println!(" • Use Embedder + ONNX Runtime for production models");
    println!(" • Combine with text_splitter for document chunking");
    println!(" • Integrate with your embedding API (OpenAI, Cohere, etc.)");

    // Directory cleanup is done by `main`, after all handles above are dropped.
    Ok(())
}
/// Fallback entry point used when the example is built without the
/// `embeddings` feature: explains on stderr how to enable it, then exits
/// with status 1.
#[cfg(not(feature = "embeddings"))]
fn main() {
    let hints = [
        "This example requires the 'embeddings' feature.",
        "Run with: cargo run --example embedding_integration_demo --features embeddings",
    ];
    for hint in hints.iter() {
        eprintln!("{}", hint);
    }
    std::process::exit(1);
}