next-plaid

A pure Rust, CPU-only implementation of the PLAID algorithm for efficient multi-vector search (late interaction retrieval). This is a direct port of the fast-plaid Python library with optimizations for production use.
Installation
Add to your Cargo.toml:
[dependencies]
next-plaid = "0.2"
Optional Features
Enable BLAS acceleration for faster K-means clustering:
next-plaid = { version = "0.2", features = ["accelerate"] }
next-plaid = { version = "0.2", features = ["openblas"] }
Quick Start
use next_plaid::{MmapIndex, IndexConfig, UpdateConfig, SearchParameters};
use ndarray::array;
fn main() -> Result<(), Box<dyn std::error::Error>> {
let doc1 = array![[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]; let doc2 = array![[0.7, 0.8, 0.9], [0.2, 0.3, 0.4]]; let embeddings = vec![doc1, doc2];
let index_config = IndexConfig::default(); let update_config = UpdateConfig::default();
let (index, doc_ids) = MmapIndex::update_or_create(
&embeddings, "my_index", &index_config, &update_config
)?;
println!("Indexed documents with IDs: {:?}", doc_ids);
let query = array![[0.15, 0.25, 0.35], [0.45, 0.55, 0.65]];
let params = SearchParameters::default();
let results = index.search(&query, ¶ms, None)?;
println!("Top results: {:?}", results.passage_ids);
println!("Scores: {:?}", results.scores);
Ok(())
}
API Overview
Note: Examples below that load an existing index assume you've first run the Quick Start example to create my_index/.
Index Creation
use next_plaid::{MmapIndex, IndexConfig, UpdateConfig};
use ndarray::array;
fn main() -> Result<(), Box<dyn std::error::Error>> {
let index_config = IndexConfig {
nbits: 4, batch_size: 50_000, seed: Some(42), kmeans_niters: 4, max_points_per_centroid: 256, n_samples_kmeans: None, start_from_scratch: 999, };
let update_config = UpdateConfig::default();
let embeddings = vec![
array![[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
array![[0.7, 0.8, 0.9], [0.2, 0.3, 0.4]],
];
let (index, doc_ids) = MmapIndex::update_or_create(
&embeddings, "my_index", &index_config, &update_config
)?;
println!("Indexed {} documents", doc_ids.len());
let index = MmapIndex::load("my_index")?;
println!("Loaded index with {} documents", index.num_documents());
Ok(())
}
Search
use next_plaid::{MmapIndex, SearchParameters};
use ndarray::array;
fn main() -> Result<(), Box<dyn std::error::Error>> {
let index = MmapIndex::load("my_index")?;
let params = SearchParameters {
top_k: 10, n_ivf_probe: 8, n_full_scores: 4096, centroid_batch_size: 100_000, batch_size: 2000, centroid_score_threshold: Some(0.4), };
let query = array![[0.15, 0.25, 0.35], [0.45, 0.55, 0.65]];
let results = index.search(&query, ¶ms, None)?;
println!("Top result: doc {} with score {}", results.passage_ids[0], results.scores[0]);
let queries = vec![
array![[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
array![[0.5, 0.6, 0.7], [0.8, 0.9, 1.0]],
];
let batch_results = index.search_batch(&queries, ¶ms, true, None)?;
println!("Found {} results for {} queries", batch_results.len(), queries.len());
Ok(())
}
Incremental Updates
use next_plaid::{MmapIndex, IndexConfig, UpdateConfig};
use ndarray::array;
fn main() -> Result<(), Box<dyn std::error::Error>> {
let index_config = IndexConfig::default();
let update_config = UpdateConfig {
buffer_size: 100, kmeans_niters: 4, start_from_scratch: 999, batch_size: 50_000, max_points_per_centroid: 256, n_samples_kmeans: None, seed: 42, };
let embeddings = vec![
array![[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
array![[0.7, 0.8, 0.9], [0.2, 0.3, 0.4]],
];
let (index, doc_ids) = MmapIndex::update_or_create(
&embeddings, "my_index", &index_config, &update_config
)?;
println!("First batch: indexed documents {:?}", doc_ids);
let more_embeddings = vec![
array![[0.5, 0.6, 0.7], [0.8, 0.9, 1.0]],
];
let (index, new_doc_ids) = MmapIndex::update_or_create(
&more_embeddings, "my_index", &index_config, &update_config
)?;
println!("Second batch: added documents {:?}", new_doc_ids);
println!("Index now has {} documents", index.num_documents());
Ok(())
}
Document Deletion
use next_plaid::MmapIndex;
fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut index = MmapIndex::load("my_index")?;
println!("Documents before deletion: {}", index.num_documents());
let deleted_count = index.delete(&[0])?;
println!("Deleted {} documents", deleted_count);
println!("Documents after deletion: {}", index.num_documents());
Ok(())
}
Metadata Filtering
use next_plaid::{MmapIndex, SearchParameters, filtering};
use ndarray::array;
use serde_json::json;
fn main() -> Result<(), Box<dyn std::error::Error>> {
let index = MmapIndex::load("my_index")?;
let metadata = vec![
json!({"category": "science", "year": 2024}),
json!({"category": "tech", "year": 2023}),
];
filtering::create("my_index", &metadata)?;
let subset = filtering::where_condition(
"my_index",
"category = ? AND year > ?",
&[json!("science"), json!(2020)]
)?;
println!("Found {} documents matching filter", subset.len());
let query = array![[0.15, 0.25, 0.35], [0.45, 0.55, 0.65]];
let params = SearchParameters::default();
let results = index.search(&query, ¶ms, Some(&subset))?;
println!("Top filtered result: doc {}", results.passage_ids[0]);
Ok(())
}
Embedding Reconstruction
use next_plaid::MmapIndex;
fn main() -> Result<(), Box<dyn std::error::Error>> {
let index = MmapIndex::load("my_index")?;
let reconstructed = index.reconstruct(&[0, 1])?;
for (i, emb) in reconstructed.iter().enumerate() {
println!("Document {}: {} tokens x {} dimensions", i, emb.nrows(), emb.ncols());
}
Ok(())
}
Configuration Guide
Choosing nbits
| nbits |
Storage |
Quality |
Use Case |
| 2 |
~32x compression |
Good |
Large-scale indices, memory-constrained |
| 4 |
~16x compression |
Better |
Default, balanced trade-off |
Tuning Search Parameters
n_ivf_probe: Higher values improve recall at the cost of latency. Start with 8-16.
n_full_scores: Candidates for exact MaxSim scoring. Increase for better recall on hard queries.
centroid_batch_size: Controls memory during IVF probing. Lower values reduce peak memory.