use embedd::fastembed::FastembedSparseEmbedder;
use embedd::{EmbedMode, SparseEmbedder};
/// Demo entry point: ranks a handful of sample documents against a query
/// using sparse embeddings.
///
/// The program builds the default sparse embedder, embeds three hard-coded
/// documents (in `EmbedMode::Document`) and one query (in `EmbedMode::Query`),
/// scores every document against the query with [`sparse_dot`], and prints the
/// documents from highest to lowest score together with the number of entries
/// in each document vector.
///
/// # Errors
///
/// Propagates any error returned while constructing the embedder or while
/// embedding the documents or the query.
fn main() -> anyhow::Result<()> {
    let sparse = FastembedSparseEmbedder::new_default()?;
    println!("model: {}", sparse.model_id());

    let docs = vec![
        "the quick brown fox jumps over the lazy dog".to_string(),
        "a fast red car drives along the highway".to_string(),
        "the lazy cat sleeps on the warm windowsill".to_string(),
    ];
    let query = "quick fox".to_string();

    let doc_vecs = sparse.embed_sparse(&docs, EmbedMode::Document)?;
    // `from_ref` lends the single query as a one-element slice without cloning it.
    let query_vecs = sparse.embed_sparse(std::slice::from_ref(&query), EmbedMode::Query)?;
    // NOTE(review): assumes `embed_sparse` returns one vector per input text;
    // this index panics if the result is empty — confirm against the trait contract.
    let query_vec = &query_vecs[0];

    println!("\nQuery: \"{query}\"");
    println!("Query sparse vector: {} non-zero terms", query_vec.len());

    // Pair each document's position in `docs` with its similarity to the query.
    let mut scores: Vec<(usize, f32)> = doc_vecs
        .iter()
        .enumerate()
        .map(|(i, doc_vec)| (i, sparse_dot(query_vec, doc_vec)))
        .collect();

    // Highest score first. `total_cmp` is a total order over f32, so a NaN
    // score can no longer panic the sort the way `partial_cmp(..).unwrap()` did.
    scores.sort_by(|a, b| b.1.total_cmp(&a.1));

    for (rank, (idx, score)) in scores.iter().enumerate() {
        println!(
            " #{}: {:.4} {} ({} terms)",
            rank + 1,
            score,
            &docs[*idx],
            doc_vecs[*idx].len(),
        );
    }

    Ok(())
}
/// Computes the dot product of two sparse vectors given as `(index, value)` pairs.
///
/// Only indices that occur in both `a` and `b` contribute to the result;
/// neither slice has to be sorted. An empty slice on either side yields `0.0`.
///
/// If an index is repeated, every matching pair still contributes: the values
/// for a repeated index in `b` are summed up front, which is mathematically
/// equal to summing all pairwise products (float rounding may differ in the
/// last bits for such inputs). When `b` has no repeated indices the additions
/// happen in the same order as a plain nested loop over `a` then `b`.
///
/// Runs in expected `O(a.len() + b.len())` time by indexing `b` in a hash map
/// instead of comparing every pair of entries.
fn sparse_dot(a: &[(u32, f32)], b: &[(u32, f32)]) -> f32 {
    // Index `b` by term id so each entry of `a` needs a single lookup.
    let mut b_by_index: std::collections::HashMap<u32, f32> =
        std::collections::HashMap::with_capacity(b.len());
    for &(bi, bv) in b {
        b_by_index
            .entry(bi)
            .and_modify(|total| *total += bv)
            .or_insert(bv);
    }

    // Explicit fold from +0.0 keeps the "no overlap" result at exactly 0.0.
    a.iter()
        .filter_map(|&(ai, av)| b_by_index.get(&ai).map(|&bv| av * bv))
        .fold(0.0f32, |score, term| score + term)
}