use anyhow::Result;
use brainwires::prelude::*;
/// Toy embedding provider that buckets the bytes of the input text into a
/// fixed-size vector. Deterministic and dependency-free — useful for demos
/// and tests, not for semantic quality.
#[derive(Debug, Clone)]
struct HashEmbedding {
    // Length of every embedding vector produced by `embed`.
    dim: usize,
}

impl HashEmbedding {
    /// Creates a provider that emits `dim`-dimensional embeddings.
    fn new(dim: usize) -> Self {
        Self { dim }
    }
}
impl EmbeddingProvider for HashEmbedding {
    /// Produces an L2-normalized embedding by accumulating byte values
    /// (scaled to [0, 1]) into `dim` slots, keyed by byte index modulo `dim`.
    ///
    /// # Errors
    /// Never fails; the `Result` satisfies the trait signature.
    fn embed(&self, text: &str) -> Result<Vec<f32>> {
        // Guard: `i % 0` panics at runtime, so a zero-dimensional provider
        // returns an empty vector instead of crashing on non-empty input.
        if self.dim == 0 {
            return Ok(Vec::new());
        }
        let mut vec = vec![0.0f32; self.dim];
        for (i, byte) in text.bytes().enumerate() {
            vec[i % self.dim] += byte as f32 / 255.0;
        }
        // L2-normalize so dot products between embeddings are cosine
        // similarities; skip the all-zero vector (empty `text`) to avoid
        // dividing by zero.
        let norm: f32 = vec.iter().map(|x| x * x).sum::<f32>().sqrt();
        if norm > 0.0 {
            for v in &mut vec {
                *v /= norm;
            }
        }
        Ok(vec)
    }

    /// Number of dimensions in each embedding vector.
    fn dimension(&self) -> usize {
        self.dim
    }

    /// Static identifier for this (non-learned) embedding scheme.
    fn model_name(&self) -> &str {
        "hash-embedding-v1"
    }
}
/// Computes the cosine similarity between two vectors:
/// `dot(a, b) / (|a| * |b|)`.
///
/// The previous implementation returned only the raw dot product, which is
/// a cosine similarity only when both inputs are already unit-length; this
/// version is correct for arbitrary vectors while producing the same value
/// (up to float rounding) for pre-normalized inputs.
///
/// Returns 0.0 when either vector has zero magnitude (the similarity is
/// undefined there). If the slices differ in length, only the overlapping
/// prefix contributes to the dot product (`zip` stops at the shorter input).
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    if norm_a == 0.0 || norm_b == 0.0 {
        0.0
    } else {
        dot / (norm_a * norm_b)
    }
}
/// Demo entry point: builds a `HashEmbedding` provider, embeds a few sample
/// phrases, prints their pairwise cosine similarities, and exercises the
/// batch-embedding path.
fn main() -> Result<()> {
    let provider = HashEmbedding::new(64);

    println!("Model: {}", provider.model_name());
    println!("Dimension: {}", provider.dimension());

    // Two topically related phrases plus an unrelated control phrase.
    let ml = provider.embed("machine learning")?;
    let dl = provider.embed("deep learning")?;
    let smoothie = provider.embed("banana smoothie")?;

    println!("\nCosine similarities:");
    println!(
        " 'machine learning' vs 'deep learning': {:.4}",
        cosine_similarity(&ml, &dl)
    );
    println!(
        " 'machine learning' vs 'banana smoothie': {:.4}",
        cosine_similarity(&ml, &smoothie)
    );

    // Batch path: embed several documents in a single call.
    let docs: Vec<String> = ["first document", "second document"]
        .iter()
        .map(|s| s.to_string())
        .collect();
    let batch = provider.embed_batch(&docs)?;
    println!(
        "\nBatch embedded {} texts, each with {} dimensions",
        batch.len(),
        batch[0].len()
    );

    Ok(())
}