use bytes::Bytes;
use mnem_core::objects::{Dtype, Embedding};
use crate::error::EmbedError;
use crate::manifest::EmbedderManifest;
/// Interface implemented by text-embedding providers.
///
/// Implementors must be `Send + Sync`.
pub trait Embedder: Send + Sync {
    /// Returns the model name of this embedder.
    fn model(&self) -> &str;

    /// Returns the embedder's dimension.
    ///
    /// NOTE(review): presumably the length of the vectors produced by
    /// [`Embedder::embed`]; nothing in this trait enforces that.
    fn dim(&self) -> u32;

    /// Embeds a single piece of text into a vector of `f32` values.
    ///
    /// # Errors
    ///
    /// Returns an [`EmbedError`] when the implementation cannot produce a vector.
    fn embed(&self, text: &str) -> Result<Vec<f32>, EmbedError>;

    /// Embeds each entry of `texts`, returning the vectors in input order.
    ///
    /// The default implementation calls [`Embedder::embed`] once per entry.
    ///
    /// # Errors
    ///
    /// Returns the first error produced by [`Embedder::embed`]; entries after
    /// the failing one are not embedded.
    fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>, EmbedError> {
        let mut vectors = Vec::with_capacity(texts.len());
        for text in texts {
            vectors.push(self.embed(text)?);
        }
        Ok(vectors)
    }

    /// Returns the manifest for this embedder.
    ///
    /// # Panics
    ///
    /// The default implementation always panics, naming the model. Per the
    /// panic message, every provider is expected to override this method to
    /// declare its noise floor.
    #[must_use]
    fn manifest(&self) -> EmbedderManifest {
        let model = self.model();
        panic!(
            "Embedder::manifest() not implemented for model {:?}; \
             every provider must override manifest() to declare its \
             noise_floor (Gap 15)",
            model
        );
    }
}
/// Builds an [`Embedding`] from a slice of `f32` values.
///
/// Each value is serialized as its 4-byte little-endian representation, in
/// order, so the resulting `vector` holds `v.len() * 4` bytes. The `dtype` is
/// always `Dtype::F32` and `model` is copied into the result.
///
/// `dim` is `v.len()` converted to `u32`; if the length does not fit in a
/// `u32`, `dim` is set to `u32::MAX` rather than panicking.
#[must_use]
pub fn to_embedding(model: &str, v: &[f32]) -> Embedding {
    // Reserve the exact payload size up front: four bytes per f32.
    let mut packed: Vec<u8> = Vec::with_capacity(v.len() * 4);
    packed.extend(v.iter().flat_map(|value| value.to_le_bytes()));

    let dim = match u32::try_from(v.len()) {
        Ok(len) => len,
        Err(_) => u32::MAX,
    };

    Embedding {
        model: model.to_owned(),
        dtype: Dtype::F32,
        dim,
        vector: Bytes::from(packed),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the metadata and the exact byte layout produced by
    /// `to_embedding` for a small input, then runs `Embedding::validate`.
    #[test]
    fn to_embedding_packs_f32_little_endian() {
        let values = [1.0f32, -2.5, 3.25];
        let e = to_embedding("test:model", &values);

        assert_eq!(e.model, "test:model");
        assert_eq!(e.dim, 3);
        assert_eq!(e.dtype, Dtype::F32);
        assert_eq!(e.vector.len(), 12);

        // Pin the byte order, not just the length: a big-endian or
        // native-endian regression would otherwise go unnoticed.
        let expected: [u8; 12] = [
            0x00, 0x00, 0x80, 0x3f, // 1.0
            0x00, 0x00, 0x20, 0xc0, // -2.5
            0x00, 0x00, 0x50, 0x40, // 3.25
        ];
        assert_eq!(&e.vector[..], &expected[..]);

        e.validate().unwrap();
    }
}