engram/embedding.rs
1//! `EmbeddingProvider` trait — pluggable text-embedding interface for Engram.
2//!
3//! Any embedding backend (Ollama, OpenAI, local ONNX models, …) implements
4//! this trait so that `Memory` and other Engram components can embed text
5//! without coupling to a specific provider.
6
7use crate::store::MemoryError;
8use async_trait::async_trait;
9
10// ---------------------------------------------------------------------------
11// EmbeddingProvider trait
12// ---------------------------------------------------------------------------
13
14/// Pluggable text-embedding provider.
15///
16/// Implementations MUST be `Send + Sync` so that `Arc<dyn EmbeddingProvider>`
17/// can be shared across async task boundaries.
18#[async_trait]
19pub trait EmbeddingProvider: Send + Sync {
20 /// Embed a batch of texts.
21 ///
22 /// Returns one vector per input text, in the same order.
23 /// Each returned vector has length `self.dimensions()`.
24 async fn embed(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>, MemoryError>;
25
26 /// The dimensionality of all vectors produced by this provider.
27 fn dimensions(&self) -> usize;
28}
29
30// ---------------------------------------------------------------------------
31// MockEmbeddingProvider — deterministic, test-only
32// ---------------------------------------------------------------------------
33
/// Deterministic embedding provider for unit and integration tests.
///
/// Each non-empty input text is converted to a vector of length `dims` by
/// cycling through its UTF-8 bytes and mapping every byte from `0..=255`
/// onto `-1.0..=1.0`:
///
/// ```text
/// value = (byte / 255.0) * 2.0 - 1.0
/// ```
///
/// An empty input text yields an all-zero vector of length `dims`.
///
/// This produces stable, reproducible vectors without requiring a real model.
///
/// The common traits are derived so the provider can be logged with `{:?}`,
/// duplicated, and compared inside test assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MockEmbeddingProvider {
    /// Length of every vector this provider emits.
    dims: usize,
}

impl MockEmbeddingProvider {
    /// Create a new mock provider that produces `dims`-dimensional vectors.
    ///
    /// `dims` may be zero, in which case every produced vector is empty.
    #[must_use]
    pub fn new(dims: usize) -> Self {
        Self { dims }
    }
}
54
55#[async_trait]
56impl EmbeddingProvider for MockEmbeddingProvider {
57 async fn embed(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>, MemoryError> {
58 let embeddings = texts
59 .iter()
60 .map(|text| {
61 let bytes = text.as_bytes();
62 if bytes.is_empty() {
63 // All-zero vector for empty strings.
64 return vec![0.0_f32; self.dims];
65 }
66 (0..self.dims)
67 .map(|i| {
68 let byte = bytes[i % bytes.len()] as f32;
69 (byte / 255.0) * 2.0 - 1.0
70 })
71 .collect()
72 })
73 .collect();
74 Ok(embeddings)
75 }
76
77 fn dimensions(&self) -> usize {
78 self.dims
79 }
80}