Skip to main content

engram/
embedding.rs

1//! `EmbeddingProvider` trait — pluggable text-embedding interface for Engram.
2//!
3//! Any embedding backend (Ollama, OpenAI, local ONNX models, …) implements
4//! this trait so that `Memory` and other Engram components can embed text
5//! without coupling to a specific provider.
6
7use crate::store::MemoryError;
8use async_trait::async_trait;
9
10// ---------------------------------------------------------------------------
11// EmbeddingProvider trait
12// ---------------------------------------------------------------------------
13
14/// Pluggable text-embedding provider.
15///
16/// Implementations MUST be `Send + Sync` so that `Arc<dyn EmbeddingProvider>`
17/// can be shared across async task boundaries.
18#[async_trait]
19pub trait EmbeddingProvider: Send + Sync {
20    /// Embed a batch of texts.
21    ///
22    /// Returns one vector per input text, in the same order.
23    /// Each returned vector has length `self.dimensions()`.
24    async fn embed(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>, MemoryError>;
25
26    /// The dimensionality of all vectors produced by this provider.
27    fn dimensions(&self) -> usize;
28}
29
30// ---------------------------------------------------------------------------
31// MockEmbeddingProvider — deterministic, test-only
32// ---------------------------------------------------------------------------
33
/// Deterministic embedding provider for unit and integration tests.
///
/// Every non-empty input text becomes a vector of length `dims`: component
/// `i` is taken from byte `i % len` of the text's UTF-8 encoding (so the
/// bytes are cycled when the text is shorter than `dims`) and rescaled from
/// `0..=255` onto `-1.0..=1.0`:
///
/// ```text
/// value = (byte / 255.0) * 2.0 - 1.0
/// ```
///
/// An empty text has no bytes to cycle through and maps to the all-zero
/// vector instead.
///
/// This produces stable, reproducible vectors without requiring a real model.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MockEmbeddingProvider {
    /// Number of components in every vector this provider produces.
    dims: usize,
}
47
48impl MockEmbeddingProvider {
49    /// Create a new mock provider that produces `dims`-dimensional vectors.
50    pub fn new(dims: usize) -> Self {
51        Self { dims }
52    }
53}
54
55#[async_trait]
56impl EmbeddingProvider for MockEmbeddingProvider {
57    async fn embed(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>, MemoryError> {
58        let embeddings = texts
59            .iter()
60            .map(|text| {
61                let bytes = text.as_bytes();
62                if bytes.is_empty() {
63                    // All-zero vector for empty strings.
64                    return vec![0.0_f32; self.dims];
65                }
66                (0..self.dims)
67                    .map(|i| {
68                        let byte = bytes[i % bytes.len()] as f32;
69                        (byte / 255.0) * 2.0 - 1.0
70                    })
71                    .collect()
72            })
73            .collect();
74        Ok(embeddings)
75    }
76
77    fn dimensions(&self) -> usize {
78        self.dims
79    }
80}