lattice_embed/service/mod.rs
1//! Embedding service trait and implementations.
2
3#[cfg(feature = "native")]
4mod cached;
5#[cfg(feature = "native")]
6mod native;
7
8#[cfg(test)]
9mod tests;
10
11use crate::error::{EmbedError, Result};
12use crate::model::{EmbeddingModel, ModelConfig};
13use async_trait::async_trait;
14
15// Re-exports
16#[cfg(feature = "native")]
17pub use cached::CachedEmbeddingService;
18#[cfg(feature = "native")]
19pub use native::NativeEmbeddingService;
20
/// **Stable**: default cap on the number of texts accepted in a single batch.
///
/// Guards against accidentally submitting a batch large enough to exhaust
/// memory. Callers with more inputs should split them into chunks of at most
/// this size and issue several calls.
pub const DEFAULT_MAX_BATCH_SIZE: usize = 1_000;
26
/// **Stable**: maximum allowed length of a single input text, in characters.
///
/// Bounds the size of any one input so that an extremely large text cannot be
/// used to exhaust memory. The limit is 32,768 characters (32 KiB for pure-ASCII
/// text, more bytes for multi-byte UTF-8), which is ample for most embedding
/// use cases while still preventing abuse.
pub const MAX_TEXT_CHARS: usize = 32 * 1024;
32
/// **Stable**: the role a piece of text plays in asymmetric retrieval.
///
/// Models trained with asymmetric objectives (E5, Qwen3-Embedding) expect
/// different prompt prefixes for queries and for documents. Embedding a text
/// under the wrong role places it in the wrong region of the model's retrieval
/// space and degrades retrieval quality.
///
/// Prefer [`EmbeddingService::embed_query`] / [`EmbeddingService::embed_passage`],
/// which pick the matching prefix automatically. Each role also has its own
/// cache tag, so that `embed_query("hello")` and `embed_passage("hello")` can be
/// cached as separate entries even though the raw text is identical.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EmbeddingRole {
    /// Query / question text — may receive a query-side prompt prefix.
    Query,
    /// Document / passage text — may receive a passage-side prompt prefix.
    Passage,
    /// Generic text with no role-specific prefix (backwards-compatible default).
    Generic,
}

impl EmbeddingRole {
    /// Short ASCII tag that identifies this role inside a cache key.
    ///
    /// Every variant maps to a different string, so feeding the tag into the
    /// cache-key hash makes the key change with the role even when the raw text
    /// and the model config are the same.
    #[inline]
    pub(crate) const fn cache_tag(self) -> &'static str {
        match self {
            Self::Query => "role:query",
            Self::Passage => "role:passage",
            Self::Generic => "role:generic",
        }
    }
}
68
69/// **Stable**: external consumers may depend on this; breaking changes require a SemVer bump.
70///
71/// Trait for embedding generation services.
72///
73/// This trait defines the interface for services that can convert text
74/// into vector embeddings. Implementations may use local models (native Rust)
75/// or remote APIs.
76///
77/// # Example
78///
79/// ```rust,no_run
80/// use lattice_embed::{EmbeddingService, EmbeddingModel, NativeEmbeddingService};
81///
82/// #[tokio::main]
83/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
84/// let service = NativeEmbeddingService::default();
85/// let embedding = service.embed_one("Hello, world!", EmbeddingModel::default()).await?;
86/// assert_eq!(embedding.len(), 384);
87/// Ok(())
88/// }
89/// ```
90#[async_trait]
91pub trait EmbeddingService: Send + Sync {
92 /// **Stable**: generate embeddings for multiple texts.
93 ///
94 /// Returns a vector of embeddings, one for each input text, in the same order.
95 /// Applies no role-specific prompt prefix (equivalent to `Generic` role).
96 /// Use [`embed_query`] / [`embed_passage`] for asymmetric retrieval models.
97 async fn embed(&self, texts: &[String], model: EmbeddingModel) -> Result<Vec<Vec<f32>>>;
98
99 /// **Stable**: generate an embedding for a single text.
100 ///
101 /// This is a convenience method that calls `embed` with a single-element slice.
102 async fn embed_one(&self, text: &str, model: EmbeddingModel) -> Result<Vec<f32>> {
103 let texts = vec![text.to_string()];
104 let mut embeddings = self.embed(&texts, model).await?;
105 embeddings
106 .pop()
107 .ok_or_else(|| EmbedError::Internal("no embedding generated".into()))
108 }
109
110 /// **Stable**: embed query texts with model-specific query prompt prefix applied.
111 ///
112 /// For models that use asymmetric prompts (E5, Qwen3-Embedding), this prepends the
113 /// `query_instruction()` prefix before calling the model forward. For models with
114 /// no query prefix (BGE, MiniLM), this is equivalent to `embed()`.
115 ///
116 /// Cache keys produced by this method are distinct from those produced by
117 /// `embed_passage()` and `embed()` even when the raw text is identical.
118 async fn embed_query(&self, texts: &[String], model: EmbeddingModel) -> Result<Vec<Vec<f32>>> {
119 let prefix = model.query_instruction();
120 let prompted = apply_prefix(texts, prefix);
121 self.embed(&prompted, model).await
122 }
123
124 /// **Stable**: embed document/passage texts with model-specific document prompt prefix applied.
125 ///
126 /// For models that use asymmetric prompts (E5), this prepends the
127 /// `document_instruction()` prefix before calling the model forward. For models with
128 /// no document prefix (BGE, MiniLM, Qwen3), this is equivalent to `embed()`.
129 ///
130 /// Cache keys produced by this method are distinct from those produced by
131 /// `embed_query()` and `embed()` even when the raw text is identical.
132 async fn embed_passage(
133 &self,
134 texts: &[String],
135 model: EmbeddingModel,
136 ) -> Result<Vec<Vec<f32>>> {
137 let prefix = model.document_instruction();
138 let prompted = apply_prefix(texts, prefix);
139 self.embed(&prompted, model).await
140 }
141
142 /// **Unstable**: returns the effective `ModelConfig` for a given model on this service.
143 ///
144 /// The default returns a config with no MRL truncation. `NativeEmbeddingService`
145 /// overrides this to expose the configured output dimension so `CachedEmbeddingService`
146 /// can include the actual dimension in cache keys.
147 fn model_config(&self, model: EmbeddingModel) -> ModelConfig {
148 ModelConfig::new(model)
149 }
150
151 /// **Stable**: check if the service supports a given model.
152 fn supports_model(&self, model: EmbeddingModel) -> bool;
153
154 /// **Stable**: get the name/identifier of this service.
155 fn name(&self) -> &'static str;
156}
157
/// Prepend an optional prompt prefix to every text in a batch.
///
/// With `Some(prefix)`, each returned string is the prefix immediately followed
/// by the corresponding input text (no separator is inserted). With `None`, the
/// result is a plain clone of `texts`. Order and length always match the input.
///
/// Kept as a free function rather than a method so that default trait method
/// bodies can call it without going through `self`.
pub(crate) fn apply_prefix(texts: &[String], prefix: Option<&str>) -> Vec<String> {
    if let Some(lead) = prefix {
        let mut prompted = Vec::with_capacity(texts.len());
        for text in texts {
            // Size the buffer up front so each output string is allocated once.
            let mut joined = String::with_capacity(lead.len() + text.len());
            joined.push_str(lead);
            joined.push_str(text);
            prompted.push(joined);
        }
        prompted
    } else {
        texts.to_vec()
    }
}