#[cfg(feature = "native")]
mod cached;
#[cfg(feature = "native")]
mod native;
#[cfg(test)]
mod tests;
use crate::error::{EmbedError, Result};
use crate::model::{EmbeddingModel, ModelConfig};
use async_trait::async_trait;
#[cfg(feature = "native")]
pub use cached::CachedEmbeddingService;
#[cfg(feature = "native")]
pub use native::NativeEmbeddingService;
/// Default maximum batch size: 1000.
///
/// NOTE(review): not referenced in this file; presumably the cap on how many
/// texts a service accepts per call — confirm against the implementations.
pub const DEFAULT_MAX_BATCH_SIZE: usize = 1_000;

/// Maximum text length in characters: 32 * 1024 = 32768.
///
/// NOTE(review): not referenced in this file; whether longer inputs are
/// truncated or rejected is decided elsewhere — confirm against the callers.
pub const MAX_TEXT_CHARS: usize = 32 * 1024;
/// Role under which a text is embedded.
///
/// NOTE(review): the variants appear to mirror the query / passage split of
/// `EmbeddingService::embed_query` and `EmbeddingService::embed_passage`;
/// the consumers of this enum live outside this file — confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EmbeddingRole {
    /// Tagged as `"role:query"`.
    Query,
    /// Tagged as `"role:passage"`.
    Passage,
    /// Tagged as `"role:generic"`.
    Generic,
}

impl EmbeddingRole {
    /// Returns the fixed `role:<name>` tag string for this role.
    ///
    /// Each variant maps to a distinct string, so the tag can tell roles
    /// apart wherever it is used (presumably as part of a cache key, going by
    /// the method name — confirm in the cache module).
    #[inline]
    pub(crate) const fn cache_tag(self) -> &'static str {
        match self {
            Self::Generic => "role:generic",
            Self::Passage => "role:passage",
            Self::Query => "role:query",
        }
    }
}
/// Asynchronous interface for turning text into embedding vectors.
///
/// Implementors provide [`embed`](Self::embed), [`supports_model`](Self::supports_model)
/// and [`name`](Self::name); the remaining methods have default
/// implementations built on top of `embed`.
#[async_trait]
pub trait EmbeddingService: Send + Sync {
    /// Embeds `texts` using `model`.
    ///
    /// NOTE(review): the default methods assume implementations return one
    /// vector per input text, in input order — confirm against implementors.
    ///
    /// # Errors
    /// Implementation-defined.
    async fn embed(&self, texts: &[String], model: EmbeddingModel) -> Result<Vec<Vec<f32>>>;

    /// Embeds a single text by calling [`embed`](Self::embed) with a
    /// one-element batch and returning the last vector it produced.
    ///
    /// # Errors
    /// Propagates any error from `embed`, and returns
    /// `EmbedError::Internal` when `embed` succeeds with no vectors.
    async fn embed_one(&self, text: &str, model: EmbeddingModel) -> Result<Vec<f32>> {
        // A stack array is enough for a one-element batch; no `Vec` required.
        let batch = [text.to_owned()];
        self.embed(&batch, model)
            .await?
            .pop()
            .ok_or_else(|| EmbedError::Internal("no embedding generated".into()))
    }

    /// Embeds `texts` after prepending the model's query instruction, if it
    /// has one.
    ///
    /// # Errors
    /// Propagates any error from [`embed`](Self::embed).
    async fn embed_query(&self, texts: &[String], model: EmbeddingModel) -> Result<Vec<Vec<f32>>> {
        match model.query_instruction() {
            Some(prefix) => {
                let prompted = apply_prefix(texts, Some(prefix));
                self.embed(&prompted, model).await
            }
            // No instruction: forward the caller's slice as-is instead of
            // cloning every string just to pass the copy by reference.
            None => self.embed(texts, model).await,
        }
    }

    /// Embeds `texts` after prepending the model's document instruction, if
    /// it has one.
    ///
    /// # Errors
    /// Propagates any error from [`embed`](Self::embed).
    async fn embed_passage(
        &self,
        texts: &[String],
        model: EmbeddingModel,
    ) -> Result<Vec<Vec<f32>>> {
        match model.document_instruction() {
            Some(prefix) => {
                let prompted = apply_prefix(texts, Some(prefix));
                self.embed(&prompted, model).await
            }
            // No instruction: forward the caller's slice as-is instead of
            // cloning every string just to pass the copy by reference.
            None => self.embed(texts, model).await,
        }
    }

    /// Returns the configuration for `model`; the default builds it with
    /// `ModelConfig::new(model)`.
    fn model_config(&self, model: EmbeddingModel) -> ModelConfig {
        ModelConfig::new(model)
    }

    /// Reports whether this service can embed with `model`.
    fn supports_model(&self, model: EmbeddingModel) -> bool;

    /// Returns a static name for this service.
    fn name(&self) -> &'static str;
}
/// Returns a copy of `texts` with `prefix` prepended to every element.
///
/// When `prefix` is `None` the texts are cloned unchanged. The output always
/// has the same length and order as `texts`.
pub(crate) fn apply_prefix(texts: &[String], prefix: Option<&str>) -> Vec<String> {
    if let Some(instruction) = prefix {
        let mut prompted = Vec::with_capacity(texts.len());
        for text in texts {
            // Plain concatenation: the prefix goes directly in front of the
            // text, with no separator inserted.
            prompted.push([instruction, text.as_str()].concat());
        }
        prompted
    } else {
        texts.to_vec()
    }
}