use crate::wasm_compat::WasmBoxedFuture;
use crate::{http_client, wasm_compat::*};
use serde::{Deserialize, Serialize};
/// Errors that can be returned by the embedding traits in this module.
///
/// `HttpError`, `JsonError` and `UrlError` are marked `#[from]`, so the matching
/// source errors convert into this type automatically when propagated with `?`.
/// The remaining variants have to be constructed explicitly.
#[derive(Debug, thiserror::Error)]
pub enum EmbeddingError {
/// Wraps an error coming from the crate's `http_client` module.
#[error("HttpError: {0}")]
HttpError(#[from] http_client::Error),
/// Wraps a `serde_json` (de)serialization error.
#[error("JsonError: {0}")]
JsonError(#[from] serde_json::Error),
/// Wraps a URL parsing error from the `url` crate.
#[error("UrlError: {0}")]
UrlError(#[from] url::ParseError),
/// Arbitrary boxed error (non-wasm build): the boxed error must be `Send + Sync`.
/// Presumably raised while preparing a document for embedding — confirm at call sites.
#[cfg(not(target_family = "wasm"))]
#[error("DocumentError: {0}")]
DocumentError(Box<dyn std::error::Error + Send + Sync + 'static>),
/// Arbitrary boxed error (wasm build): same variant as above, but without the
/// `Send + Sync` requirement on the boxed error.
#[cfg(target_family = "wasm")]
#[error("DocumentError: {0}")]
DocumentError(Box<dyn std::error::Error + 'static>),
/// Free-form message; presumably describes an unexpected or unusable response.
#[error("ResponseError: {0}")]
ResponseError(String),
/// Free-form message; presumably an error reported by the embedding provider.
#[error("ProviderError: {0}")]
ProviderError(String),
}
pub trait EmbeddingModel: WasmCompatSend + WasmCompatSync {
const MAX_DOCUMENTS: usize;
type Client;
fn make(client: &Self::Client, model: impl Into<String>, dims: Option<usize>) -> Self;
fn ndims(&self) -> usize;
fn embed_texts(
&self,
texts: impl IntoIterator<Item = String> + WasmCompatSend,
) -> impl std::future::Future<Output = Result<Vec<Embedding>, EmbeddingError>> + WasmCompatSend;
fn embed_text(
&self,
text: &str,
) -> impl std::future::Future<Output = Result<Embedding, EmbeddingError>> + WasmCompatSend {
async {
Ok(self
.embed_texts(vec![text.to_string()])
.await?
.pop()
.expect("There should be at least one embedding"))
}
}
}
/// Deprecated variant of `EmbeddingModel` whose async methods return boxed
/// futures (`WasmBoxedFuture`) instead of `impl Future`, and which exposes the
/// document limit through a method rather than an associated constant.
///
/// A blanket implementation for every `EmbeddingModel` is provided in this file.
#[deprecated(
since = "0.25.0",
note = "`DynClientBuilder` and related features have been deprecated and will be removed in a future release. In this case, use `EmbeddingModel` instead."
)]
pub trait EmbeddingModelDyn: WasmCompatSend + WasmCompatSync {
/// Returns the model's document limit.
fn max_documents(&self) -> usize;
/// Returns the dimension count reported by the model.
fn ndims(&self) -> usize;
/// Embeds a single text; the returned future borrows both `self` and `text` for `'a`.
fn embed_text<'a>(
&'a self,
text: &'a str,
) -> WasmBoxedFuture<'a, Result<Embedding, EmbeddingError>>;
/// Embeds a batch of owned texts; the returned future borrows `self`.
fn embed_texts(
&self,
texts: Vec<String>,
) -> WasmBoxedFuture<'_, Result<Vec<Embedding>, EmbeddingError>>;
}
/// Blanket implementation that exposes every [`EmbeddingModel`] through the
/// deprecated [`EmbeddingModelDyn`] interface.
///
/// Both traits declare methods with the same names, so every call below is
/// written in fully qualified form: it is then explicit that the call forwards
/// to the `EmbeddingModel` method and does not recurse into this impl.
// `allow(deprecated)`: this impl has to name the deprecated trait it implements.
#[allow(deprecated)]
impl<T> EmbeddingModelDyn for T
where
    T: EmbeddingModel + WasmCompatSend + WasmCompatSync,
{
    /// Returns `EmbeddingModel::MAX_DOCUMENTS` for `T`.
    fn max_documents(&self) -> usize {
        <T as EmbeddingModel>::MAX_DOCUMENTS
    }

    /// Forwards to `EmbeddingModel::ndims`.
    fn ndims(&self) -> usize {
        <T as EmbeddingModel>::ndims(self)
    }

    /// Boxes and pins the future returned by `EmbeddingModel::embed_text`.
    fn embed_text<'a>(
        &'a self,
        text: &'a str,
    ) -> WasmBoxedFuture<'a, Result<Embedding, EmbeddingError>> {
        Box::pin(<T as EmbeddingModel>::embed_text(self, text))
    }

    /// Boxes and pins the future returned by `EmbeddingModel::embed_texts`.
    fn embed_texts(
        &self,
        texts: Vec<String>,
    ) -> WasmBoxedFuture<'_, Result<Vec<Embedding>, EmbeddingError>> {
        // `Vec<String>` already is an `IntoIterator<Item = String>`, so it is handed
        // over as-is; no intermediate `into_iter().collect()` round-trip is needed.
        Box::pin(<T as EmbeddingModel>::embed_texts(self, texts))
    }
}
pub trait ImageEmbeddingModel: Clone + WasmCompatSend + WasmCompatSync {
const MAX_DOCUMENTS: usize;
fn ndims(&self) -> usize;
fn embed_images(
&self,
images: impl IntoIterator<Item = Vec<u8>> + WasmCompatSend,
) -> impl std::future::Future<Output = Result<Vec<Embedding>, EmbeddingError>> + Send;
fn embed_image<'a>(
&'a self,
bytes: &'a [u8],
) -> impl std::future::Future<Output = Result<Embedding, EmbeddingError>> + WasmCompatSend {
async move {
Ok(self
.embed_images(vec![bytes.to_owned()])
.await?
.pop()
.expect("There should be at least one embedding"))
}
}
}
/// Result type of the embedding traits in this file: a document string paired
/// with a vector of `f64` values.
///
/// Note that `PartialEq`/`Eq` are implemented by hand further down and are not
/// derived here.
#[derive(Clone, Default, Deserialize, Serialize, Debug)]
pub struct Embedding {
/// Document text this embedding belongs to (presumably the input that was embedded).
pub document: String,
/// The embedding values.
pub vec: Vec<f64>,
}
/// Equality is decided by the `document` text alone; the numeric `vec` is
/// ignored, so two embeddings of the same document compare equal even if their
/// vectors differ.
impl PartialEq for Embedding {
    fn eq(&self, other: &Self) -> bool {
        // Destructure both sides so it is visible that only `document` takes part.
        let Self { document: lhs, .. } = self;
        let Self { document: rhs, .. } = other;
        lhs == rhs
    }
}
// Marker impl: `PartialEq` above compares only the `String` field `document`,
// so equality is reflexive even though the struct also holds `f64` values.
impl Eq for Embedding {}