rig/embeddings/embedding.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
//! The module defines the [EmbeddingModel] trait, which represents an embedding model that can
//! generate embeddings for documents.
//!
//! The module also defines the [Embedding] struct, which represents a single document embedding.
//!
//! Finally, the module defines the [EmbeddingError] enum, which represents various errors that
//! can occur during embedding generation or processing.
use serde::{Deserialize, Serialize};
#[derive(Debug, thiserror::Error)]
pub enum EmbeddingError {
/// Http error (e.g.: connection error, timeout, etc.)
#[error("HttpError: {0}")]
HttpError(#[from] reqwest::Error),
/// Json error (e.g.: serialization, deserialization)
#[error("JsonError: {0}")]
JsonError(#[from] serde_json::Error),
/// Error processing the document for embedding
#[error("DocumentError: {0}")]
DocumentError(Box<dyn std::error::Error + Send + Sync + 'static>),
/// Error parsing the completion response
#[error("ResponseError: {0}")]
ResponseError(String),
/// Error returned by the embedding model provider
#[error("ProviderError: {0}")]
ProviderError(String),
}
/// Trait for embedding models that can generate embeddings for documents.
pub trait EmbeddingModel: Clone + Sync + Send {
/// The maximum number of documents that can be embedded in a single request.
const MAX_DOCUMENTS: usize;
/// The number of dimensions in the embedding vector.
fn ndims(&self) -> usize;
/// Embed multiple text documents in a single request
fn embed_texts(
&self,
texts: impl IntoIterator<Item = String> + Send,
) -> impl std::future::Future<Output = Result<Vec<Embedding>, EmbeddingError>> + Send;
/// Embed a single text document.
fn embed_text(
&self,
text: &str,
) -> impl std::future::Future<Output = Result<Embedding, EmbeddingError>> + Send {
async {
Ok(self
.embed_texts(vec![text.to_string()])
.await?
.pop()
.expect("There should be at least one embedding"))
}
}
}
/// Struct that holds a single document and its embedding.
#[derive(Clone, Default, Deserialize, Serialize, Debug)]
pub struct Embedding {
/// The document that was embedded. Used for debugging.
pub document: String,
/// The embedding vector
pub vec: Vec<f64>,
}
impl PartialEq for Embedding {
fn eq(&self, other: &Self) -> bool {
self.document == other.document
}
}
impl Eq for Embedding {}