bep/embeddings/embedding.rs
//! The module defines the [`EmbeddingModel`] trait, which represents an embedding model that can
//! generate embeddings for documents.
//!
//! The module also defines the [`Embedding`] struct, which represents a single document embedding.
//!
//! Finally, the module defines the [`EmbeddingError`] enum, which represents various errors that
//! can occur during embedding generation or processing.
8
9use serde::{Deserialize, Serialize};
10
11#[derive(Debug, thiserror::Error)]
12pub enum EmbeddingError {
13 /// Http error (e.g.: connection error, timeout, etc.)
14 #[error("HttpError: {0}")]
15 HttpError(#[from] reqwest::Error),
16
17 /// Json error (e.g.: serialization, deserialization)
18 #[error("JsonError: {0}")]
19 JsonError(#[from] serde_json::Error),
20
21 /// Error processing the document for embedding
22 #[error("DocumentError: {0}")]
23 DocumentError(Box<dyn std::error::Error + Send + Sync + 'static>),
24
25 /// Error parsing the completion response
26 #[error("ResponseError: {0}")]
27 ResponseError(String),
28
29 /// Error returned by the embedding model provider
30 #[error("ProviderError: {0}")]
31 ProviderError(String),
32}
33
34/// Trait for embedding models that can generate embeddings for documents.
35pub trait EmbeddingModel: Clone + Sync + Send {
36 /// The maximum number of documents that can be embedded in a single request.
37 const MAX_DOCUMENTS: usize;
38
39 /// The number of dimensions in the embedding vector.
40 fn ndims(&self) -> usize;
41
42 /// Embed multiple text documents in a single request
43 fn embed_texts(
44 &self,
45 texts: impl IntoIterator<Item = String> + Send,
46 ) -> impl std::future::Future<Output = Result<Vec<Embedding>, EmbeddingError>> + Send;
47
48 /// Embed a single text document.
49 fn embed_text(
50 &self,
51 text: &str,
52 ) -> impl std::future::Future<Output = Result<Embedding, EmbeddingError>> + Send {
53 async {
54 Ok(self
55 .embed_texts(vec![text.to_string()])
56 .await?
57 .pop()
58 .expect("There should be at least one embedding"))
59 }
60 }
61}
62
63/// Struct that holds a single document and its embedding.
64#[derive(Clone, Default, Deserialize, Serialize, Debug)]
65pub struct Embedding {
66 /// The document that was embedded. Used for debugging.
67 pub document: String,
68 /// The embedding vector
69 pub vec: Vec<f64>,
70}
71
72impl PartialEq for Embedding {
73 fn eq(&self, other: &Self) -> bool {
74 self.document == other.document
75 }
76}
77
78impl Eq for Embedding {}