pub struct CachedEmbeddingModel<M: Embedder, S = DefaultHasher> { /* private fields */ }
Expand description
Embedding models can be expensive to run. This struct wraps an embedding model with a cache that stores embeddings that have been computed before.
§Example
use kalosm::language::*;
use std::num::NonZeroUsize;
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let bert = Bert::builder()
.build()
.await?
// You can call the `.cached` method to cache the results of the Bert embedding in a LRU cache with the given capacity.
.cached(NonZeroUsize::new(1000).unwrap());
// Try to load the cache from the filesystem
if let Ok(cache) = std::fs::read("cache.bin") {
let cache: Vec<(EmbeddingInput, Vec<f32>)> = postcard::from_bytes(&cache)?;
bert.load_cache(cache);
}
let start_time = std::time::Instant::now();
let sentences = [
"Cats are cool",
"The geopolitical situation is dire",
"Pets are great",
"Napoleon was a tyrant",
"Napoleon was a great general",
];
// When you embed a new sentence, the cache will store the embedding for that sentence.
let embeddings = bert.embed_batch(sentences).await?;
println!("{:?}", embeddings);
println!("embedding uncached took {:?}", start_time.elapsed());
let start_time = std::time::Instant::now();
// If you embed the same sentences again, the cache will be used.
let embeddings = bert.embed_batch(sentences).await?;
println!("{:?}", embeddings);
println!("embedding cached took {:?}", start_time.elapsed());
let sentences = [
"Cats are cool",
"The geopolitical situation is dire",
"Pets are great",
"Napoleon was a king",
"Napoleon was kind of not a great guy",
];
// When you embed a new sentence, the cache will store the embedding for that sentence.
let embeddings = bert.embed_batch(sentences).await?;
println!("{:?}", embeddings);
println!("embedding partially cached took {:?}", start_time.elapsed());
// Save the cache to the filesystem for future use
let cache = bert.export_cache();
let file = std::fs::File::create("cache.bin")?;
let mut writer = std::io::BufWriter::new(file);
postcard::to_io(&cache, &mut writer)?;
Ok(())
}
Implementations§
Source§impl<M: Embedder> CachedEmbeddingModel<M>
impl<M: Embedder> CachedEmbeddingModel<M>
Sourcepub fn new(model: M, cache_size: NonZeroUsize) -> Self
pub fn new(model: M, cache_size: NonZeroUsize) -> Self
Create a new cached embedding model.
Source§impl<M: Embedder, S> CachedEmbeddingModel<M, S>
impl<M: Embedder, S> CachedEmbeddingModel<M, S>
Sourcepub fn get_embedder(&self) -> &M
pub fn get_embedder(&self) -> &M
Get a reference to the underlying embedder.
Sourcepub fn get_embedder_mut(&mut self) -> &mut M
pub fn get_embedder_mut(&mut self) -> &mut M
Get a mutable reference to the underlying embedder.
Source§impl<M: Embedder, S: BuildHasher> CachedEmbeddingModel<M, S>
impl<M: Embedder, S: BuildHasher> CachedEmbeddingModel<M, S>
Sourcepub fn new_with_hasher(model: M, cache_size: NonZeroUsize, hasher: S) -> Self
pub fn new_with_hasher(model: M, cache_size: NonZeroUsize, hasher: S) -> Self
Create a new cached embedding model with a custom hasher.
Sourcepub fn export_cache(&self) -> Vec<(EmbeddingInput, Box<[f32]>)>
pub fn export_cache(&self) -> Vec<(EmbeddingInput, Box<[f32]>)>
Return a serializable cache of the embeddings for future use. You can load the cache back into the model with Self::load_cache.
§Example
let bert = Bert::builder()
.build()
.await?
// You can call the `.cached` method to cache the results of the Bert embedding in a LRU cache with the given capacity.
.cached(NonZeroUsize::new(1000).unwrap());
let sentences = [
"Cats are cool",
"The geopolitical situation is dire",
"Pets are great",
"Napoleon was a tyrant",
"Napoleon was a great general",
];
// When you embed a new sentence, the cache will store the embedding for that sentence.
let embeddings = bert.embed_batch(sentences).await?;
println!("{:?}", embeddings);
// Save the cache to the filesystem for future use
let cache = bert.export_cache();
let file = std::fs::File::create("cache.bin")?;
let mut writer = std::io::BufWriter::new(file);
postcard::to_io(&cache, &mut writer)?;
Sourcepub fn load_cache(&self, cached_items: Vec<(EmbeddingInput, Vec<f32>)>)
pub fn load_cache(&self, cached_items: Vec<(EmbeddingInput, Vec<f32>)>)
Load previously exported cache entries (for example, entries deserialized from a file) into the embedding cache.
§Example
let bert = Bert::builder()
.build()
.await?
// You can call the `.cached` method to cache the results of the Bert embedding in a LRU cache with the given capacity.
.cached(NonZeroUsize::new(1000).unwrap());
// Try to load the cache from the filesystem
let cache = std::fs::read("cache.bin")?;
let cache: Vec<(EmbeddingInput, Vec<f32>)> = postcard::from_bytes(&cache)?;
let _ = bert.load_cache(cache);
let sentences = [
"Cats are cool",
"The geopolitical situation is dire",
"Pets are great",
"Napoleon was a tyrant",
"Napoleon was a great general",
];
// If the sentence is already in the cache, it will be returned from the cache instead of running the model
let embeddings = bert.embed_batch(sentences).await?;
println!("{:?}", embeddings);
Trait Implementations§
Source§impl<M: Embedder> Embedder for CachedEmbeddingModel<M>
impl<M: Embedder> Embedder for CachedEmbeddingModel<M>
Source§fn embed_for(
&self,
input: EmbeddingInput,
) -> impl Future<Output = Result<Embedding, Self::Error>> + Send
fn embed_for( &self, input: EmbeddingInput, ) -> impl Future<Output = Result<Embedding, Self::Error>> + Send
Embed a single string.
Source§fn embed_vec_for(
&self,
inputs: Vec<EmbeddingInput>,
) -> impl Future<Output = Result<Vec<Embedding>, Self::Error>> + Send
fn embed_vec_for( &self, inputs: Vec<EmbeddingInput>, ) -> impl Future<Output = Result<Vec<Embedding>, Self::Error>> + Send
Embed a batch of strings.
Auto Trait Implementations§
impl<M, S = RandomState> !Freeze for CachedEmbeddingModel<M, S>
impl<M, S> RefUnwindSafe for CachedEmbeddingModel<M, S> where
M: RefUnwindSafe,
impl<M, S> Send for CachedEmbeddingModel<M, S> where
S: Send,
impl<M, S> Sync for CachedEmbeddingModel<M, S> where
S: Send,
impl<M, S> Unpin for CachedEmbeddingModel<M, S>
impl<M, S> UnwindSafe for CachedEmbeddingModel<M, S> where
M: UnwindSafe,
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<M> EmbedderCacheExt for M where
M: Embedder,
impl<M> EmbedderCacheExt for M where
M: Embedder,
Source§fn cached(self, cache_size: NonZeroUsize) -> CachedEmbeddingModel<Self> where
Self: Sized,
fn cached(self, cache_size: NonZeroUsize) -> CachedEmbeddingModel<Self> where
Self: Sized,
Wrap the embedder with a cache for previously computed embeddings. Read more
Source§impl<E> EmbedderExt for E where
E: Embedder,
impl<E> EmbedderExt for E where
E: Embedder,
Source§fn into_any_embedder(self) -> DynEmbedder
fn into_any_embedder(self) -> DynEmbedder
Convert this embedder into an embedder trait object.
Source§fn embed(
&self,
input: impl ToString,
) -> impl Future<Output = Result<Embedding, Self::Error>> + Send
fn embed( &self, input: impl ToString, ) -> impl Future<Output = Result<Embedding, Self::Error>> + Send
Embed some text into a vector space
Source§fn embed_query(
&self,
input: impl ToString,
) -> impl Future<Output = Result<Embedding, Self::Error>> + Send
fn embed_query( &self, input: impl ToString, ) -> impl Future<Output = Result<Embedding, Self::Error>> + Send
Embed a query into a vector space
Source§fn embed_batch(
&self,
inputs: impl IntoIterator<Item = impl ToString>,
) -> impl Future<Output = Result<Vec<Embedding>, Self::Error>> + Send
fn embed_batch( &self, inputs: impl IntoIterator<Item = impl ToString>, ) -> impl Future<Output = Result<Vec<Embedding>, Self::Error>> + Send
Embed a batch of text into a vector space. Returns a list of embeddings in the same order as the inputs.
Source§fn embed_batch_for(
&self,
inputs: impl IntoIterator<Item = EmbeddingInput>,
) -> impl Future<Output = Result<Vec<Embedding>, Self::Error>> + Send
fn embed_batch_for( &self, inputs: impl IntoIterator<Item = EmbeddingInput>, ) -> impl Future<Output = Result<Vec<Embedding>, Self::Error>> + Send
Embed a batch of EmbeddingInput into a vector space. Returns a list of embeddings in the same order as the inputs.