milli_core/vector/
manual.rs

1use super::error::EmbedError;
2use super::DistributionShift;
3use crate::vector::Embedding;
4
5#[derive(Debug, Clone, Copy)]
6pub struct Embedder {
7    dimensions: usize,
8    distribution: Option<DistributionShift>,
9}
10
11#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
12pub struct EmbedderOptions {
13    pub dimensions: usize,
14    pub distribution: Option<DistributionShift>,
15}
16
17impl Embedder {
18    pub fn new(options: EmbedderOptions) -> Self {
19        Self { dimensions: options.dimensions, distribution: options.distribution }
20    }
21
22    pub fn embed<S: AsRef<str>>(&self, texts: &[S]) -> Result<Vec<Embedding>, EmbedError> {
23        texts.as_ref().iter().map(|text| self.embed_one(text)).collect()
24    }
25
26    pub fn embed_one<S: AsRef<str>>(&self, text: S) -> Result<Embedding, EmbedError> {
27        Err(EmbedError::embed_on_manual_embedder(text.as_ref().chars().take(250).collect()))
28    }
29    pub fn dimensions(&self) -> usize {
30        self.dimensions
31    }
32
33    pub fn embed_index(
34        &self,
35        text_chunks: Vec<Vec<String>>,
36    ) -> Result<Vec<Vec<Embedding>>, EmbedError> {
37        text_chunks.into_iter().map(|prompts| self.embed(&prompts)).collect()
38    }
39
40    pub fn distribution(&self) -> Option<DistributionShift> {
41        self.distribution
42    }
43
44    pub(crate) fn embed_index_ref(&self, texts: &[&str]) -> Result<Vec<Embedding>, EmbedError> {
45        texts.iter().map(|text| self.embed_one(text)).collect()
46    }
47}