rag_toolchain/common/
types.rs

1use serde::{Deserialize, Serialize};
2use std::sync::Arc;
3
4// ----------------- Embedding -----------------
5/// # [`Embedding`]
6/// The embedding type contains a vector and the associated
7/// chunk of text and possibly some metadata. The type internally
8/// uses [`Arc<T>`] to hold references to the internal values. This
9/// makes it cheap to Clone and Copy.
10#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
11pub struct Embedding {
12    /// The chunk that was used to generate the embedding
13    chunk: Chunk,
14    /// A vector of floats representing the embedding
15    vector: Arc<[f32]>,
16}
17
18impl Embedding {
19    /// # [`Embedding::new`]
20    ///
21    /// # Arguments
22    /// * chunk: [`Chunk`] - the chunk associated with the embedding
23    /// * vector: [`Into<Arc<[f32]>>`] - pointer to the embedding
24    ///
25    /// # Returns
26    /// * [`Embedding`] - a new Embedding
27    pub fn new(chunk: Chunk, vector: impl Into<Arc<[f32]>>) -> Self {
28        Self {
29            chunk,
30            vector: vector.into(),
31        }
32    }
33
34    /// # [`Embedding::chunk`]
35    /// Getter for the [`Chunk`]
36    ///
37    /// # Returns
38    /// * &[`Chunk`] - reference to the chunk
39    pub fn chunk(&self) -> &Chunk {
40        &self.chunk
41    }
42
43    /// # [`Embedding::vector`]
44    /// Getter for the [`Vec<f32>`] vector
45    ///
46    /// # Returns
47    /// * [`Vec<f32>`] - a copy of the vector
48    pub fn vector(&self) -> Vec<f32> {
49        self.vector.as_ref().to_vec()
50    }
51}
52// ---------------------------------------------
53
54// ----------------- Chunk ------------------
55#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Eq)]
56/// # [`Chunk`]
57/// A chunk is a piece of text with associated metadata. This is
58/// type uses [`Arc<T>`] to hold references to the internal values.
59/// so it is cheap to Clone and Copy.
60pub struct Chunk {
61    /// This is the text content
62    content: Arc<str>,
63    /// Any metadata associated with the chunk such as a date, author, etc.
64    metadata: Arc<serde_json::Value>,
65}
66
67impl Chunk {
68    /// # [`Chunk::new`]
69    /// This is the constructor to use when we have some text with no metadata that
70    /// we wish to include with it.
71    /// # Arguments
72    /// * content: [`Into<Arc<str>>`] - this is the text content of the chunk
73    ///
74    /// # Returns
75    /// * [`Chunk`] - a new Chunk with no metadata
76    pub fn new(chunk: impl Into<Arc<str>>) -> Self {
77        Self {
78            content: chunk.into(),
79            metadata: Arc::new(serde_json::Value::Null),
80        }
81    }
82
83    /// # [`Chunk::new_with_metadata`]
84    /// This is the constructor to use when we have some text with metadata.
85    /// Note the metadata does not influence any generated embeddings. It can just
86    /// be kept with the text and embedding in whatever vector store you choose to use.
87    ///
88    /// # Arguments
89    /// * content: [`Into<Arc<str>>`] - pointer to the chunk str
90    /// * metadata: [`serde_json::Value`] - metadata associated with the chunk
91    ///
92    /// # Returns
93    /// * [`Chunk`] - a new Chunk
94    pub fn new_with_metadata(content: impl Into<Arc<str>>, metadata: serde_json::Value) -> Self {
95        Self {
96            content: content.into(),
97            metadata: Arc::new(metadata),
98        }
99    }
100
101    /// # [`Chunk::content`]
102    /// Getter for the text content.
103    ///
104    /// # Returns
105    /// * &[`str`] - reference to the chunk str
106    pub fn content(&self) -> &str {
107        &self.content
108    }
109
110    /// # [`Chunk::metadata`]
111    /// Getter for the metadata
112    /// # Returns
113    /// * &[`serde_json::Value`] - reference to metadata associated with the chunk
114    pub fn metadata(&self) -> &serde_json::Value {
115        &self.metadata
116    }
117}
118// ------------------------------------------
119
120// ----------------- Chunks -----------------
/// Convenience alias for an owned list of [`Chunk`] values, i.e. `Vec<Chunk>`.
pub type Chunks = Vec<Chunk>;
123// -----------------------------------------