rag_toolchain/common/types.rs
1use serde::{Deserialize, Serialize};
2use std::sync::Arc;
3
4// ----------------- Embedding -----------------
5/// # [`Embedding`]
6/// The embedding type contains a vector and the associated
7/// chunk of text and possibly some metadata. The type internally
8/// uses [`Arc<T>`] to hold references to the internal values. This
9/// makes it cheap to Clone and Copy.
10#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
11pub struct Embedding {
12 /// The chunk that was used to generate the embedding
13 chunk: Chunk,
14 /// A vector of floats representing the embedding
15 vector: Arc<[f32]>,
16}
17
18impl Embedding {
19 /// # [`Embedding::new`]
20 ///
21 /// # Arguments
22 /// * chunk: [`Chunk`] - the chunk associated with the embedding
23 /// * vector: [`Into<Arc<[f32]>>`] - pointer to the embedding
24 ///
25 /// # Returns
26 /// * [`Embedding`] - a new Embedding
27 pub fn new(chunk: Chunk, vector: impl Into<Arc<[f32]>>) -> Self {
28 Self {
29 chunk,
30 vector: vector.into(),
31 }
32 }
33
34 /// # [`Embedding::chunk`]
35 /// Getter for the [`Chunk`]
36 ///
37 /// # Returns
38 /// * &[`Chunk`] - reference to the chunk
39 pub fn chunk(&self) -> &Chunk {
40 &self.chunk
41 }
42
43 /// # [`Embedding::vector`]
44 /// Getter for the [`Vec<f32>`] vector
45 ///
46 /// # Returns
47 /// * [`Vec<f32>`] - a copy of the vector
48 pub fn vector(&self) -> Vec<f32> {
49 self.vector.as_ref().to_vec()
50 }
51}
52// ---------------------------------------------
53
54// ----------------- Chunk ------------------
55#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Eq)]
56/// # [`Chunk`]
57/// A chunk is a piece of text with associated metadata. This is
58/// type uses [`Arc<T>`] to hold references to the internal values.
59/// so it is cheap to Clone and Copy.
60pub struct Chunk {
61 /// This is the text content
62 content: Arc<str>,
63 /// Any metadata associated with the chunk such as a date, author, etc.
64 metadata: Arc<serde_json::Value>,
65}
66
67impl Chunk {
68 /// # [`Chunk::new`]
69 /// This is the constructor to use when we have some text with no metadata that
70 /// we wish to include with it.
71 /// # Arguments
72 /// * content: [`Into<Arc<str>>`] - this is the text content of the chunk
73 ///
74 /// # Returns
75 /// * [`Chunk`] - a new Chunk with no metadata
76 pub fn new(chunk: impl Into<Arc<str>>) -> Self {
77 Self {
78 content: chunk.into(),
79 metadata: Arc::new(serde_json::Value::Null),
80 }
81 }
82
83 /// # [`Chunk::new_with_metadata`]
84 /// This is the constructor to use when we have some text with metadata.
85 /// Note the metadata does not influence any generated embeddings. It can just
86 /// be kept with the text and embedding in whatever vector store you choose to use.
87 ///
88 /// # Arguments
89 /// * content: [`Into<Arc<str>>`] - pointer to the chunk str
90 /// * metadata: [`serde_json::Value`] - metadata associated with the chunk
91 ///
92 /// # Returns
93 /// * [`Chunk`] - a new Chunk
94 pub fn new_with_metadata(content: impl Into<Arc<str>>, metadata: serde_json::Value) -> Self {
95 Self {
96 content: content.into(),
97 metadata: Arc::new(metadata),
98 }
99 }
100
101 /// # [`Chunk::content`]
102 /// Getter for the text content.
103 ///
104 /// # Returns
105 /// * &[`str`] - reference to the chunk str
106 pub fn content(&self) -> &str {
107 &self.content
108 }
109
110 /// # [`Chunk::metadata`]
111 /// Getter for the metadata
112 /// # Returns
113 /// * &[`serde_json::Value`] - reference to metadata associated with the chunk
114 pub fn metadata(&self) -> &serde_json::Value {
115 &self.metadata
116 }
117}
118// ------------------------------------------
119
120// ----------------- Chunks -----------------
121/// Type alias for a vector of [`Chunk`]
122pub type Chunks = Vec<Chunk>;
123// -----------------------------------------