Skip to main content

chroma_types/
data_chunk.rs

1use std::sync::Arc;
2
/// A shared, immutable run of elements paired with a per-element visibility mask.
///
/// Both buffers live behind `Arc`, so handing a chunk around never copies the
/// elements themselves.
#[derive(Debug)]
pub struct Chunk<T> {
    /// The elements of the chunk, visible or not.
    data: Arc<[T]>,
    /// One flag per element; `true` marks the element at that index as visible.
    visibility: Arc<[bool]>,
    /// Cached number of `true` flags in `visibility`.
    visible_count: usize,
}
9
10impl<T> Clone for Chunk<T> {
11    fn clone(&self) -> Self {
12        Chunk {
13            data: self.data.clone(),
14            visibility: self.visibility.clone(),
15            visible_count: self.visible_count,
16        }
17    }
18}
19
20impl<T> Chunk<T> {
21    pub fn new(data: Arc<[T]>) -> Self {
22        let len = data.len();
23        Chunk {
24            data,
25            visibility: vec![true; len].into(),
26            visible_count: len,
27        }
28    }
29
30    /// Returns the total length of the data chunk
31    pub fn total_len(&self) -> usize {
32        self.data.len()
33    }
34
35    /// Returns the number of visible elements in the data chunk
36    pub fn len(&self) -> usize {
37        self.visible_count
38    }
39
40    /// Returns whether the chunk has zero visible elements.
41    pub fn is_empty(&self) -> bool {
42        self.len() == 0
43    }
44
45    /// Returns the element at the given index
46    /// if the index is out of bounds, it returns None
47    /// # Arguments
48    /// * `index` - The index of the element
49    pub fn get(&self, index: usize) -> Option<&T> {
50        if index < self.data.len() {
51            Some(&self.data[index])
52        } else {
53            None
54        }
55    }
56
57    /// Returns the visibility of the element at the given index
58    /// if the index is out of bounds, it returns None
59    /// # Arguments
60    /// * `index` - The index of the element
61    pub fn get_visibility(&self, index: usize) -> Option<bool> {
62        if index < self.visibility.len() {
63            Some(self.visibility[index])
64        } else {
65            None
66        }
67    }
68
69    /// Sets the visibility of the elements in the data chunk.
70    /// Note that the length of the visibility vector should be
71    /// equal to the length of the data chunk.
72    ///
73    /// Note that this is the only way to change the visibility of the elements in the data chunk,
74    /// the data chunk does not provide a way to change the visibility of individual elements.
75    /// This is to ensure that the visibility of the elements is always in sync with the data.
76    /// If you want to change the visibility of individual elements, you should create a new data chunk.
77    ///
78    /// # Arguments
79    /// * `visibility` - A vector of boolean values indicating the visibility of the elements
80    pub fn set_visibility(&mut self, visibility: Vec<bool>) {
81        self.visible_count = visibility.iter().filter(|&v| *v).count();
82        self.visibility = visibility.into();
83    }
84
85    /// Returns an iterator over the visible elements in the data chunk
86    /// The iterator returns a tuple of the element and its index
87    /// # Returns
88    /// An iterator over the visible elements in the data chunk
89    pub fn iter(&self) -> DataChunkIteraror<'_, T> {
90        DataChunkIteraror {
91            chunk: self,
92            index: 0,
93        }
94    }
95}
96
97pub struct DataChunkIteraror<'a, T> {
98    chunk: &'a Chunk<T>,
99    index: usize,
100}
101
102impl<'a, T> Iterator for DataChunkIteraror<'a, T> {
103    type Item = (&'a T, usize);
104
105    fn next(&mut self) -> Option<Self::Item> {
106        while self.index < self.chunk.total_len() {
107            let index = self.index;
108            match self.chunk.get_visibility(index) {
109                Some(true) => {
110                    self.index += 1;
111                    return self.chunk.get(index).map(|record| (record, index));
112                }
113                Some(false) => {
114                    self.index += 1;
115                }
116                None => {
117                    break;
118                }
119            }
120        }
121        None
122    }
123}
124
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{LogRecord, Operation, OperationRecord};

    /// Iterates a two-record chunk while fully visible, then again after the
    /// second record has been hidden.
    #[test]
    fn test_data_chunk() {
        // Builds an `Add` record that carries only an id; all optional
        // payload fields are left unset.
        let make_record = |log_offset, id: &str| LogRecord {
            log_offset,
            record: OperationRecord {
                id: id.to_string(),
                embedding: None,
                encoding: None,
                metadata: None,
                document: None,
                operation: Operation::Add,
            },
        };
        let mut chunk = Chunk::new(
            vec![
                make_record(1, "embedding_id_1"),
                make_record(2, "embedding_id_2"),
            ]
            .into(),
        );

        // Everything is visible right after construction.
        assert_eq!(chunk.len(), 2);
        let seen: Vec<(&str, usize)> = chunk
            .iter()
            .map(|(entry, position)| (entry.record.id.as_str(), position))
            .collect();
        assert_eq!(seen, vec![("embedding_id_1", 0), ("embedding_id_2", 1)]);

        // Hiding the second record leaves only the first one reachable.
        chunk.set_visibility(vec![true, false]);
        assert_eq!(chunk.len(), 1);
        let seen: Vec<(&str, usize)> = chunk
            .iter()
            .map(|(entry, position)| (entry.record.id.as_str(), position))
            .collect();
        assert_eq!(seen, vec![("embedding_id_1", 0)]);
    }
}