chroma_types/
data_chunk.rs

1use std::sync::Arc;
2
/// A slice of elements paired with a per-element visibility mask.
///
/// Both buffers live behind `Arc`s, so the element type itself is never
/// copied when the chunk is handed around.
#[derive(Debug)]
pub struct Chunk<T> {
    /// The elements backing the chunk.
    data: Arc<[T]>,
    /// Visibility flag for the element at the same index in `data`;
    /// `Chunk::new` starts with one `true` flag per element.
    visibility: Arc<[bool]>,
}
8
9impl<T> Clone for Chunk<T> {
10    fn clone(&self) -> Self {
11        Chunk {
12            data: self.data.clone(),
13            visibility: self.visibility.clone(),
14        }
15    }
16}
17
18impl<T> Chunk<T> {
19    pub fn new(data: Arc<[T]>) -> Self {
20        let len = data.len();
21        Chunk {
22            data,
23            visibility: vec![true; len].into(),
24        }
25    }
26
27    /// Returns the total length of the data chunk
28    pub fn total_len(&self) -> usize {
29        self.data.len()
30    }
31
32    /// Returns the number of visible elements in the data chunk
33    pub fn len(&self) -> usize {
34        self.visibility.iter().filter(|&v| *v).count()
35    }
36
37    /// Returns whether the chunk has zero visible elements.
38    pub fn is_empty(&self) -> bool {
39        self.len() == 0
40    }
41
42    /// Returns the element at the given index
43    /// if the index is out of bounds, it returns None
44    /// # Arguments
45    /// * `index` - The index of the element
46    pub fn get(&self, index: usize) -> Option<&T> {
47        if index < self.data.len() {
48            Some(&self.data[index])
49        } else {
50            None
51        }
52    }
53
54    /// Returns the visibility of the element at the given index
55    /// if the index is out of bounds, it returns None
56    /// # Arguments
57    /// * `index` - The index of the element
58    pub fn get_visibility(&self, index: usize) -> Option<bool> {
59        if index < self.visibility.len() {
60            Some(self.visibility[index])
61        } else {
62            None
63        }
64    }
65
66    /// Sets the visibility of the elements in the data chunk.
67    /// Note that the length of the visibility vector should be
68    /// equal to the length of the data chunk.
69    ///
70    /// Note that this is the only way to change the visibility of the elements in the data chunk,
71    /// the data chunk does not provide a way to change the visibility of individual elements.
72    /// This is to ensure that the visibility of the elements is always in sync with the data.
73    /// If you want to change the visibility of individual elements, you should create a new data chunk.
74    ///
75    /// # Arguments
76    /// * `visibility` - A vector of boolean values indicating the visibility of the elements
77    pub fn set_visibility(&mut self, visibility: Vec<bool>) {
78        self.visibility = visibility.into();
79    }
80
81    /// Returns an iterator over the visible elements in the data chunk
82    /// The iterator returns a tuple of the element and its index
83    /// # Returns
84    /// An iterator over the visible elements in the data chunk
85    pub fn iter(&self) -> DataChunkIteraror<'_, T> {
86        DataChunkIteraror {
87            chunk: self,
88            index: 0,
89        }
90    }
91}
92
/// Iterator over the visible elements of a [`Chunk`], created by `Chunk::iter`.
///
/// NOTE(review): the type name looks like a misspelling of "Iterator"; it is
/// public, so renaming it would need a coordinated change in callers.
pub struct DataChunkIteraror<'a, T> {
    /// The chunk being iterated.
    chunk: &'a Chunk<T>,
    /// Position of the next element to examine (visible or not).
    index: usize,
}
97
98impl<'a, T> Iterator for DataChunkIteraror<'a, T> {
99    type Item = (&'a T, usize);
100
101    fn next(&mut self) -> Option<Self::Item> {
102        while self.index < self.chunk.total_len() {
103            let index = self.index;
104            match self.chunk.get_visibility(index) {
105                Some(true) => {
106                    self.index += 1;
107                    return self.chunk.get(index).map(|record| (record, index));
108                }
109                Some(false) => {
110                    self.index += 1;
111                }
112                None => {
113                    break;
114                }
115            }
116        }
117        None
118    }
119}
120
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{LogRecord, Operation, OperationRecord};

    /// Checks counting and iteration with every record visible, then again
    /// after the second record has been hidden.
    #[test]
    fn test_data_chunk() {
        // Only the offset and id differ between the records under test.
        let make_record = |log_offset, id: &str| LogRecord {
            log_offset,
            record: OperationRecord {
                id: id.to_string(),
                embedding: None,
                encoding: None,
                metadata: None,
                document: None,
                operation: Operation::Add,
            },
        };
        let records = vec![
            make_record(1, "embedding_id_1"),
            make_record(2, "embedding_id_2"),
        ];
        let mut chunk = Chunk::new(records.into());

        // Freshly created: both records are visible, in order.
        assert_eq!(chunk.len(), 2);
        let seen: Vec<(&str, usize)> = chunk
            .iter()
            .map(|(record, index)| (record.record.id.as_str(), index))
            .collect();
        assert_eq!(seen, [("embedding_id_1", 0), ("embedding_id_2", 1)]);

        // Hide the second record: only the first one remains.
        chunk.set_visibility(vec![true, false]);
        assert_eq!(chunk.len(), 1);
        let seen: Vec<(&str, usize)> = chunk
            .iter()
            .map(|(record, index)| (record.record.id.as_str(), index))
            .collect();
        assert_eq!(seen, [("embedding_id_1", 0)]);
    }
}
181}