chroma_types/
data_chunk.rs1use std::sync::Arc;
2
/// A batch of items stored in a shared slice, with one visibility flag per item.
///
/// Both the items and the flags are held behind `Arc`, so the struct itself
/// only owns two reference-counted pointers and a counter.
#[derive(Debug)]
pub struct Chunk<T> {
    /// Every item in the chunk, whether or not it is currently visible.
    data: Arc<[T]>,
    /// Visibility flag for the item at the same index in `data`.
    visibility: Arc<[bool]>,
    /// Number of `true` entries in `visibility`, kept alongside the flags.
    visible_count: usize,
}
9
10impl<T> Clone for Chunk<T> {
11 fn clone(&self) -> Self {
12 Chunk {
13 data: self.data.clone(),
14 visibility: self.visibility.clone(),
15 visible_count: self.visible_count,
16 }
17 }
18}
19
20impl<T> Chunk<T> {
21 pub fn new(data: Arc<[T]>) -> Self {
22 let len = data.len();
23 Chunk {
24 data,
25 visibility: vec![true; len].into(),
26 visible_count: len,
27 }
28 }
29
30 pub fn total_len(&self) -> usize {
32 self.data.len()
33 }
34
35 pub fn len(&self) -> usize {
37 self.visible_count
38 }
39
40 pub fn is_empty(&self) -> bool {
42 self.len() == 0
43 }
44
45 pub fn get(&self, index: usize) -> Option<&T> {
50 if index < self.data.len() {
51 Some(&self.data[index])
52 } else {
53 None
54 }
55 }
56
57 pub fn get_visibility(&self, index: usize) -> Option<bool> {
62 if index < self.visibility.len() {
63 Some(self.visibility[index])
64 } else {
65 None
66 }
67 }
68
69 pub fn set_visibility(&mut self, visibility: Vec<bool>) {
81 self.visible_count = visibility.iter().filter(|&v| *v).count();
82 self.visibility = visibility.into();
83 }
84
85 pub fn iter(&self) -> DataChunkIteraror<'_, T> {
90 DataChunkIteraror {
91 chunk: self,
92 index: 0,
93 }
94 }
95}
96
97pub struct DataChunkIteraror<'a, T> {
98 chunk: &'a Chunk<T>,
99 index: usize,
100}
101
102impl<'a, T> Iterator for DataChunkIteraror<'a, T> {
103 type Item = (&'a T, usize);
104
105 fn next(&mut self) -> Option<Self::Item> {
106 while self.index < self.chunk.total_len() {
107 let index = self.index;
108 match self.chunk.get_visibility(index) {
109 Some(true) => {
110 self.index += 1;
111 return self.chunk.get(index).map(|record| (record, index));
112 }
113 Some(false) => {
114 self.index += 1;
115 }
116 None => {
117 break;
118 }
119 }
120 }
121 None
122 }
123}
124
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{LogRecord, Operation, OperationRecord};

    /// Iterates a two-record chunk with everything visible, then hides the
    /// second record and checks that `len` and iteration both reflect it.
    #[test]
    fn test_data_chunk() {
        let records: Vec<LogRecord> = vec![(1, "embedding_id_1"), (2, "embedding_id_2")]
            .into_iter()
            .map(|(log_offset, id)| LogRecord {
                log_offset,
                record: OperationRecord {
                    id: id.to_string(),
                    embedding: None,
                    encoding: None,
                    metadata: None,
                    document: None,
                    operation: Operation::Add,
                },
            })
            .collect();
        let data: Arc<[LogRecord]> = records.into();
        let mut chunk = Chunk::new(data);

        // Freshly built chunk: both records are visible, in order.
        assert_eq!(chunk.len(), 2);
        let seen: Vec<(&str, usize)> = chunk
            .iter()
            .map(|(record, index)| (record.record.id.as_str(), index))
            .collect();
        assert_eq!(seen, vec![("embedding_id_1", 0), ("embedding_id_2", 1)]);

        // Hide the second record: only the first remains reachable.
        chunk.set_visibility(vec![true, false]);
        assert_eq!(chunk.len(), 1);
        let seen: Vec<(&str, usize)> = chunk
            .iter()
            .map(|(record, index)| (record.record.id.as_str(), index))
            .collect();
        assert_eq!(seen, vec![("embedding_id_1", 0)]);
    }
}