1use super::{
2 CollectionUuid, Metadata, MetadataValueConversionError, SegmentScope,
3 SegmentScopeConversionError,
4};
5use crate::{chroma_proto, DatabaseUuid};
6use chroma_error::{ChromaError, ErrorCodes};
7use std::{collections::HashMap, str::FromStr};
8use thiserror::Error;
9use tonic::Status;
10use uuid::Uuid;
11
// String identifiers for the files a segment can own.
//
// Within this file, `Segment::filepaths_to_prefetch` uses several of them as
// keys into `Segment::file_path`; the remaining ones are presumably used the
// same way by other modules (not visible here — confirm against callers).
// These values look like persisted identifiers, so renaming a value is
// presumably a data-compatibility change, not a cosmetic one.

// Record-related files (grouping inferred from the names — TODO confirm).
pub const USER_ID_TO_OFFSET_ID: &str = "user_id_to_offset_id";
pub const OFFSET_ID_TO_USER_ID: &str = "offset_id_to_user_id";
pub const OFFSET_ID_TO_DATA: &str = "offset_id_to_data";
pub const MAX_OFFSET_ID: &str = "max_offset_id";
// Skipped by `Segment::filepaths_to_prefetch` for blockfile segments.
pub const USER_ID_BLOOM_FILTER: &str = "user_id_bloom_filter";

// Metadata-related files (grouping inferred from the names — TODO confirm).
pub const FULL_TEXT_PLS: &str = "full_text_pls";
pub const STRING_METADATA: &str = "string_metadata";
pub const BOOL_METADATA: &str = "bool_metadata";
pub const F32_METADATA: &str = "f32_metadata";
pub const U32_METADATA: &str = "u32_metadata";

// Sparse-index files (grouping inferred from the names — TODO confirm).
pub const SPARSE_MAX: &str = "sparse_max";
pub const SPARSE_OFFSET_VALUE: &str = "sparse_offset_value";

// Vector-index files. `POSTING_LIST_PATH` is the only key prefetched for
// `SegmentType::Spann` segments.
pub const HNSW_PATH: &str = "hnsw_path";
pub const VERSION_MAP_PATH: &str = "version_map_path";
pub const POSTING_LIST_PATH: &str = "posting_list_path";
// NOTE(review): the constant is named `..._BF_PATH` but the value contains no
// `bf`; presumably kept for compatibility with existing data — confirm before
// touching either side.
pub const MAX_HEAD_ID_BF_PATH: &str = "max_head_id_path";

// Quantized SPANN files. Only the cluster and the two metadata keys are
// prefetched for `SegmentType::QuantizedSpann`; the centroid keys are not.
pub const QUANTIZED_SPANN_CLUSTER: &str = "quantized_spann_cluster";
pub const QUANTIZED_SPANN_SCALAR_METADATA: &str = "quantized_spann_scalar_metadata";
pub const QUANTIZED_SPANN_EMBEDDING_METADATA: &str = "quantized_spann_embedding_metadata";
pub const QUANTIZED_SPANN_RAW_CENTROID: &str = "quantized_spann_raw_centroid";
pub const QUANTIZED_SPANN_QUANTIZED_CENTROID: &str = "quantized_spann_quantized_centroid";
37
38#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)]
40pub struct SegmentUuid(pub Uuid);
41
42impl SegmentUuid {
43 pub fn new() -> Self {
44 SegmentUuid(Uuid::new_v4())
45 }
46}
47
48impl FromStr for SegmentUuid {
49 type Err = SegmentConversionError;
50
51 fn from_str(s: &str) -> Result<Self, SegmentConversionError> {
52 match Uuid::parse_str(s) {
53 Ok(uuid) => Ok(SegmentUuid(uuid)),
54 Err(_) => Err(SegmentConversionError::InvalidUuid),
55 }
56 }
57}
58
59impl std::fmt::Display for SegmentUuid {
60 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61 write!(f, "{}", self.0)
62 }
63}
64
/// The kinds of segment known to this module.
///
/// Every variant has exactly one URN string representation; the
/// `From<SegmentType> for String` conversion right below lists them.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum SegmentType {
    BlockfileMetadata,
    BlockfileRecord,
    HnswDistributed,
    HnswLocalMemory,
    HnswLocalPersisted,
    Sqlite,
    Spann,
    QuantizedSpann,
}

impl From<SegmentType> for String {
    /// Returns the URN string that identifies `segment_type`.
    fn from(segment_type: SegmentType) -> String {
        // Pick the static URN first and allocate once at the end.
        let urn: &'static str = match segment_type {
            SegmentType::BlockfileMetadata => "urn:chroma:segment/metadata/blockfile",
            SegmentType::BlockfileRecord => "urn:chroma:segment/record/blockfile",
            SegmentType::HnswDistributed => "urn:chroma:segment/vector/hnsw-distributed",
            SegmentType::HnswLocalMemory => "urn:chroma:segment/vector/hnsw-local-memory",
            SegmentType::HnswLocalPersisted => "urn:chroma:segment/vector/hnsw-local-persisted",
            SegmentType::Spann => "urn:chroma:segment/vector/spann",
            SegmentType::QuantizedSpann => "urn:chroma:segment/vector/quantized-spann",
            SegmentType::Sqlite => "urn:chroma:segment/metadata/sqlite",
        };
        urn.to_owned()
    }
}
97
98impl TryFrom<&str> for SegmentType {
99 type Error = SegmentConversionError;
100
101 fn try_from(segment_type: &str) -> Result<Self, Self::Error> {
102 match segment_type {
103 "urn:chroma:segment/metadata/blockfile" => Ok(SegmentType::BlockfileMetadata),
104 "urn:chroma:segment/record/blockfile" => Ok(SegmentType::BlockfileRecord),
105 "urn:chroma:segment/vector/hnsw-distributed" => Ok(SegmentType::HnswDistributed),
106 "urn:chroma:segment/vector/hnsw-local-memory" => Ok(SegmentType::HnswLocalMemory),
107 "urn:chroma:segment/vector/hnsw-local-persisted" => Ok(Self::HnswLocalPersisted),
108 "urn:chroma:segment/vector/spann" => Ok(SegmentType::Spann),
109 "urn:chroma:segment/vector/quantized-spann" => Ok(SegmentType::QuantizedSpann),
110 "urn:chroma:segment/metadata/sqlite" => Ok(SegmentType::Sqlite),
111 _ => Err(SegmentConversionError::InvalidSegmentType),
112 }
113 }
114}
115
/// A segment of a collection together with the file paths registered for it.
#[derive(Clone, Debug, PartialEq)]
pub struct Segment {
    /// Unique identifier of this segment.
    pub id: SegmentUuid,
    /// Kind of segment; decides whether and which files can be prefetched.
    pub r#type: SegmentType,
    /// Scope this segment is declared under.
    pub scope: SegmentScope,
    /// Identifier of the collection this segment is attached to.
    pub collection: CollectionUuid,
    /// Optional metadata carried alongside the segment.
    pub metadata: Option<Metadata>,
    /// Maps a file key (for example `POSTING_LIST_PATH`) to the paths stored
    /// under that key.
    pub file_path: HashMap<String, Vec<String>>,
}
125
126impl Segment {
127 pub fn prefetch_supported(&self) -> bool {
128 matches!(
129 self.r#type,
130 SegmentType::BlockfileMetadata
131 | SegmentType::BlockfileRecord
132 | SegmentType::QuantizedSpann
133 | SegmentType::Spann
134 )
135 }
136
137 pub fn filepaths_to_prefetch(&self) -> Vec<String> {
138 let mut res = Vec::new();
139 match self.r#type {
140 SegmentType::QuantizedSpann => {
141 for key in [
142 QUANTIZED_SPANN_CLUSTER,
143 QUANTIZED_SPANN_EMBEDDING_METADATA,
144 QUANTIZED_SPANN_SCALAR_METADATA,
145 ] {
146 if let Some(paths) = self.file_path.get(key) {
147 res.extend(paths.iter().cloned());
148 }
149 }
150 }
151 SegmentType::Spann => {
152 if let Some(pl_path) = self.file_path.get(POSTING_LIST_PATH) {
153 res.extend(pl_path.iter().cloned());
154 }
155 }
156 SegmentType::BlockfileMetadata | SegmentType::BlockfileRecord => {
157 for (key, paths) in &self.file_path {
158 if key == USER_ID_BLOOM_FILTER {
159 continue;
160 }
161 res.extend(paths.iter().cloned());
162 }
163 }
164 _ => {}
165 }
166 res
167 }
168
169 pub fn extract_prefix_and_id(path: &str) -> Result<(&str, uuid::Uuid), uuid::Error> {
170 let (prefix, id) = match path.rfind('/') {
171 Some(pos) => (&path[..pos], &path[pos + 1..]),
172 None => ("", path),
173 };
174 match Uuid::try_parse(id) {
175 Ok(uid) => Ok((prefix, uid)),
176 Err(e) => Err(e),
177 }
178 }
179
180 pub fn construct_prefix_path(&self, tenant: &str, database_id: &DatabaseUuid) -> String {
181 format!(
182 "tenant/{}/database/{}/collection/{}/segment/{}",
183 tenant, database_id, self.collection, self.id
184 )
185 }
186}
187
/// Errors raised while converting to or from segment types in this module.
#[derive(Error, Debug)]
pub enum SegmentConversionError {
    /// A segment or collection id string could not be parsed as a UUID.
    #[error("Invalid UUID")]
    InvalidUuid,
    /// Converting the segment's metadata failed; the message is the inner error's.
    #[error(transparent)]
    MetadataValueConversionError(#[from] MetadataValueConversionError),
    /// Converting the segment's scope failed; the message is the inner error's.
    #[error(transparent)]
    SegmentScopeConversionError(#[from] SegmentScopeConversionError),
    /// The segment type string is not one of the known URNs.
    #[error("Invalid segment type")]
    InvalidSegmentType,
}
199
200impl ChromaError for SegmentConversionError {
201 fn code(&self) -> ErrorCodes {
202 match self {
203 SegmentConversionError::InvalidUuid => ErrorCodes::InvalidArgument,
204 SegmentConversionError::InvalidSegmentType => ErrorCodes::InvalidArgument,
205 SegmentConversionError::SegmentScopeConversionError(e) => e.code(),
206 SegmentConversionError::MetadataValueConversionError(e) => e.code(),
207 }
208 }
209}
210
211impl From<SegmentConversionError> for Status {
212 fn from(value: SegmentConversionError) -> Self {
213 Status::invalid_argument(value.to_string())
214 }
215}
216
217impl TryFrom<chroma_proto::Segment> for Segment {
218 type Error = SegmentConversionError;
219
220 fn try_from(proto_segment: chroma_proto::Segment) -> Result<Self, Self::Error> {
221 let mut proto_segment = proto_segment;
222
223 let segment_uuid = match SegmentUuid::from_str(&proto_segment.id) {
224 Ok(uuid) => uuid,
225 Err(_) => return Err(SegmentConversionError::InvalidUuid),
226 };
227 let collection_uuid = match Uuid::try_parse(&proto_segment.collection) {
228 Ok(uuid) => uuid,
229 Err(_) => return Err(SegmentConversionError::InvalidUuid),
230 };
231 let collection_uuid = CollectionUuid(collection_uuid);
232 let segment_metadata: Option<Metadata> = match proto_segment.metadata {
233 Some(proto_metadata) => match proto_metadata.try_into() {
234 Ok(metadata) => Some(metadata),
235 Err(e) => return Err(SegmentConversionError::MetadataValueConversionError(e)),
236 },
237 None => None,
238 };
239 let scope: SegmentScope = match proto_segment.scope.try_into() {
240 Ok(scope) => scope,
241 Err(e) => return Err(SegmentConversionError::SegmentScopeConversionError(e)),
242 };
243
244 let segment_type: SegmentType = proto_segment.r#type.as_str().try_into()?;
245
246 let mut file_paths = HashMap::new();
247 let drain = proto_segment.file_paths.drain();
248 for (key, value) in drain {
249 file_paths.insert(key, value.paths);
250 }
251
252 Ok(Segment {
253 id: segment_uuid,
254 r#type: segment_type,
255 scope,
256 collection: collection_uuid,
257 metadata: segment_metadata,
258 file_path: file_paths,
259 })
260 }
261}
262
263impl From<Segment> for chroma_proto::Segment {
264 fn from(value: Segment) -> Self {
265 Self {
266 id: value.id.0.to_string(),
267 r#type: value.r#type.into(),
268 scope: chroma_proto::SegmentScope::from(value.scope) as i32,
269 collection: value.collection.0.to_string(),
270 metadata: value.metadata.map(Into::into),
271 file_paths: value
272 .file_path
273 .into_iter()
274 .map(|(name, paths)| (name, chroma_proto::FilePaths { paths }))
275 .collect(),
276 }
277 }
278}
279
280pub fn test_segment(collection_uuid: CollectionUuid, scope: SegmentScope) -> Segment {
281 let r#type = match scope {
282 SegmentScope::METADATA => SegmentType::BlockfileMetadata,
283 SegmentScope::RECORD => SegmentType::BlockfileRecord,
284 SegmentScope::VECTOR => SegmentType::HnswDistributed,
285 SegmentScope::SQLITE => unimplemented!("Sqlite segment is not implemented"),
286 };
287 Segment {
288 id: SegmentUuid::new(),
289 r#type,
290 scope,
291 collection: collection_uuid,
292 metadata: None,
293 file_path: HashMap::new(),
294 }
295}
296
// Unit tests for the protobuf conversion and the path helpers on `Segment`.
#[cfg(test)]
mod tests {

    use super::*;
    use crate::MetadataValue;

    // A well-formed protobuf segment converts field by field, including its
    // single integer metadata entry.
    #[test]
    fn test_segment_try_from() {
        let mut metadata = chroma_proto::UpdateMetadata {
            metadata: HashMap::new(),
        };
        metadata.metadata.insert(
            "foo".to_string(),
            chroma_proto::UpdateMetadataValue {
                value: Some(chroma_proto::update_metadata_value::Value::IntValue(42)),
            },
        );
        let proto_segment = chroma_proto::Segment {
            id: "00000000-0000-0000-0000-000000000000".to_string(),
            r#type: "urn:chroma:segment/vector/hnsw-distributed".to_string(),
            scope: chroma_proto::SegmentScope::Vector as i32,
            collection: "00000000-0000-0000-0000-000000000000".to_string(),
            metadata: Some(metadata),
            file_paths: HashMap::new(),
        };
        let converted_segment: Segment = proto_segment.try_into().unwrap();
        assert_eq!(converted_segment.id, SegmentUuid(Uuid::nil()));
        assert_eq!(converted_segment.r#type, SegmentType::HnswDistributed);
        assert_eq!(converted_segment.scope, SegmentScope::VECTOR);
        assert_eq!(converted_segment.collection, CollectionUuid(Uuid::nil()));
        let metadata = converted_segment.metadata.unwrap();
        assert_eq!(metadata.len(), 1);
        assert_eq!(metadata.get("foo").unwrap(), &MetadataValue::Int(42));
    }

    // The prefix path nests tenant, database, collection and segment ids in
    // that order.
    #[test]
    fn test_segment_construct_prefix_path() {
        let segment = Segment {
            id: SegmentUuid(Uuid::nil()),
            r#type: SegmentType::BlockfileMetadata,
            scope: SegmentScope::METADATA,
            collection: CollectionUuid(Uuid::nil()),
            metadata: None,
            file_path: HashMap::new(),
        };
        let tenant = "test_tenant";
        let database_id = &DatabaseUuid(Uuid::nil());
        let prefix_path = segment.construct_prefix_path(tenant, database_id);
        assert_eq!(
            prefix_path,
            "tenant/test_tenant/database/00000000-0000-0000-0000-000000000000/collection/00000000-0000-0000-0000-000000000000/segment/00000000-0000-0000-0000-000000000000"
        );
    }

    // A full path splits at its last `/` into the prefix and the trailing id.
    #[test]
    fn test_segment_extract_prefix_and_id() {
        let path = "tenant/test_tenant/database/00000000-0000-0000-0000-000000000000/collection/00000000-0000-0000-0000-000000000000/segment/00000000-0000-0000-0000-000000000000/00000000-0000-0000-0000-000000000001";
        let (prefix, id) =
            Segment::extract_prefix_and_id(path).expect("Failed to extract prefix and id");
        assert_eq!(
            prefix,
            "tenant/test_tenant/database/00000000-0000-0000-0000-000000000000/collection/00000000-0000-0000-0000-000000000000/segment/00000000-0000-0000-0000-000000000000"
        );
        assert_eq!(
            id,
            Uuid::from_str("00000000-0000-0000-0000-000000000001").expect("Cannot happen")
        );
    }

    // A bare id with no `/` yields an empty prefix.
    #[test]
    fn test_segment_extract_prefix_and_id_legacy() {
        let path = "00000000-0000-0000-0000-000000000001";
        let (prefix, id) =
            Segment::extract_prefix_and_id(path).expect("Failed to extract prefix and id");
        assert_eq!(prefix, "");
        assert_eq!(
            id,
            Uuid::from_str("00000000-0000-0000-0000-000000000001").expect("Cannot happen")
        );
    }
}