1use super::{
2 CollectionUuid, Metadata, MetadataValueConversionError, SegmentScope,
3 SegmentScopeConversionError,
4};
5use crate::{chroma_proto, DatabaseUuid};
6use chroma_error::{ChromaError, ErrorCodes};
7use std::{collections::HashMap, str::FromStr};
8use thiserror::Error;
9use tonic::Status;
10use uuid::Uuid;
11
// String keys naming the individual files that belong to a segment.
// `Segment::filepaths_to_prefetch` uses several of them to index
// `Segment::file_path`; the remaining keys are presumably consumed the same way
// by code outside this file — confirm against callers.

// Offset-id / user-id mapping files (grouping inferred from the names — TODO confirm).
pub const USER_ID_TO_OFFSET_ID: &str = "user_id_to_offset_id";
pub const OFFSET_ID_TO_USER_ID: &str = "offset_id_to_user_id";
pub const OFFSET_ID_TO_DATA: &str = "offset_id_to_data";
pub const MAX_OFFSET_ID: &str = "max_offset_id";

// Full-text and typed metadata index files (grouping inferred from the names — TODO confirm).
pub const FULL_TEXT_PLS: &str = "full_text_pls";
pub const STRING_METADATA: &str = "string_metadata";
pub const BOOL_METADATA: &str = "bool_metadata";
pub const F32_METADATA: &str = "f32_metadata";
pub const U32_METADATA: &str = "u32_metadata";

// Sparse index files (grouping inferred from the names — TODO confirm).
pub const SPARSE_MAX: &str = "sparse_max";
pub const SPARSE_OFFSET_VALUE: &str = "sparse_offset_value";

// Vector index files. `POSTING_LIST_PATH` is the key prefetched for
// `SegmentType::Spann` segments.
pub const HNSW_PATH: &str = "hnsw_path";
pub const VERSION_MAP_PATH: &str = "version_map_path";
pub const POSTING_LIST_PATH: &str = "posting_list_path";
// NOTE(review): the constant name contains `BF` but the key string does not.
// The string is presumably persisted, so do not "fix" it without checking for
// existing data that relies on it — confirm.
pub const MAX_HEAD_ID_BF_PATH: &str = "max_head_id_path";

// Quantized SPANN files. The cluster, scalar-metadata and embedding-metadata
// keys are the ones prefetched for `SegmentType::QuantizedSpann` segments.
pub const QUANTIZED_SPANN_CLUSTER: &str = "quantized_spann_cluster";
pub const QUANTIZED_SPANN_SCALAR_METADATA: &str = "quantized_spann_scalar_metadata";
pub const QUANTIZED_SPANN_EMBEDDING_METADATA: &str = "quantized_spann_embedding_metadata";
pub const QUANTIZED_SPANN_RAW_CENTROID: &str = "quantized_spann_raw_centroid";
pub const QUANTIZED_SPANN_QUANTIZED_CENTROID: &str = "quantized_spann_quantized_centroid";
36
/// Newtype around a [`Uuid`] that identifies a segment.
///
/// Note that the derived `Default` defers to `Uuid::default()` (the nil UUID
/// according to the `uuid` crate documentation — verify), whereas
/// [`SegmentUuid::new`] generates a fresh version-4 UUID.
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct SegmentUuid(pub Uuid);
40
41impl SegmentUuid {
42 pub fn new() -> Self {
43 SegmentUuid(Uuid::new_v4())
44 }
45}
46
47impl FromStr for SegmentUuid {
48 type Err = SegmentConversionError;
49
50 fn from_str(s: &str) -> Result<Self, SegmentConversionError> {
51 match Uuid::parse_str(s) {
52 Ok(uuid) => Ok(SegmentUuid(uuid)),
53 Err(_) => Err(SegmentConversionError::InvalidUuid),
54 }
55 }
56}
57
58impl std::fmt::Display for SegmentUuid {
59 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
60 write!(f, "{}", self.0)
61 }
62}
63
/// The kind of a segment.
///
/// Every variant corresponds to one `urn:chroma:segment/...` string; the mapping
/// is implemented by `From<SegmentType> for String` and `TryFrom<&str>`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum SegmentType {
    /// `urn:chroma:segment/metadata/blockfile`
    BlockfileMetadata,
    /// `urn:chroma:segment/record/blockfile`
    BlockfileRecord,
    /// `urn:chroma:segment/vector/hnsw-distributed`
    HnswDistributed,
    /// `urn:chroma:segment/vector/hnsw-local-memory`
    HnswLocalMemory,
    /// `urn:chroma:segment/vector/hnsw-local-persisted`
    HnswLocalPersisted,
    /// `urn:chroma:segment/metadata/sqlite`
    Sqlite,
    /// `urn:chroma:segment/vector/spann`
    Spann,
    /// `urn:chroma:segment/vector/quantized-spann`
    QuantizedSpann,
}
75
76impl From<SegmentType> for String {
77 fn from(segment_type: SegmentType) -> String {
78 match segment_type {
79 SegmentType::BlockfileMetadata => "urn:chroma:segment/metadata/blockfile".to_string(),
80 SegmentType::BlockfileRecord => "urn:chroma:segment/record/blockfile".to_string(),
81 SegmentType::HnswDistributed => {
82 "urn:chroma:segment/vector/hnsw-distributed".to_string()
83 }
84 SegmentType::HnswLocalMemory => {
85 "urn:chroma:segment/vector/hnsw-local-memory".to_string()
86 }
87 SegmentType::HnswLocalPersisted => {
88 "urn:chroma:segment/vector/hnsw-local-persisted".to_string()
89 }
90 SegmentType::Spann => "urn:chroma:segment/vector/spann".to_string(),
91 SegmentType::QuantizedSpann => "urn:chroma:segment/vector/quantized-spann".to_string(),
92 SegmentType::Sqlite => "urn:chroma:segment/metadata/sqlite".to_string(),
93 }
94 }
95}
96
97impl TryFrom<&str> for SegmentType {
98 type Error = SegmentConversionError;
99
100 fn try_from(segment_type: &str) -> Result<Self, Self::Error> {
101 match segment_type {
102 "urn:chroma:segment/metadata/blockfile" => Ok(SegmentType::BlockfileMetadata),
103 "urn:chroma:segment/record/blockfile" => Ok(SegmentType::BlockfileRecord),
104 "urn:chroma:segment/vector/hnsw-distributed" => Ok(SegmentType::HnswDistributed),
105 "urn:chroma:segment/vector/hnsw-local-memory" => Ok(SegmentType::HnswLocalMemory),
106 "urn:chroma:segment/vector/hnsw-local-persisted" => Ok(Self::HnswLocalPersisted),
107 "urn:chroma:segment/vector/spann" => Ok(SegmentType::Spann),
108 "urn:chroma:segment/vector/quantized-spann" => Ok(SegmentType::QuantizedSpann),
109 "urn:chroma:segment/metadata/sqlite" => Ok(SegmentType::Sqlite),
110 _ => Err(SegmentConversionError::InvalidSegmentType),
111 }
112 }
113}
114
/// A segment of a collection, as converted to and from `chroma_proto::Segment`.
#[derive(Clone, Debug, PartialEq)]
pub struct Segment {
    /// Identifier of this segment.
    pub id: SegmentUuid,
    /// Kind of segment; selects which file keys `filepaths_to_prefetch` reads.
    pub r#type: SegmentType,
    /// Scope of the segment.
    pub scope: SegmentScope,
    /// Identifier of the collection this segment belongs to.
    pub collection: CollectionUuid,
    /// Optional segment metadata.
    pub metadata: Option<Metadata>,
    /// Lists of path strings stored under string keys such as
    /// `POSTING_LIST_PATH` or `QUANTIZED_SPANN_CLUSTER`.
    pub file_path: HashMap<String, Vec<String>>,
}
124
125impl Segment {
126 pub fn prefetch_supported(&self) -> bool {
127 matches!(
128 self.r#type,
129 SegmentType::BlockfileMetadata
130 | SegmentType::BlockfileRecord
131 | SegmentType::QuantizedSpann
132 | SegmentType::Spann
133 )
134 }
135
136 pub fn filepaths_to_prefetch(&self) -> Vec<String> {
137 let mut res = Vec::new();
138 match self.r#type {
139 SegmentType::QuantizedSpann => {
140 for key in [
141 QUANTIZED_SPANN_CLUSTER,
142 QUANTIZED_SPANN_EMBEDDING_METADATA,
143 QUANTIZED_SPANN_SCALAR_METADATA,
144 ] {
145 if let Some(paths) = self.file_path.get(key) {
146 res.extend(paths.iter().cloned());
147 }
148 }
149 }
150 SegmentType::Spann => {
151 if let Some(pl_path) = self.file_path.get(POSTING_LIST_PATH) {
152 res.extend(pl_path.iter().cloned());
153 }
154 }
155 SegmentType::BlockfileMetadata | SegmentType::BlockfileRecord => {
156 for paths in self.file_path.values() {
157 res.extend(paths.iter().cloned());
158 }
159 }
160 _ => {}
161 }
162 res
163 }
164
165 pub fn extract_prefix_and_id(path: &str) -> Result<(&str, uuid::Uuid), uuid::Error> {
166 let (prefix, id) = match path.rfind('/') {
167 Some(pos) => (&path[..pos], &path[pos + 1..]),
168 None => ("", path),
169 };
170 match Uuid::try_parse(id) {
171 Ok(uid) => Ok((prefix, uid)),
172 Err(e) => Err(e),
173 }
174 }
175
176 pub fn construct_prefix_path(&self, tenant: &str, database_id: &DatabaseUuid) -> String {
177 format!(
178 "tenant/{}/database/{}/collection/{}/segment/{}",
179 tenant, database_id, self.collection, self.id
180 )
181 }
182}
183
/// Errors produced by the segment conversions in this file.
#[derive(Error, Debug)]
pub enum SegmentConversionError {
    /// A segment id or collection id string could not be parsed as a UUID.
    #[error("Invalid UUID")]
    InvalidUuid,
    /// Converting the segment metadata failed; the message is forwarded from the
    /// wrapped error.
    #[error(transparent)]
    MetadataValueConversionError(#[from] MetadataValueConversionError),
    /// Converting the segment scope failed; the message is forwarded from the
    /// wrapped error.
    #[error(transparent)]
    SegmentScopeConversionError(#[from] SegmentScopeConversionError),
    /// The type string is not one of the URNs accepted by
    /// `TryFrom<&str> for SegmentType`.
    #[error("Invalid segment type")]
    InvalidSegmentType,
}
195
196impl ChromaError for SegmentConversionError {
197 fn code(&self) -> ErrorCodes {
198 match self {
199 SegmentConversionError::InvalidUuid => ErrorCodes::InvalidArgument,
200 SegmentConversionError::InvalidSegmentType => ErrorCodes::InvalidArgument,
201 SegmentConversionError::SegmentScopeConversionError(e) => e.code(),
202 SegmentConversionError::MetadataValueConversionError(e) => e.code(),
203 }
204 }
205}
206
207impl From<SegmentConversionError> for Status {
208 fn from(value: SegmentConversionError) -> Self {
209 Status::invalid_argument(value.to_string())
210 }
211}
212
213impl TryFrom<chroma_proto::Segment> for Segment {
214 type Error = SegmentConversionError;
215
216 fn try_from(proto_segment: chroma_proto::Segment) -> Result<Self, Self::Error> {
217 let mut proto_segment = proto_segment;
218
219 let segment_uuid = match SegmentUuid::from_str(&proto_segment.id) {
220 Ok(uuid) => uuid,
221 Err(_) => return Err(SegmentConversionError::InvalidUuid),
222 };
223 let collection_uuid = match Uuid::try_parse(&proto_segment.collection) {
224 Ok(uuid) => uuid,
225 Err(_) => return Err(SegmentConversionError::InvalidUuid),
226 };
227 let collection_uuid = CollectionUuid(collection_uuid);
228 let segment_metadata: Option<Metadata> = match proto_segment.metadata {
229 Some(proto_metadata) => match proto_metadata.try_into() {
230 Ok(metadata) => Some(metadata),
231 Err(e) => return Err(SegmentConversionError::MetadataValueConversionError(e)),
232 },
233 None => None,
234 };
235 let scope: SegmentScope = match proto_segment.scope.try_into() {
236 Ok(scope) => scope,
237 Err(e) => return Err(SegmentConversionError::SegmentScopeConversionError(e)),
238 };
239
240 let segment_type: SegmentType = proto_segment.r#type.as_str().try_into()?;
241
242 let mut file_paths = HashMap::new();
243 let drain = proto_segment.file_paths.drain();
244 for (key, value) in drain {
245 file_paths.insert(key, value.paths);
246 }
247
248 Ok(Segment {
249 id: segment_uuid,
250 r#type: segment_type,
251 scope,
252 collection: collection_uuid,
253 metadata: segment_metadata,
254 file_path: file_paths,
255 })
256 }
257}
258
259impl From<Segment> for chroma_proto::Segment {
260 fn from(value: Segment) -> Self {
261 Self {
262 id: value.id.0.to_string(),
263 r#type: value.r#type.into(),
264 scope: chroma_proto::SegmentScope::from(value.scope) as i32,
265 collection: value.collection.0.to_string(),
266 metadata: value.metadata.map(Into::into),
267 file_paths: value
268 .file_path
269 .into_iter()
270 .map(|(name, paths)| (name, chroma_proto::FilePaths { paths }))
271 .collect(),
272 }
273 }
274}
275
276pub fn test_segment(collection_uuid: CollectionUuid, scope: SegmentScope) -> Segment {
277 let r#type = match scope {
278 SegmentScope::METADATA => SegmentType::BlockfileMetadata,
279 SegmentScope::RECORD => SegmentType::BlockfileRecord,
280 SegmentScope::VECTOR => SegmentType::HnswDistributed,
281 SegmentScope::SQLITE => unimplemented!("Sqlite segment is not implemented"),
282 };
283 Segment {
284 id: SegmentUuid::new(),
285 r#type,
286 scope,
287 collection: collection_uuid,
288 metadata: None,
289 file_path: HashMap::new(),
290 }
291}
292
#[cfg(test)]
mod tests {

    use super::*;
    use crate::MetadataValue;

    /// A protobuf segment with nil ids, a vector scope and one integer metadata
    /// entry converts field-by-field into the expected `Segment`.
    #[test]
    fn test_segment_try_from() {
        let foo_value = chroma_proto::UpdateMetadataValue {
            value: Some(chroma_proto::update_metadata_value::Value::IntValue(42)),
        };
        let proto_metadata = chroma_proto::UpdateMetadata {
            metadata: HashMap::from([("foo".to_string(), foo_value)]),
        };
        let proto_segment = chroma_proto::Segment {
            id: "00000000-0000-0000-0000-000000000000".to_string(),
            r#type: "urn:chroma:segment/vector/hnsw-distributed".to_string(),
            scope: chroma_proto::SegmentScope::Vector as i32,
            collection: "00000000-0000-0000-0000-000000000000".to_string(),
            metadata: Some(proto_metadata),
            file_paths: HashMap::new(),
        };

        let segment = Segment::try_from(proto_segment).unwrap();

        assert_eq!(segment.id, SegmentUuid(Uuid::nil()));
        assert_eq!(segment.r#type, SegmentType::HnswDistributed);
        assert_eq!(segment.scope, SegmentScope::VECTOR);
        assert_eq!(segment.collection, CollectionUuid(Uuid::nil()));
        let converted_metadata = segment.metadata.unwrap();
        assert_eq!(converted_metadata.len(), 1);
        assert_eq!(
            converted_metadata.get("foo").unwrap(),
            &MetadataValue::Int(42)
        );
    }

    /// The prefix path interpolates tenant, database, collection and segment ids.
    #[test]
    fn test_segment_construct_prefix_path() {
        let database_id = DatabaseUuid(Uuid::nil());
        let segment = Segment {
            id: SegmentUuid(Uuid::nil()),
            r#type: SegmentType::BlockfileMetadata,
            scope: SegmentScope::METADATA,
            collection: CollectionUuid(Uuid::nil()),
            metadata: None,
            file_path: HashMap::new(),
        };

        let prefix_path = segment.construct_prefix_path("test_tenant", &database_id);

        assert_eq!(
            prefix_path,
            "tenant/test_tenant/database/00000000-0000-0000-0000-000000000000/collection/00000000-0000-0000-0000-000000000000/segment/00000000-0000-0000-0000-000000000000"
        );
    }

    /// A prefixed path is split at its last `/` into prefix and UUID.
    #[test]
    fn test_segment_extract_prefix_and_id() {
        let path = "tenant/test_tenant/database/00000000-0000-0000-0000-000000000000/collection/00000000-0000-0000-0000-000000000000/segment/00000000-0000-0000-0000-000000000000/00000000-0000-0000-0000-000000000001";
        let expected_id = "00000000-0000-0000-0000-000000000001"
            .parse::<Uuid>()
            .expect("Cannot happen");

        let (prefix, id) =
            Segment::extract_prefix_and_id(path).expect("Failed to extract prefix and id");

        assert_eq!(
            prefix,
            "tenant/test_tenant/database/00000000-0000-0000-0000-000000000000/collection/00000000-0000-0000-0000-000000000000/segment/00000000-0000-0000-0000-000000000000"
        );
        assert_eq!(id, expected_id);
    }

    /// A bare UUID (no `/`) yields an empty prefix.
    #[test]
    fn test_segment_extract_prefix_and_id_legacy() {
        let path = "00000000-0000-0000-0000-000000000001";
        let expected_id = "00000000-0000-0000-0000-000000000001"
            .parse::<Uuid>()
            .expect("Cannot happen");

        let (prefix, id) =
            Segment::extract_prefix_and_id(path).expect("Failed to extract prefix and id");

        assert_eq!(prefix, "");
        assert_eq!(id, expected_id);
    }
}