chroma_types/
segment.rs

1use super::{
2    CollectionUuid, Metadata, MetadataValueConversionError, SegmentScope,
3    SegmentScopeConversionError,
4};
5use crate::{chroma_proto, DatabaseUuid};
6use chroma_error::{ChromaError, ErrorCodes};
7use std::{collections::HashMap, str::FromStr};
8use thiserror::Error;
9use tonic::Status;
10use uuid::Uuid;
11
// String names for the individual files/indexes that make up a segment.
// `POSTING_LIST_PATH` is used in this file as a key into `Segment::file_path`;
// the remaining names are presumably used the same way by the segment
// readers/writers — TODO confirm against callers.

// Names that, judging by their wording, relate to record storage (id mappings,
// record data, and the maximum offset id) — confirm against the record segment.
pub const USER_ID_TO_OFFSET_ID: &str = "user_id_to_offset_id";
pub const OFFSET_ID_TO_USER_ID: &str = "offset_id_to_user_id";
pub const OFFSET_ID_TO_DATA: &str = "offset_id_to_data";
pub const MAX_OFFSET_ID: &str = "max_offset_id";

// Names for the full-text index and the per-type metadata indexes
// (string / bool / f32 / u32).
pub const FULL_TEXT_PLS: &str = "full_text_pls";
pub const STRING_METADATA: &str = "string_metadata";
pub const BOOL_METADATA: &str = "bool_metadata";
pub const F32_METADATA: &str = "f32_metadata";
pub const U32_METADATA: &str = "u32_metadata";

// Names prefixed with "sparse"; presumably sparse-index files — TODO confirm.
pub const SPARSE_MAX: &str = "sparse_max";
pub const SPARSE_OFFSET_VALUE: &str = "sparse_offset_value";

// Names for vector-index related paths. `POSTING_LIST_PATH` is the key read by
// `Segment::filepaths_to_prefetch` for SPANN segments.
pub const HNSW_PATH: &str = "hnsw_path";
pub const VERSION_MAP_PATH: &str = "version_map_path";
pub const POSTING_LIST_PATH: &str = "posting_list_path";
// NOTE(review): the constant name contains `BF` but the string value does not;
// the value is kept as-is because it is persisted/looked up by this exact text.
pub const MAX_HEAD_ID_BF_PATH: &str = "max_head_id_path";
30
/// Strongly typed identifier for a segment.
///
/// `SegmentUuid` is a newtype around [`Uuid`], so a segment id is a distinct type
/// from a bare `Uuid` and from other id wrappers. The wrapped value is public and
/// can be read through `.0`.
///
/// It can be parsed from a string via [`FromStr`] and is printed via `Display`
/// using the wrapped UUID's own formatting.
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct SegmentUuid(pub Uuid);
34
35impl SegmentUuid {
36    pub fn new() -> Self {
37        SegmentUuid(Uuid::new_v4())
38    }
39}
40
41impl FromStr for SegmentUuid {
42    type Err = SegmentConversionError;
43
44    fn from_str(s: &str) -> Result<Self, SegmentConversionError> {
45        match Uuid::parse_str(s) {
46            Ok(uuid) => Ok(SegmentUuid(uuid)),
47            Err(_) => Err(SegmentConversionError::InvalidUuid),
48        }
49    }
50}
51
52impl std::fmt::Display for SegmentUuid {
53    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54        write!(f, "{}", self.0)
55    }
56}
57
/// The kinds of segment known to this module.
///
/// Every variant has a canonical URN string; the `From<SegmentType> for String`
/// and `TryFrom<&str> for SegmentType` impls in this file convert between the
/// two representations.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum SegmentType {
    /// URN: `urn:chroma:segment/metadata/blockfile`.
    BlockfileMetadata,
    /// URN: `urn:chroma:segment/record/blockfile`.
    BlockfileRecord,
    /// URN: `urn:chroma:segment/vector/hnsw-distributed`.
    HnswDistributed,
    /// URN: `urn:chroma:segment/vector/hnsw-local-memory`.
    HnswLocalMemory,
    /// URN: `urn:chroma:segment/vector/hnsw-local-persisted`.
    HnswLocalPersisted,
    /// URN: `urn:chroma:segment/metadata/sqlite`.
    Sqlite,
    /// URN: `urn:chroma:segment/vector/spann`.
    Spann,
}
68
69impl From<SegmentType> for String {
70    fn from(segment_type: SegmentType) -> String {
71        match segment_type {
72            SegmentType::BlockfileMetadata => "urn:chroma:segment/metadata/blockfile".to_string(),
73            SegmentType::BlockfileRecord => "urn:chroma:segment/record/blockfile".to_string(),
74            SegmentType::HnswDistributed => {
75                "urn:chroma:segment/vector/hnsw-distributed".to_string()
76            }
77            SegmentType::HnswLocalMemory => {
78                "urn:chroma:segment/vector/hnsw-local-memory".to_string()
79            }
80            SegmentType::HnswLocalPersisted => {
81                "urn:chroma:segment/vector/hnsw-local-persisted".to_string()
82            }
83            SegmentType::Spann => "urn:chroma:segment/vector/spann".to_string(),
84            SegmentType::Sqlite => "urn:chroma:segment/metadata/sqlite".to_string(),
85        }
86    }
87}
88
89impl TryFrom<&str> for SegmentType {
90    type Error = SegmentConversionError;
91
92    fn try_from(segment_type: &str) -> Result<Self, Self::Error> {
93        match segment_type {
94            "urn:chroma:segment/metadata/blockfile" => Ok(SegmentType::BlockfileMetadata),
95            "urn:chroma:segment/record/blockfile" => Ok(SegmentType::BlockfileRecord),
96            "urn:chroma:segment/vector/hnsw-distributed" => Ok(SegmentType::HnswDistributed),
97            "urn:chroma:segment/vector/hnsw-local-memory" => Ok(SegmentType::HnswLocalMemory),
98            "urn:chroma:segment/vector/hnsw-local-persisted" => Ok(Self::HnswLocalPersisted),
99            "urn:chroma:segment/vector/spann" => Ok(SegmentType::Spann),
100            "urn:chroma:segment/metadata/sqlite" => Ok(SegmentType::Sqlite),
101            _ => Err(SegmentConversionError::InvalidSegmentType),
102        }
103    }
104}
105
/// In-memory description of a single segment.
///
/// Converts from the wire type `chroma_proto::Segment` (fallibly, via `TryFrom`)
/// and back into it (via `From`); both impls live in this file.
#[derive(Clone, Debug, PartialEq)]
pub struct Segment {
    /// Id of this segment.
    pub id: SegmentUuid,
    /// Kind of segment; also decides which files are prefetched.
    pub r#type: SegmentType,
    /// Scope of the segment (see [`SegmentScope`]).
    pub scope: SegmentScope,
    /// Id of the collection recorded for this segment.
    pub collection: CollectionUuid,
    /// Optional metadata attached to the segment.
    pub metadata: Option<Metadata>,
    /// Named groups of file paths: each key maps to the list of paths stored
    /// under that name (for example [`POSTING_LIST_PATH`] on SPANN segments).
    pub file_path: HashMap<String, Vec<String>>,
}
115
116impl Segment {
117    pub fn prefetch_supported(&self) -> bool {
118        matches!(
119            self.r#type,
120            SegmentType::BlockfileMetadata | SegmentType::BlockfileRecord | SegmentType::Spann
121        )
122    }
123
124    pub fn filepaths_to_prefetch(&self) -> Vec<String> {
125        let mut res = Vec::new();
126        match self.r#type {
127            SegmentType::Spann => {
128                if let Some(pl_path) = self.file_path.get(POSTING_LIST_PATH) {
129                    res.extend(pl_path.iter().cloned());
130                }
131            }
132            SegmentType::BlockfileMetadata | SegmentType::BlockfileRecord => {
133                for paths in self.file_path.values() {
134                    res.extend(paths.iter().cloned());
135                }
136            }
137            _ => {}
138        }
139        res
140    }
141
142    pub fn extract_prefix_and_id(path: &str) -> Result<(&str, uuid::Uuid), uuid::Error> {
143        let (prefix, id) = match path.rfind('/') {
144            Some(pos) => (&path[..pos], &path[pos + 1..]),
145            None => ("", path),
146        };
147        match Uuid::try_parse(id) {
148            Ok(uid) => Ok((prefix, uid)),
149            Err(e) => Err(e),
150        }
151    }
152
153    pub fn construct_prefix_path(&self, tenant: &str, database_id: &DatabaseUuid) -> String {
154        format!(
155            "tenant/{}/database/{}/collection/{}/segment/{}",
156            tenant, database_id, self.collection, self.id
157        )
158    }
159}
160
/// Errors produced while parsing segment ids and segment types, or while
/// converting a `chroma_proto::Segment` into a [`Segment`].
#[derive(Error, Debug)]
pub enum SegmentConversionError {
    /// A segment id or collection id string could not be parsed as a UUID.
    #[error("Invalid UUID")]
    InvalidUuid,
    /// The segment's metadata failed to convert; the message is that of the
    /// wrapped error.
    #[error(transparent)]
    MetadataValueConversionError(#[from] MetadataValueConversionError),
    /// The segment's scope failed to convert; the message is that of the
    /// wrapped error.
    #[error(transparent)]
    SegmentScopeConversionError(#[from] SegmentScopeConversionError),
    /// The segment type string is not one of the known URNs.
    #[error("Invalid segment type")]
    InvalidSegmentType,
}
172
173impl ChromaError for SegmentConversionError {
174    fn code(&self) -> ErrorCodes {
175        match self {
176            SegmentConversionError::InvalidUuid => ErrorCodes::InvalidArgument,
177            SegmentConversionError::InvalidSegmentType => ErrorCodes::InvalidArgument,
178            SegmentConversionError::SegmentScopeConversionError(e) => e.code(),
179            SegmentConversionError::MetadataValueConversionError(e) => e.code(),
180        }
181    }
182}
183
184impl From<SegmentConversionError> for Status {
185    fn from(value: SegmentConversionError) -> Self {
186        Status::invalid_argument(value.to_string())
187    }
188}
189
190impl TryFrom<chroma_proto::Segment> for Segment {
191    type Error = SegmentConversionError;
192
193    fn try_from(proto_segment: chroma_proto::Segment) -> Result<Self, Self::Error> {
194        let mut proto_segment = proto_segment;
195
196        let segment_uuid = match SegmentUuid::from_str(&proto_segment.id) {
197            Ok(uuid) => uuid,
198            Err(_) => return Err(SegmentConversionError::InvalidUuid),
199        };
200        let collection_uuid = match Uuid::try_parse(&proto_segment.collection) {
201            Ok(uuid) => uuid,
202            Err(_) => return Err(SegmentConversionError::InvalidUuid),
203        };
204        let collection_uuid = CollectionUuid(collection_uuid);
205        let segment_metadata: Option<Metadata> = match proto_segment.metadata {
206            Some(proto_metadata) => match proto_metadata.try_into() {
207                Ok(metadata) => Some(metadata),
208                Err(e) => return Err(SegmentConversionError::MetadataValueConversionError(e)),
209            },
210            None => None,
211        };
212        let scope: SegmentScope = match proto_segment.scope.try_into() {
213            Ok(scope) => scope,
214            Err(e) => return Err(SegmentConversionError::SegmentScopeConversionError(e)),
215        };
216
217        let segment_type: SegmentType = proto_segment.r#type.as_str().try_into()?;
218
219        let mut file_paths = HashMap::new();
220        let drain = proto_segment.file_paths.drain();
221        for (key, value) in drain {
222            file_paths.insert(key, value.paths);
223        }
224
225        Ok(Segment {
226            id: segment_uuid,
227            r#type: segment_type,
228            scope,
229            collection: collection_uuid,
230            metadata: segment_metadata,
231            file_path: file_paths,
232        })
233    }
234}
235
236impl From<Segment> for chroma_proto::Segment {
237    fn from(value: Segment) -> Self {
238        Self {
239            id: value.id.0.to_string(),
240            r#type: value.r#type.into(),
241            scope: chroma_proto::SegmentScope::from(value.scope) as i32,
242            collection: value.collection.0.to_string(),
243            metadata: value.metadata.map(Into::into),
244            file_paths: value
245                .file_path
246                .into_iter()
247                .map(|(name, paths)| (name, chroma_proto::FilePaths { paths }))
248                .collect(),
249        }
250    }
251}
252
253pub fn test_segment(collection_uuid: CollectionUuid, scope: SegmentScope) -> Segment {
254    let r#type = match scope {
255        SegmentScope::METADATA => SegmentType::BlockfileMetadata,
256        SegmentScope::RECORD => SegmentType::BlockfileRecord,
257        SegmentScope::VECTOR => SegmentType::HnswDistributed,
258        SegmentScope::SQLITE => unimplemented!("Sqlite segment is not implemented"),
259    };
260    Segment {
261        id: SegmentUuid::new(),
262        r#type,
263        scope,
264        collection: collection_uuid,
265        metadata: None,
266        file_path: HashMap::new(),
267    }
268}
269
// Unit tests for the protobuf conversion and the path helpers on `Segment`.
#[cfg(test)]
mod tests {

    use super::*;
    use crate::MetadataValue;

    // A well-formed protobuf segment (nil UUIDs, distributed HNSW type, vector
    // scope, one integer metadata entry) converts field-by-field into `Segment`.
    #[test]
    fn test_segment_try_from() {
        let mut metadata = chroma_proto::UpdateMetadata {
            metadata: HashMap::new(),
        };
        metadata.metadata.insert(
            "foo".to_string(),
            chroma_proto::UpdateMetadataValue {
                value: Some(chroma_proto::update_metadata_value::Value::IntValue(42)),
            },
        );
        let proto_segment = chroma_proto::Segment {
            id: "00000000-0000-0000-0000-000000000000".to_string(),
            r#type: "urn:chroma:segment/vector/hnsw-distributed".to_string(),
            scope: chroma_proto::SegmentScope::Vector as i32,
            collection: "00000000-0000-0000-0000-000000000000".to_string(),
            metadata: Some(metadata),
            file_paths: HashMap::new(),
        };
        let converted_segment: Segment = proto_segment.try_into().unwrap();
        assert_eq!(converted_segment.id, SegmentUuid(Uuid::nil()));
        assert_eq!(converted_segment.r#type, SegmentType::HnswDistributed);
        assert_eq!(converted_segment.scope, SegmentScope::VECTOR);
        assert_eq!(converted_segment.collection, CollectionUuid(Uuid::nil()));
        let metadata = converted_segment.metadata.unwrap();
        assert_eq!(metadata.len(), 1);
        assert_eq!(metadata.get("foo").unwrap(), &MetadataValue::Int(42));
    }

    // The prefix path embeds tenant, database id, collection id and segment id
    // in that order.
    #[test]
    fn test_segment_construct_prefix_path() {
        let segment = Segment {
            id: SegmentUuid(Uuid::nil()),
            r#type: SegmentType::BlockfileMetadata,
            scope: SegmentScope::METADATA,
            collection: CollectionUuid(Uuid::nil()),
            metadata: None,
            file_path: HashMap::new(),
        };
        let tenant = "test_tenant";
        let database_id = &DatabaseUuid(Uuid::nil());
        let prefix_path = segment.construct_prefix_path(tenant, database_id);
        assert_eq!(
            prefix_path,
            "tenant/test_tenant/database/00000000-0000-0000-0000-000000000000/collection/00000000-0000-0000-0000-000000000000/segment/00000000-0000-0000-0000-000000000000"
        );
    }

    // A full path splits at its last `/`: everything before is the prefix and
    // the final component is parsed as the UUID.
    #[test]
    fn test_segment_extract_prefix_and_id() {
        let path = "tenant/test_tenant/database/00000000-0000-0000-0000-000000000000/collection/00000000-0000-0000-0000-000000000000/segment/00000000-0000-0000-0000-000000000000/00000000-0000-0000-0000-000000000001";
        let (prefix, id) =
            Segment::extract_prefix_and_id(path).expect("Failed to extract prefix and id");
        assert_eq!(
            prefix,
            "tenant/test_tenant/database/00000000-0000-0000-0000-000000000000/collection/00000000-0000-0000-0000-000000000000/segment/00000000-0000-0000-0000-000000000000"
        );
        assert_eq!(
            id,
            Uuid::from_str("00000000-0000-0000-0000-000000000001").expect("Cannot happen")
        );
    }

    // A path that is just a UUID (no `/`) yields an empty prefix.
    #[test]
    fn test_segment_extract_prefix_and_id_legacy() {
        let path = "00000000-0000-0000-0000-000000000001";
        let (prefix, id) =
            Segment::extract_prefix_and_id(path).expect("Failed to extract prefix and id");
        assert_eq!(prefix, "");
        assert_eq!(
            id,
            Uuid::from_str("00000000-0000-0000-0000-000000000001").expect("Cannot happen")
        );
    }
}