1use super::{
2 CollectionUuid, Metadata, MetadataValueConversionError, SegmentScope,
3 SegmentScopeConversionError,
4};
5use crate::{chroma_proto, DatabaseUuid};
6use chroma_error::{ChromaError, ErrorCodes};
7use std::{collections::HashMap, str::FromStr};
8use thiserror::Error;
9use tonic::Status;
10use uuid::Uuid;
11
// String keys naming the individual files/indexes that belong to a segment.
// NOTE(review): within this file only `POSTING_LIST_PATH` is used (as a key
// into `Segment::file_path`); the remaining keys are presumably used the same
// way by callers elsewhere — confirm before relying on that.
pub const USER_ID_TO_OFFSET_ID: &str = "user_id_to_offset_id";
pub const OFFSET_ID_TO_USER_ID: &str = "offset_id_to_user_id";
pub const OFFSET_ID_TO_DATA: &str = "offset_id_to_data";
pub const MAX_OFFSET_ID: &str = "max_offset_id";

// Keys that, judging by their names, relate to metadata / full-text indexes
// (TODO confirm against the code that writes them).
pub const FULL_TEXT_PLS: &str = "full_text_pls";
pub const STRING_METADATA: &str = "string_metadata";
pub const BOOL_METADATA: &str = "bool_metadata";
pub const F32_METADATA: &str = "f32_metadata";
pub const U32_METADATA: &str = "u32_metadata";

// Keys that, judging by their names, relate to sparse-vector indexes
// (TODO confirm).
pub const SPARSE_MAX: &str = "sparse_max";
pub const SPARSE_OFFSET_VALUE: &str = "sparse_offset_value";

// Keys for vector-index files. `POSTING_LIST_PATH` is the key read by
// `Segment::filepaths_to_prefetch` for `SegmentType::Spann` segments.
pub const HNSW_PATH: &str = "hnsw_path";
pub const VERSION_MAP_PATH: &str = "version_map_path";
pub const POSTING_LIST_PATH: &str = "posting_list_path";
// NOTE(review): the constant name contains `BF` but the string value does not.
// The value is a runtime key, so it must not be "corrected" without checking
// every reader/writer of this key.
pub const MAX_HEAD_ID_BF_PATH: &str = "max_head_id_path";
30
/// Newtype wrapper around a [`Uuid`] that identifies a segment.
///
/// The wrapped value is public, so callers can read it via `.0`.
/// `Default` is derived and therefore yields the wrapped type's default value
/// (presumably the nil UUID — confirm against the `uuid` crate), not a freshly
/// generated identifier; use [`SegmentUuid::new`] to generate one.
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct SegmentUuid(pub Uuid);
34
35impl SegmentUuid {
36 pub fn new() -> Self {
37 SegmentUuid(Uuid::new_v4())
38 }
39}
40
41impl FromStr for SegmentUuid {
42 type Err = SegmentConversionError;
43
44 fn from_str(s: &str) -> Result<Self, SegmentConversionError> {
45 match Uuid::parse_str(s) {
46 Ok(uuid) => Ok(SegmentUuid(uuid)),
47 Err(_) => Err(SegmentConversionError::InvalidUuid),
48 }
49 }
50}
51
52impl std::fmt::Display for SegmentUuid {
53 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54 write!(f, "{}", self.0)
55 }
56}
57
/// The kind of a segment.
///
/// Each variant has a canonical URN string; the mapping is defined by the
/// `From<SegmentType> for String` and `TryFrom<&str> for SegmentType`
/// implementations in this file.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum SegmentType {
    /// URN: `urn:chroma:segment/metadata/blockfile`.
    BlockfileMetadata,
    /// URN: `urn:chroma:segment/record/blockfile`.
    BlockfileRecord,
    /// URN: `urn:chroma:segment/vector/hnsw-distributed`.
    HnswDistributed,
    /// URN: `urn:chroma:segment/vector/hnsw-local-memory`.
    HnswLocalMemory,
    /// URN: `urn:chroma:segment/vector/hnsw-local-persisted`.
    HnswLocalPersisted,
    /// URN: `urn:chroma:segment/metadata/sqlite`.
    Sqlite,
    /// URN: `urn:chroma:segment/vector/spann`.
    Spann,
    /// URN: `urn:chroma:segment/vector/quantized-spann`.
    QuantizedSpann,
}
69
70impl From<SegmentType> for String {
71 fn from(segment_type: SegmentType) -> String {
72 match segment_type {
73 SegmentType::BlockfileMetadata => "urn:chroma:segment/metadata/blockfile".to_string(),
74 SegmentType::BlockfileRecord => "urn:chroma:segment/record/blockfile".to_string(),
75 SegmentType::HnswDistributed => {
76 "urn:chroma:segment/vector/hnsw-distributed".to_string()
77 }
78 SegmentType::HnswLocalMemory => {
79 "urn:chroma:segment/vector/hnsw-local-memory".to_string()
80 }
81 SegmentType::HnswLocalPersisted => {
82 "urn:chroma:segment/vector/hnsw-local-persisted".to_string()
83 }
84 SegmentType::Spann => "urn:chroma:segment/vector/spann".to_string(),
85 SegmentType::QuantizedSpann => "urn:chroma:segment/vector/quantized-spann".to_string(),
86 SegmentType::Sqlite => "urn:chroma:segment/metadata/sqlite".to_string(),
87 }
88 }
89}
90
91impl TryFrom<&str> for SegmentType {
92 type Error = SegmentConversionError;
93
94 fn try_from(segment_type: &str) -> Result<Self, Self::Error> {
95 match segment_type {
96 "urn:chroma:segment/metadata/blockfile" => Ok(SegmentType::BlockfileMetadata),
97 "urn:chroma:segment/record/blockfile" => Ok(SegmentType::BlockfileRecord),
98 "urn:chroma:segment/vector/hnsw-distributed" => Ok(SegmentType::HnswDistributed),
99 "urn:chroma:segment/vector/hnsw-local-memory" => Ok(SegmentType::HnswLocalMemory),
100 "urn:chroma:segment/vector/hnsw-local-persisted" => Ok(Self::HnswLocalPersisted),
101 "urn:chroma:segment/vector/spann" => Ok(SegmentType::Spann),
102 "urn:chroma:segment/vector/quantized-spann" => Ok(SegmentType::QuantizedSpann),
103 "urn:chroma:segment/metadata/sqlite" => Ok(SegmentType::Sqlite),
104 _ => Err(SegmentConversionError::InvalidSegmentType),
105 }
106 }
107}
108
/// In-memory representation of a segment, convertible to and from
/// `chroma_proto::Segment`.
#[derive(Clone, Debug, PartialEq)]
pub struct Segment {
    /// Identifier of this segment.
    pub id: SegmentUuid,
    /// Kind of segment; see [`SegmentType`].
    pub r#type: SegmentType,
    /// Scope of the segment; see [`SegmentScope`].
    pub scope: SegmentScope,
    /// Identifier of the collection this segment is associated with.
    pub collection: CollectionUuid,
    /// Optional metadata attached to the segment.
    pub metadata: Option<Metadata>,
    /// Map from a string key to a list of path strings. Within this file the
    /// only key looked up explicitly is `POSTING_LIST_PATH`.
    // NOTE(review): presumably the other keys are the file-key constants
    // declared at the top of this file — confirm against the writers.
    pub file_path: HashMap<String, Vec<String>>,
}
118
119impl Segment {
120 pub fn prefetch_supported(&self) -> bool {
123 matches!(
124 self.r#type,
125 SegmentType::BlockfileMetadata | SegmentType::BlockfileRecord | SegmentType::Spann
126 )
127 }
128
129 pub fn filepaths_to_prefetch(&self) -> Vec<String> {
131 let mut res = Vec::new();
132 match self.r#type {
133 SegmentType::Spann => {
134 if let Some(pl_path) = self.file_path.get(POSTING_LIST_PATH) {
135 res.extend(pl_path.iter().cloned());
136 }
137 }
138 SegmentType::BlockfileMetadata | SegmentType::BlockfileRecord => {
139 for paths in self.file_path.values() {
140 res.extend(paths.iter().cloned());
141 }
142 }
143 _ => {}
144 }
145 res
146 }
147
148 pub fn extract_prefix_and_id(path: &str) -> Result<(&str, uuid::Uuid), uuid::Error> {
149 let (prefix, id) = match path.rfind('/') {
150 Some(pos) => (&path[..pos], &path[pos + 1..]),
151 None => ("", path),
152 };
153 match Uuid::try_parse(id) {
154 Ok(uid) => Ok((prefix, uid)),
155 Err(e) => Err(e),
156 }
157 }
158
159 pub fn construct_prefix_path(&self, tenant: &str, database_id: &DatabaseUuid) -> String {
160 format!(
161 "tenant/{}/database/{}/collection/{}/segment/{}",
162 tenant, database_id, self.collection, self.id
163 )
164 }
165}
166
/// Errors produced while converting to segment types in this file.
#[derive(Error, Debug)]
pub enum SegmentConversionError {
    /// A segment or collection identifier could not be parsed as a UUID.
    #[error("Invalid UUID")]
    InvalidUuid,
    /// Converting the segment metadata failed; the inner error's message is
    /// used as-is (`transparent`).
    #[error(transparent)]
    MetadataValueConversionError(#[from] MetadataValueConversionError),
    /// Converting the segment scope failed; the inner error's message is
    /// used as-is (`transparent`).
    #[error(transparent)]
    SegmentScopeConversionError(#[from] SegmentScopeConversionError),
    /// The segment type string is not one of the known URNs.
    #[error("Invalid segment type")]
    InvalidSegmentType,
}
178
179impl ChromaError for SegmentConversionError {
180 fn code(&self) -> ErrorCodes {
181 match self {
182 SegmentConversionError::InvalidUuid => ErrorCodes::InvalidArgument,
183 SegmentConversionError::InvalidSegmentType => ErrorCodes::InvalidArgument,
184 SegmentConversionError::SegmentScopeConversionError(e) => e.code(),
185 SegmentConversionError::MetadataValueConversionError(e) => e.code(),
186 }
187 }
188}
189
190impl From<SegmentConversionError> for Status {
191 fn from(value: SegmentConversionError) -> Self {
192 Status::invalid_argument(value.to_string())
193 }
194}
195
196impl TryFrom<chroma_proto::Segment> for Segment {
197 type Error = SegmentConversionError;
198
199 fn try_from(proto_segment: chroma_proto::Segment) -> Result<Self, Self::Error> {
200 let mut proto_segment = proto_segment;
201
202 let segment_uuid = match SegmentUuid::from_str(&proto_segment.id) {
203 Ok(uuid) => uuid,
204 Err(_) => return Err(SegmentConversionError::InvalidUuid),
205 };
206 let collection_uuid = match Uuid::try_parse(&proto_segment.collection) {
207 Ok(uuid) => uuid,
208 Err(_) => return Err(SegmentConversionError::InvalidUuid),
209 };
210 let collection_uuid = CollectionUuid(collection_uuid);
211 let segment_metadata: Option<Metadata> = match proto_segment.metadata {
212 Some(proto_metadata) => match proto_metadata.try_into() {
213 Ok(metadata) => Some(metadata),
214 Err(e) => return Err(SegmentConversionError::MetadataValueConversionError(e)),
215 },
216 None => None,
217 };
218 let scope: SegmentScope = match proto_segment.scope.try_into() {
219 Ok(scope) => scope,
220 Err(e) => return Err(SegmentConversionError::SegmentScopeConversionError(e)),
221 };
222
223 let segment_type: SegmentType = proto_segment.r#type.as_str().try_into()?;
224
225 let mut file_paths = HashMap::new();
226 let drain = proto_segment.file_paths.drain();
227 for (key, value) in drain {
228 file_paths.insert(key, value.paths);
229 }
230
231 Ok(Segment {
232 id: segment_uuid,
233 r#type: segment_type,
234 scope,
235 collection: collection_uuid,
236 metadata: segment_metadata,
237 file_path: file_paths,
238 })
239 }
240}
241
242impl From<Segment> for chroma_proto::Segment {
243 fn from(value: Segment) -> Self {
244 Self {
245 id: value.id.0.to_string(),
246 r#type: value.r#type.into(),
247 scope: chroma_proto::SegmentScope::from(value.scope) as i32,
248 collection: value.collection.0.to_string(),
249 metadata: value.metadata.map(Into::into),
250 file_paths: value
251 .file_path
252 .into_iter()
253 .map(|(name, paths)| (name, chroma_proto::FilePaths { paths }))
254 .collect(),
255 }
256 }
257}
258
259pub fn test_segment(collection_uuid: CollectionUuid, scope: SegmentScope) -> Segment {
260 let r#type = match scope {
261 SegmentScope::METADATA => SegmentType::BlockfileMetadata,
262 SegmentScope::RECORD => SegmentType::BlockfileRecord,
263 SegmentScope::VECTOR => SegmentType::HnswDistributed,
264 SegmentScope::SQLITE => unimplemented!("Sqlite segment is not implemented"),
265 };
266 Segment {
267 id: SegmentUuid::new(),
268 r#type,
269 scope,
270 collection: collection_uuid,
271 metadata: None,
272 file_path: HashMap::new(),
273 }
274}
275
#[cfg(test)]
mod tests {
    use super::*;
    use crate::MetadataValue;

    /// A protobuf segment with valid ids, type, scope and one integer
    /// metadata entry converts field-by-field into a `Segment`.
    #[test]
    fn test_segment_try_from() {
        let int_value = chroma_proto::UpdateMetadataValue {
            value: Some(chroma_proto::update_metadata_value::Value::IntValue(42)),
        };
        let proto_metadata = chroma_proto::UpdateMetadata {
            metadata: HashMap::from([("foo".to_string(), int_value)]),
        };
        let proto_segment = chroma_proto::Segment {
            id: "00000000-0000-0000-0000-000000000000".to_string(),
            r#type: "urn:chroma:segment/vector/hnsw-distributed".to_string(),
            scope: chroma_proto::SegmentScope::Vector as i32,
            collection: "00000000-0000-0000-0000-000000000000".to_string(),
            metadata: Some(proto_metadata),
            file_paths: HashMap::new(),
        };

        let Segment {
            id,
            r#type,
            scope,
            collection,
            metadata,
            ..
        } = Segment::try_from(proto_segment).unwrap();

        assert_eq!(id, SegmentUuid(Uuid::nil()));
        assert_eq!(r#type, SegmentType::HnswDistributed);
        assert_eq!(scope, SegmentScope::VECTOR);
        assert_eq!(collection, CollectionUuid(Uuid::nil()));

        let metadata = metadata.unwrap();
        assert_eq!(metadata.len(), 1);
        assert_eq!(metadata.get("foo"), Some(&MetadataValue::Int(42)));
    }

    /// The prefix path embeds tenant, database, collection and segment ids in
    /// that order.
    #[test]
    fn test_segment_construct_prefix_path() {
        let nil = Uuid::nil();
        let segment = Segment {
            id: SegmentUuid(nil),
            r#type: SegmentType::BlockfileMetadata,
            scope: SegmentScope::METADATA,
            collection: CollectionUuid(nil),
            metadata: None,
            file_path: HashMap::new(),
        };
        let database_id = DatabaseUuid(nil);

        let prefix_path = segment.construct_prefix_path("test_tenant", &database_id);

        assert_eq!(
            prefix_path,
            "tenant/test_tenant/database/00000000-0000-0000-0000-000000000000/collection/00000000-0000-0000-0000-000000000000/segment/00000000-0000-0000-0000-000000000000"
        );
    }

    /// A full path splits into everything before the last `/` and the
    /// trailing UUID.
    #[test]
    fn test_segment_extract_prefix_and_id() {
        let expected_id =
            Uuid::from_str("00000000-0000-0000-0000-000000000001").expect("Cannot happen");
        let path = "tenant/test_tenant/database/00000000-0000-0000-0000-000000000000/collection/00000000-0000-0000-0000-000000000000/segment/00000000-0000-0000-0000-000000000000/00000000-0000-0000-0000-000000000001";

        let (prefix, id) =
            Segment::extract_prefix_and_id(path).expect("Failed to extract prefix and id");

        assert_eq!(
            prefix,
            "tenant/test_tenant/database/00000000-0000-0000-0000-000000000000/collection/00000000-0000-0000-0000-000000000000/segment/00000000-0000-0000-0000-000000000000"
        );
        assert_eq!(id, expected_id);
    }

    /// A bare UUID with no `/` yields an empty prefix.
    #[test]
    fn test_segment_extract_prefix_and_id_legacy() {
        let expected_id =
            Uuid::from_str("00000000-0000-0000-0000-000000000001").expect("Cannot happen");

        let (prefix, id) = Segment::extract_prefix_and_id("00000000-0000-0000-0000-000000000001")
            .expect("Failed to extract prefix and id");

        assert_eq!(prefix, "");
        assert_eq!(id, expected_id);
    }
}