1use super::{
2 CollectionUuid, Metadata, MetadataValueConversionError, SegmentScope,
3 SegmentScopeConversionError,
4};
5use crate::{chroma_proto, DatabaseUuid};
6use chroma_error::{ChromaError, ErrorCodes};
7use std::{collections::HashMap, str::FromStr};
8use thiserror::Error;
9use tonic::Status;
10use uuid::Uuid;
11
// String keys naming the individual file groups that belong to a segment.
// NOTE(review): inside this file only `POSTING_LIST_PATH` is visibly used (as a key into
// `Segment::file_path`); the other constants are presumably used the same way by other
// modules — confirm against callers.

// Names suggest the user-id <-> offset-id mappings and record data — TODO confirm.
pub const USER_ID_TO_OFFSET_ID: &str = "user_id_to_offset_id";
pub const OFFSET_ID_TO_USER_ID: &str = "offset_id_to_user_id";
pub const OFFSET_ID_TO_DATA: &str = "offset_id_to_data";
pub const MAX_OFFSET_ID: &str = "max_offset_id";

// Names suggest full-text and per-type metadata indexes — TODO confirm.
pub const FULL_TEXT_PLS: &str = "full_text_pls";
pub const STRING_METADATA: &str = "string_metadata";
pub const BOOL_METADATA: &str = "bool_metadata";
pub const F32_METADATA: &str = "f32_metadata";
pub const U32_METADATA: &str = "u32_metadata";

// Names suggest sparse-index files — TODO confirm.
pub const SPARSE_MAX: &str = "sparse_max";
pub const SPARSE_OFFSET_VALUE: &str = "sparse_offset_value";

// Names suggest vector-index files. `POSTING_LIST_PATH` is the key that
// `Segment::filepaths_to_prefetch` looks up for SPANN segments.
pub const HNSW_PATH: &str = "hnsw_path";
pub const VERSION_MAP_PATH: &str = "version_map_path";
pub const POSTING_LIST_PATH: &str = "posting_list_path";
// The value omits the `bf` that appears in the constant's name; it is kept exactly as is.
pub const MAX_HEAD_ID_BF_PATH: &str = "max_head_id_path";
30
31#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)]
33pub struct SegmentUuid(pub Uuid);
34
35impl SegmentUuid {
36 pub fn new() -> Self {
37 SegmentUuid(Uuid::new_v4())
38 }
39}
40
41impl FromStr for SegmentUuid {
42 type Err = SegmentConversionError;
43
44 fn from_str(s: &str) -> Result<Self, SegmentConversionError> {
45 match Uuid::parse_str(s) {
46 Ok(uuid) => Ok(SegmentUuid(uuid)),
47 Err(_) => Err(SegmentConversionError::InvalidUuid),
48 }
49 }
50}
51
52impl std::fmt::Display for SegmentUuid {
53 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54 write!(f, "{}", self.0)
55 }
56}
57
/// The kinds of segment this module knows about.
///
/// Every variant corresponds to exactly one `urn:chroma:segment/...` string; the
/// `From<SegmentType> for String` conversion below defines that mapping.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum SegmentType {
    BlockfileMetadata,
    BlockfileRecord,
    HnswDistributed,
    HnswLocalMemory,
    HnswLocalPersisted,
    Sqlite,
    Spann,
}

impl From<SegmentType> for String {
    /// Returns the URN string that names `segment_type`.
    fn from(segment_type: SegmentType) -> String {
        // Pick the static URN first so the allocation happens in exactly one place.
        let urn: &'static str = match segment_type {
            SegmentType::BlockfileMetadata => "urn:chroma:segment/metadata/blockfile",
            SegmentType::BlockfileRecord => "urn:chroma:segment/record/blockfile",
            SegmentType::HnswDistributed => "urn:chroma:segment/vector/hnsw-distributed",
            SegmentType::HnswLocalMemory => "urn:chroma:segment/vector/hnsw-local-memory",
            SegmentType::HnswLocalPersisted => "urn:chroma:segment/vector/hnsw-local-persisted",
            SegmentType::Spann => "urn:chroma:segment/vector/spann",
            SegmentType::Sqlite => "urn:chroma:segment/metadata/sqlite",
        };
        urn.to_owned()
    }
}
88
89impl TryFrom<&str> for SegmentType {
90 type Error = SegmentConversionError;
91
92 fn try_from(segment_type: &str) -> Result<Self, Self::Error> {
93 match segment_type {
94 "urn:chroma:segment/metadata/blockfile" => Ok(SegmentType::BlockfileMetadata),
95 "urn:chroma:segment/record/blockfile" => Ok(SegmentType::BlockfileRecord),
96 "urn:chroma:segment/vector/hnsw-distributed" => Ok(SegmentType::HnswDistributed),
97 "urn:chroma:segment/vector/hnsw-local-memory" => Ok(SegmentType::HnswLocalMemory),
98 "urn:chroma:segment/vector/hnsw-local-persisted" => Ok(Self::HnswLocalPersisted),
99 "urn:chroma:segment/vector/spann" => Ok(SegmentType::Spann),
100 "urn:chroma:segment/metadata/sqlite" => Ok(SegmentType::Sqlite),
101 _ => Err(SegmentConversionError::InvalidSegmentType),
102 }
103 }
104}
105
106#[derive(Clone, Debug, PartialEq)]
107pub struct Segment {
108 pub id: SegmentUuid,
109 pub r#type: SegmentType,
110 pub scope: SegmentScope,
111 pub collection: CollectionUuid,
112 pub metadata: Option<Metadata>,
113 pub file_path: HashMap<String, Vec<String>>,
114}
115
116impl Segment {
117 pub fn prefetch_supported(&self) -> bool {
118 matches!(
119 self.r#type,
120 SegmentType::BlockfileMetadata | SegmentType::BlockfileRecord | SegmentType::Spann
121 )
122 }
123
124 pub fn filepaths_to_prefetch(&self) -> Vec<String> {
125 let mut res = Vec::new();
126 match self.r#type {
127 SegmentType::Spann => {
128 if let Some(pl_path) = self.file_path.get(POSTING_LIST_PATH) {
129 res.extend(pl_path.iter().cloned());
130 }
131 }
132 SegmentType::BlockfileMetadata | SegmentType::BlockfileRecord => {
133 for paths in self.file_path.values() {
134 res.extend(paths.iter().cloned());
135 }
136 }
137 _ => {}
138 }
139 res
140 }
141
142 pub fn extract_prefix_and_id(path: &str) -> Result<(&str, uuid::Uuid), uuid::Error> {
143 let (prefix, id) = match path.rfind('/') {
144 Some(pos) => (&path[..pos], &path[pos + 1..]),
145 None => ("", path),
146 };
147 match Uuid::try_parse(id) {
148 Ok(uid) => Ok((prefix, uid)),
149 Err(e) => Err(e),
150 }
151 }
152
153 pub fn construct_prefix_path(&self, tenant: &str, database_id: &DatabaseUuid) -> String {
154 format!(
155 "tenant/{}/database/{}/collection/{}/segment/{}",
156 tenant, database_id, self.collection, self.id
157 )
158 }
159}
160
161#[derive(Error, Debug)]
162pub enum SegmentConversionError {
163 #[error("Invalid UUID")]
164 InvalidUuid,
165 #[error(transparent)]
166 MetadataValueConversionError(#[from] MetadataValueConversionError),
167 #[error(transparent)]
168 SegmentScopeConversionError(#[from] SegmentScopeConversionError),
169 #[error("Invalid segment type")]
170 InvalidSegmentType,
171}
172
173impl ChromaError for SegmentConversionError {
174 fn code(&self) -> ErrorCodes {
175 match self {
176 SegmentConversionError::InvalidUuid => ErrorCodes::InvalidArgument,
177 SegmentConversionError::InvalidSegmentType => ErrorCodes::InvalidArgument,
178 SegmentConversionError::SegmentScopeConversionError(e) => e.code(),
179 SegmentConversionError::MetadataValueConversionError(e) => e.code(),
180 }
181 }
182}
183
184impl From<SegmentConversionError> for Status {
185 fn from(value: SegmentConversionError) -> Self {
186 Status::invalid_argument(value.to_string())
187 }
188}
189
190impl TryFrom<chroma_proto::Segment> for Segment {
191 type Error = SegmentConversionError;
192
193 fn try_from(proto_segment: chroma_proto::Segment) -> Result<Self, Self::Error> {
194 let mut proto_segment = proto_segment;
195
196 let segment_uuid = match SegmentUuid::from_str(&proto_segment.id) {
197 Ok(uuid) => uuid,
198 Err(_) => return Err(SegmentConversionError::InvalidUuid),
199 };
200 let collection_uuid = match Uuid::try_parse(&proto_segment.collection) {
201 Ok(uuid) => uuid,
202 Err(_) => return Err(SegmentConversionError::InvalidUuid),
203 };
204 let collection_uuid = CollectionUuid(collection_uuid);
205 let segment_metadata: Option<Metadata> = match proto_segment.metadata {
206 Some(proto_metadata) => match proto_metadata.try_into() {
207 Ok(metadata) => Some(metadata),
208 Err(e) => return Err(SegmentConversionError::MetadataValueConversionError(e)),
209 },
210 None => None,
211 };
212 let scope: SegmentScope = match proto_segment.scope.try_into() {
213 Ok(scope) => scope,
214 Err(e) => return Err(SegmentConversionError::SegmentScopeConversionError(e)),
215 };
216
217 let segment_type: SegmentType = proto_segment.r#type.as_str().try_into()?;
218
219 let mut file_paths = HashMap::new();
220 let drain = proto_segment.file_paths.drain();
221 for (key, value) in drain {
222 file_paths.insert(key, value.paths);
223 }
224
225 Ok(Segment {
226 id: segment_uuid,
227 r#type: segment_type,
228 scope,
229 collection: collection_uuid,
230 metadata: segment_metadata,
231 file_path: file_paths,
232 })
233 }
234}
235
236impl From<Segment> for chroma_proto::Segment {
237 fn from(value: Segment) -> Self {
238 Self {
239 id: value.id.0.to_string(),
240 r#type: value.r#type.into(),
241 scope: chroma_proto::SegmentScope::from(value.scope) as i32,
242 collection: value.collection.0.to_string(),
243 metadata: value.metadata.map(Into::into),
244 file_paths: value
245 .file_path
246 .into_iter()
247 .map(|(name, paths)| (name, chroma_proto::FilePaths { paths }))
248 .collect(),
249 }
250 }
251}
252
253pub fn test_segment(collection_uuid: CollectionUuid, scope: SegmentScope) -> Segment {
254 let r#type = match scope {
255 SegmentScope::METADATA => SegmentType::BlockfileMetadata,
256 SegmentScope::RECORD => SegmentType::BlockfileRecord,
257 SegmentScope::VECTOR => SegmentType::HnswDistributed,
258 SegmentScope::SQLITE => unimplemented!("Sqlite segment is not implemented"),
259 };
260 Segment {
261 id: SegmentUuid::new(),
262 r#type,
263 scope,
264 collection: collection_uuid,
265 metadata: None,
266 file_path: HashMap::new(),
267 }
268}
269
#[cfg(test)]
mod tests {

    use super::*;
    use crate::MetadataValue;

    /// A protobuf segment with one integer metadata entry converts field by field.
    #[test]
    fn test_segment_try_from() {
        let int_value = chroma_proto::UpdateMetadataValue {
            value: Some(chroma_proto::update_metadata_value::Value::IntValue(42)),
        };
        let proto_metadata = chroma_proto::UpdateMetadata {
            metadata: HashMap::from([("foo".to_string(), int_value)]),
        };
        let proto_segment = chroma_proto::Segment {
            id: "00000000-0000-0000-0000-000000000000".to_string(),
            r#type: "urn:chroma:segment/vector/hnsw-distributed".to_string(),
            scope: chroma_proto::SegmentScope::Vector as i32,
            collection: "00000000-0000-0000-0000-000000000000".to_string(),
            metadata: Some(proto_metadata),
            file_paths: HashMap::new(),
        };

        let converted = Segment::try_from(proto_segment).unwrap();

        assert_eq!(converted.id, SegmentUuid(Uuid::nil()));
        assert_eq!(converted.r#type, SegmentType::HnswDistributed);
        assert_eq!(converted.scope, SegmentScope::VECTOR);
        assert_eq!(converted.collection, CollectionUuid(Uuid::nil()));
        let converted_metadata = converted.metadata.unwrap();
        assert_eq!(converted_metadata.len(), 1);
        assert_eq!(
            converted_metadata.get("foo").unwrap(),
            &MetadataValue::Int(42)
        );
    }

    /// The prefix path embeds tenant, database, collection and segment ids in order.
    #[test]
    fn test_segment_construct_prefix_path() {
        let nil = Uuid::nil();
        let segment = Segment {
            id: SegmentUuid(nil),
            r#type: SegmentType::BlockfileMetadata,
            scope: SegmentScope::METADATA,
            collection: CollectionUuid(nil),
            metadata: None,
            file_path: HashMap::new(),
        };
        let database_id = DatabaseUuid(nil);

        let prefix_path = segment.construct_prefix_path("test_tenant", &database_id);

        assert_eq!(
            prefix_path,
            "tenant/test_tenant/database/00000000-0000-0000-0000-000000000000/collection/00000000-0000-0000-0000-000000000000/segment/00000000-0000-0000-0000-000000000000"
        );
    }

    /// A prefixed path splits into everything before the last `/` plus the trailing UUID.
    #[test]
    fn test_segment_extract_prefix_and_id() {
        let path = "tenant/test_tenant/database/00000000-0000-0000-0000-000000000000/collection/00000000-0000-0000-0000-000000000000/segment/00000000-0000-0000-0000-000000000000/00000000-0000-0000-0000-000000000001";
        let expected_id =
            Uuid::from_str("00000000-0000-0000-0000-000000000001").expect("Cannot happen");

        let (prefix, id) =
            Segment::extract_prefix_and_id(path).expect("Failed to extract prefix and id");

        assert_eq!(
            prefix,
            "tenant/test_tenant/database/00000000-0000-0000-0000-000000000000/collection/00000000-0000-0000-0000-000000000000/segment/00000000-0000-0000-0000-000000000000"
        );
        assert_eq!(id, expected_id);
    }

    /// A bare UUID (no `/`) yields an empty prefix.
    #[test]
    fn test_segment_extract_prefix_and_id_legacy() {
        let path = "00000000-0000-0000-0000-000000000001";
        let expected_id =
            Uuid::from_str("00000000-0000-0000-0000-000000000001").expect("Cannot happen");

        let (prefix, id) =
            Segment::extract_prefix_and_id(path).expect("Failed to extract prefix and id");

        assert_eq!(prefix, "");
        assert_eq!(id, expected_id);
    }
}