// chroma_types/api_types.rs

1use crate::collection_configuration::InternalCollectionConfiguration;
2use crate::collection_configuration::InternalUpdateCollectionConfiguration;
3use crate::error::QueryConversionError;
4use crate::operator::GetResult;
5use crate::operator::Key;
6use crate::operator::KnnBatchResult;
7use crate::operator::KnnProjectionRecord;
8use crate::operator::ProjectionRecord;
9use crate::operator::SearchResult;
10use crate::operators_generated::{
11    FUNCTION_RECORD_COUNTER_ID, FUNCTION_RECORD_COUNTER_NAME, FUNCTION_STATISTICS_ID,
12    FUNCTION_STATISTICS_NAME,
13};
14use crate::plan::PlanToProtoError;
15use crate::plan::ReadLevel;
16use crate::plan::SearchPayload;
17use crate::validators::{
18    validate_metadata_vec, validate_name, validate_non_empty_collection_update_metadata,
19    validate_optional_metadata, validate_schema, validate_update_metadata_vec,
20};
21use crate::AttachedFunction;
22use crate::AttachedFunctionUuid;
23use crate::Collection;
24use crate::CollectionConfigurationToInternalConfigurationError;
25use crate::CollectionConversionError;
26use crate::CollectionUuid;
27use crate::DatabaseName;
28use crate::DistributedSpannParametersFromSegmentError;
29use crate::EmbeddingsPayload;
30use crate::HnswParametersFromSegmentError;
31use crate::Metadata;
32use crate::RawWhereFields;
33use crate::Schema;
34use crate::SchemaError;
35use crate::SegmentConversionError;
36use crate::SegmentScopeConversionError;
37use crate::UpdateEmbeddingsPayload;
38use crate::UpdateMetadata;
39use crate::Where;
40use crate::WhereValidationError;
41use chroma_error::ChromaValidationError;
42use chroma_error::{ChromaError, ErrorCodes};
43use serde::Deserialize;
44use serde::Serialize;
45use std::time::SystemTimeError;
46use thiserror::Error;
47use tonic::Status;
48use uuid::Uuid;
49use validator::Validate;
50use validator::ValidationError;
51
52#[cfg(feature = "pyo3")]
53use pyo3::types::PyAnyMethods;
54
55#[derive(Debug, Error)]
56pub enum GetSegmentsError {
57    #[error("Could not parse segment")]
58    SegmentConversion(#[from] SegmentConversionError),
59    #[error("Unknown segment scope")]
60    UnknownScope(#[from] SegmentScopeConversionError),
61    #[error(transparent)]
62    Internal(#[from] Box<dyn ChromaError>),
63}
64
65impl ChromaError for GetSegmentsError {
66    fn code(&self) -> ErrorCodes {
67        match self {
68            GetSegmentsError::SegmentConversion(_) => ErrorCodes::Internal,
69            GetSegmentsError::UnknownScope(_) => ErrorCodes::Internal,
70            GetSegmentsError::Internal(err) => err.code(),
71        }
72    }
73}
74
75#[derive(Debug, Error)]
76pub enum GetCollectionWithSegmentsError {
77    #[error("Failed to convert proto collection")]
78    CollectionConversionError(#[from] CollectionConversionError),
79    #[error("Duplicate segment")]
80    DuplicateSegment,
81    #[error("Missing field: [{0}]")]
82    Field(String),
83    #[error("Failed to convert proto segment")]
84    SegmentConversionError(#[from] SegmentConversionError),
85    #[error("Failed to get segments")]
86    GetSegmentsError(#[from] GetSegmentsError),
87    #[error("Grpc error: {0}")]
88    Grpc(#[from] Status),
89    #[error("Collection [{0}] does not exist.")]
90    NotFound(String),
91    #[error(transparent)]
92    Internal(#[from] Box<dyn ChromaError>),
93}
94
95impl ChromaError for GetCollectionWithSegmentsError {
96    fn code(&self) -> ErrorCodes {
97        match self {
98            GetCollectionWithSegmentsError::CollectionConversionError(
99                collection_conversion_error,
100            ) => collection_conversion_error.code(),
101            GetCollectionWithSegmentsError::DuplicateSegment => ErrorCodes::Internal,
102            GetCollectionWithSegmentsError::Field(_) => ErrorCodes::FailedPrecondition,
103            GetCollectionWithSegmentsError::SegmentConversionError(segment_conversion_error) => {
104                segment_conversion_error.code()
105            }
106            GetCollectionWithSegmentsError::Grpc(status) => status.code().into(),
107            GetCollectionWithSegmentsError::GetSegmentsError(get_segments_error) => {
108                get_segments_error.code()
109            }
110            GetCollectionWithSegmentsError::NotFound(_) => ErrorCodes::NotFound,
111            GetCollectionWithSegmentsError::Internal(err) => err.code(),
112        }
113    }
114
115    fn should_trace_error(&self) -> bool {
116        if let Self::Grpc(status) = self {
117            status.code() != ErrorCodes::NotFound.into()
118        } else {
119            true
120        }
121    }
122}
123
124#[derive(Debug, Error)]
125pub enum BatchGetCollectionVersionFilePathsError {
126    #[error("Grpc error: {0}")]
127    Grpc(#[from] Status),
128    #[error("Could not parse UUID from string {1}: {0}")]
129    Uuid(uuid::Error, String),
130    #[error("Client resolution error: {0}")]
131    ClientResolution(#[from] ClientResolutionError),
132}
133
134impl ChromaError for BatchGetCollectionVersionFilePathsError {
135    fn code(&self) -> ErrorCodes {
136        match self {
137            BatchGetCollectionVersionFilePathsError::Grpc(status) => status.code().into(),
138            BatchGetCollectionVersionFilePathsError::Uuid(_, _) => ErrorCodes::InvalidArgument,
139            BatchGetCollectionVersionFilePathsError::ClientResolution(e) => e.code(),
140        }
141    }
142}
143
144#[derive(Debug, Error)]
145pub enum BatchGetCollectionSoftDeleteStatusError {
146    #[error("Grpc error: {0}")]
147    Grpc(#[from] Status),
148    #[error("Could not parse UUID from string {1}: {0}")]
149    Uuid(uuid::Error, String),
150    #[error("Client resolution error: {0}")]
151    ClientResolution(#[from] ClientResolutionError),
152}
153
154impl ChromaError for BatchGetCollectionSoftDeleteStatusError {
155    fn code(&self) -> ErrorCodes {
156        match self {
157            BatchGetCollectionSoftDeleteStatusError::Grpc(status) => status.code().into(),
158            BatchGetCollectionSoftDeleteStatusError::Uuid(_, _) => ErrorCodes::InvalidArgument,
159            BatchGetCollectionSoftDeleteStatusError::ClientResolution(e) => e.code(),
160        }
161    }
162}
163
164#[derive(Serialize)]
165#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
166pub struct ResetResponse {}
167
168#[derive(Debug, Error)]
169pub enum ResetError {
170    #[error(transparent)]
171    Cache(Box<dyn ChromaError>),
172    #[error(transparent)]
173    Internal(#[from] Box<dyn ChromaError>),
174    #[error("Reset is disabled by config")]
175    NotAllowed,
176}
177
178impl ChromaError for ResetError {
179    fn code(&self) -> ErrorCodes {
180        match self {
181            ResetError::Cache(err) => err.code(),
182            ResetError::Internal(err) => err.code(),
183            ResetError::NotAllowed => ErrorCodes::PermissionDenied,
184        }
185    }
186}
187
188#[derive(Serialize)]
189#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
190pub struct ChecklistResponse {
191    pub max_batch_size: u32,
192    pub supports_base64_encoding: bool,
193}
194
195#[derive(Debug, Error)]
196#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
197pub enum HeartbeatError {
198    #[error("system time error: {0}")]
199    CouldNotGetTime(String),
200}
201
202impl From<SystemTimeError> for HeartbeatError {
203    fn from(err: SystemTimeError) -> Self {
204        HeartbeatError::CouldNotGetTime(err.to_string())
205    }
206}
207
208impl ChromaError for HeartbeatError {
209    fn code(&self) -> ErrorCodes {
210        ErrorCodes::Internal
211    }
212}
213
214#[non_exhaustive]
215#[derive(Serialize, Validate, Deserialize)]
216#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
217pub struct CreateTenantRequest {
218    #[validate(length(min = 3))]
219    pub name: String,
220}
221
222impl CreateTenantRequest {
223    pub fn try_new(name: String) -> Result<Self, ChromaValidationError> {
224        let request = Self { name };
225        request.validate().map_err(ChromaValidationError::from)?;
226        Ok(request)
227    }
228}
229
230#[derive(Serialize, Deserialize)]
231#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
232pub struct CreateTenantResponse {}
233
234#[derive(Debug, Error)]
235pub enum CreateTenantError {
236    #[error("Tenant [{0}] already exists")]
237    AlreadyExists(String),
238    #[error(transparent)]
239    Internal(#[from] Box<dyn ChromaError>),
240}
241
242impl ChromaError for CreateTenantError {
243    fn code(&self) -> ErrorCodes {
244        match self {
245            CreateTenantError::AlreadyExists(_) => ErrorCodes::AlreadyExists,
246            CreateTenantError::Internal(err) => err.code(),
247        }
248    }
249}
250
251#[non_exhaustive]
252#[derive(Validate, Serialize)]
253#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
254pub struct GetTenantRequest {
255    pub name: String,
256}
257
258impl GetTenantRequest {
259    pub fn try_new(name: String) -> Result<Self, ChromaValidationError> {
260        let request = Self { name };
261        request.validate().map_err(ChromaValidationError::from)?;
262        Ok(request)
263    }
264}
265
266#[derive(Serialize)]
267#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
268#[cfg_attr(feature = "pyo3", pyo3::pyclass)]
269pub struct GetTenantResponse {
270    pub name: String,
271    pub resource_name: Option<String>,
272}
273
// Python-side getters; compiled only with the `pyo3` feature.
#[cfg(feature = "pyo3")]
#[pyo3::pymethods]
impl GetTenantResponse {
    // Getter for the `name` field (borrowed).
    #[getter]
    pub fn name(&self) -> &String {
        &self.name
    }

    // Getter for `resource_name`; hands back a clone of the optional value.
    #[getter]
    pub fn resource_name(&self) -> Option<String> {
        self.resource_name.clone()
    }
}
287
288#[derive(Debug, Error)]
289pub enum GetTenantError {
290    #[error(transparent)]
291    Internal(#[from] Box<dyn ChromaError>),
292    #[error("Tenant [{0}] not found")]
293    NotFound(String),
294}
295
296impl ChromaError for GetTenantError {
297    fn code(&self) -> ErrorCodes {
298        match self {
299            GetTenantError::Internal(err) => err.code(),
300            GetTenantError::NotFound(_) => ErrorCodes::NotFound,
301        }
302    }
303}
304
305#[non_exhaustive]
306#[derive(Validate, Serialize)]
307#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
308pub struct UpdateTenantRequest {
309    pub tenant_id: String,
310    pub resource_name: String,
311}
312
313impl UpdateTenantRequest {
314    pub fn try_new(
315        tenant_id: String,
316        resource_name: String,
317    ) -> Result<Self, ChromaValidationError> {
318        let request = Self {
319            tenant_id,
320            resource_name,
321        };
322        request.validate().map_err(ChromaValidationError::from)?;
323        Ok(request)
324    }
325}
326
327#[derive(Serialize)]
328#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
329#[cfg_attr(feature = "pyo3", pyo3::pyclass)]
330pub struct UpdateTenantResponse {}
331
// Intentionally empty Python method block; compiled only with the `pyo3` feature.
#[cfg(feature = "pyo3")]
#[pyo3::pymethods]
impl UpdateTenantResponse {}
335
336#[derive(Error, Debug)]
337pub enum UpdateTenantError {
338    #[error("Failed to set resource name")]
339    FailedToSetResourceName(#[from] tonic::Status),
340    #[error(transparent)]
341    Internal(#[from] Box<dyn ChromaError>),
342    #[error("Tenant [{0}] not found")]
343    NotFound(String),
344}
345
346impl ChromaError for UpdateTenantError {
347    fn code(&self) -> ErrorCodes {
348        match self {
349            UpdateTenantError::FailedToSetResourceName(_) => ErrorCodes::AlreadyExists,
350            UpdateTenantError::Internal(err) => err.code(),
351            UpdateTenantError::NotFound(_) => ErrorCodes::NotFound,
352        }
353    }
354}
355
356#[non_exhaustive]
357#[derive(Validate, Serialize)]
358#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
359pub struct CreateDatabaseRequest {
360    pub database_id: Uuid,
361    pub tenant_id: String,
362    pub database_name: DatabaseName,
363}
364
365impl CreateDatabaseRequest {
366    pub fn try_new(
367        tenant_id: String,
368        database_name: DatabaseName,
369    ) -> Result<Self, ChromaValidationError> {
370        let database_id = Uuid::new_v4();
371        let request = Self {
372            database_id,
373            tenant_id,
374            database_name,
375        };
376        request.validate().map_err(ChromaValidationError::from)?;
377        Ok(request)
378    }
379}
380
381#[derive(Error, Debug)]
382pub enum ClientResolutionError {
383    #[error("Not supported")]
384    McmrNotSupported,
385    #[error("Database not found")]
386    DatabaseNotFound,
387}
388
389impl ChromaError for ClientResolutionError {
390    fn code(&self) -> ErrorCodes {
391        match self {
392            ClientResolutionError::McmrNotSupported => ErrorCodes::InvalidArgument,
393            ClientResolutionError::DatabaseNotFound => ErrorCodes::NotFound,
394        }
395    }
396}
397
398#[derive(Serialize)]
399#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
400pub struct CreateDatabaseResponse {}
401
402#[derive(Error, Debug)]
403pub enum CreateDatabaseError {
404    #[error("Database [{0}] already exists")]
405    AlreadyExists(String),
406    #[error(transparent)]
407    Internal(#[from] Box<dyn ChromaError>),
408    #[error("Client resolution error: {0}")]
409    ClientResolutionError(#[from] ClientResolutionError),
410}
411
412impl ChromaError for CreateDatabaseError {
413    fn code(&self) -> ErrorCodes {
414        match self {
415            CreateDatabaseError::AlreadyExists(_) => ErrorCodes::AlreadyExists,
416            CreateDatabaseError::Internal(status) => status.code(),
417            CreateDatabaseError::ClientResolutionError(e) => e.code(),
418        }
419    }
420}
421
422#[derive(Serialize, Deserialize, Debug, Clone, Default)]
423#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
424#[cfg_attr(feature = "pyo3", pyo3::pyclass)]
425pub struct Database {
426    pub id: Uuid,
427    pub name: String,
428    pub tenant: String,
429}
430
// Python-side getters; compiled only with the `pyo3` feature.
#[cfg(feature = "pyo3")]
#[pyo3::pymethods]
impl Database {
    // Getter for `id`: imports Python's `uuid` module and calls `uuid.UUID`
    // with the id rendered as a string. Any Python-side failure is propagated.
    #[getter]
    fn id<'py>(&self, py: pyo3::Python<'py>) -> pyo3::PyResult<pyo3::Bound<'py, pyo3::PyAny>> {
        let uuid_module = pyo3::prelude::PyModule::import(py, "uuid")?;
        let uuid_class = uuid_module.getattr("UUID")?;
        let py_uuid = uuid_class.call1((self.id.to_string(),))?;
        Ok(py_uuid)
    }

    // Getter for the `name` field (borrowed).
    #[getter]
    pub fn name(&self) -> &str {
        &self.name
    }

    // Getter for the `tenant` field (borrowed).
    #[getter]
    pub fn tenant(&self) -> &str {
        &self.tenant
    }
}
452
453impl From<Database> for crate::chroma_proto::Database {
454    fn from(d: Database) -> Self {
455        crate::chroma_proto::Database {
456            id: d.id.to_string(),
457            name: d.name,
458            tenant: d.tenant,
459        }
460    }
461}
462
463#[non_exhaustive]
464#[derive(Validate, Serialize)]
465#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
466pub struct ListDatabasesRequest {
467    pub tenant_id: String,
468    pub limit: Option<u32>,
469    pub offset: u32,
470}
471
472impl ListDatabasesRequest {
473    pub fn try_new(
474        tenant_id: String,
475        limit: Option<u32>,
476        offset: u32,
477    ) -> Result<Self, ChromaValidationError> {
478        let request = Self {
479            tenant_id,
480            limit,
481            offset,
482        };
483        request.validate().map_err(ChromaValidationError::from)?;
484        Ok(request)
485    }
486}
487
488pub type ListDatabasesResponse = Vec<Database>;
489
490#[derive(Debug, Error)]
491pub enum ListDatabasesError {
492    #[error(transparent)]
493    Internal(#[from] Box<dyn ChromaError>),
494    #[error("Invalid database id [{0}]")]
495    InvalidID(String),
496}
497
498impl ChromaError for ListDatabasesError {
499    fn code(&self) -> ErrorCodes {
500        match self {
501            ListDatabasesError::Internal(status) => status.code(),
502            ListDatabasesError::InvalidID(_) => ErrorCodes::InvalidArgument,
503        }
504    }
505}
506
507#[non_exhaustive]
508#[derive(Validate, Serialize)]
509#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
510pub struct GetDatabaseRequest {
511    pub tenant_id: String,
512    pub database_name: DatabaseName,
513}
514
515impl GetDatabaseRequest {
516    pub fn try_new(
517        tenant_id: String,
518        database_name: DatabaseName,
519    ) -> Result<Self, ChromaValidationError> {
520        let request = Self {
521            tenant_id,
522            database_name,
523        };
524        request.validate().map_err(ChromaValidationError::from)?;
525        Ok(request)
526    }
527}
528
529pub type GetDatabaseResponse = Database;
530
531#[derive(Error, Debug)]
532pub enum GetDatabaseError {
533    #[error(transparent)]
534    Internal(#[from] Box<dyn ChromaError>),
535    #[error("Invalid database id [{0}]")]
536    InvalidID(String),
537    #[error("Database [{0}] not found. Are you sure it exists?")]
538    NotFound(String),
539    #[error("Client resolution error: {0}")]
540    ClientResolutionError(#[from] ClientResolutionError),
541}
542
543impl ChromaError for GetDatabaseError {
544    fn code(&self) -> ErrorCodes {
545        match self {
546            GetDatabaseError::Internal(err) => err.code(),
547            GetDatabaseError::InvalidID(_) => ErrorCodes::InvalidArgument,
548            GetDatabaseError::NotFound(_) => ErrorCodes::NotFound,
549            GetDatabaseError::ClientResolutionError(e) => e.code(),
550        }
551    }
552}
553
554#[non_exhaustive]
555#[derive(Validate, Serialize)]
556#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
557pub struct DeleteDatabaseRequest {
558    pub tenant_id: String,
559    pub database_name: String,
560}
561
562impl DeleteDatabaseRequest {
563    pub fn try_new(
564        tenant_id: String,
565        database_name: String,
566    ) -> Result<Self, ChromaValidationError> {
567        let request = Self {
568            tenant_id,
569            database_name,
570        };
571        request.validate().map_err(ChromaValidationError::from)?;
572        Ok(request)
573    }
574}
575
576#[derive(Serialize)]
577#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
578pub struct DeleteDatabaseResponse {}
579
580#[derive(Debug, Error)]
581pub enum DeleteDatabaseError {
582    #[error(transparent)]
583    Internal(#[from] Box<dyn ChromaError>),
584    #[error("Invalid database id [{0}]")]
585    InvalidID(String),
586    #[error("Database [{0}] not found")]
587    NotFound(String),
588}
589
590impl ChromaError for DeleteDatabaseError {
591    fn code(&self) -> ErrorCodes {
592        match self {
593            DeleteDatabaseError::Internal(err) => err.code(),
594            DeleteDatabaseError::InvalidID(_) => ErrorCodes::InvalidArgument,
595            DeleteDatabaseError::NotFound(_) => ErrorCodes::NotFound,
596        }
597    }
598}
599
600#[derive(Debug, Error)]
601pub enum FinishDatabaseDeletionError {
602    #[error(transparent)]
603    Internal(#[from] Box<dyn ChromaError>),
604}
605
606impl ChromaError for FinishDatabaseDeletionError {
607    fn code(&self) -> ErrorCodes {
608        match self {
609            FinishDatabaseDeletionError::Internal(err) => err.code(),
610        }
611    }
612}
613
614#[non_exhaustive]
615#[derive(Validate, Debug, Serialize)]
616#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
617pub struct ListCollectionsRequest {
618    pub tenant_id: String,
619    pub database_name: DatabaseName,
620    pub limit: Option<u32>,
621    pub offset: u32,
622}
623
624impl ListCollectionsRequest {
625    pub fn try_new(
626        tenant_id: String,
627        database_name: DatabaseName,
628        limit: Option<u32>,
629        offset: u32,
630    ) -> Result<Self, ChromaValidationError> {
631        let request = Self {
632            tenant_id,
633            database_name,
634            limit,
635            offset,
636        };
637        request.validate().map_err(ChromaValidationError::from)?;
638        Ok(request)
639    }
640}
641
642pub type ListCollectionsResponse = Vec<Collection>;
643
644#[non_exhaustive]
645#[derive(Validate, Serialize)]
646#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
647pub struct CountCollectionsRequest {
648    pub tenant_id: String,
649    pub database_name: DatabaseName,
650}
651
652impl CountCollectionsRequest {
653    pub fn try_new(
654        tenant_id: String,
655        database_name: DatabaseName,
656    ) -> Result<Self, ChromaValidationError> {
657        let request = Self {
658            tenant_id,
659            database_name,
660        };
661        request.validate().map_err(ChromaValidationError::from)?;
662        Ok(request)
663    }
664}
665
/// Alias: the count is returned as a bare `u32`.
pub type CountCollectionsResponse = u32;
667
668#[non_exhaustive]
669#[derive(Validate, Clone, Serialize)]
670#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
671pub struct GetCollectionRequest {
672    pub tenant_id: String,
673    pub database_name: DatabaseName,
674    pub collection_name: String,
675}
676
677impl GetCollectionRequest {
678    pub fn try_new(
679        tenant_id: String,
680        database_name: DatabaseName,
681        collection_name: String,
682    ) -> Result<Self, ChromaValidationError> {
683        let request = Self {
684            tenant_id,
685            database_name,
686            collection_name,
687        };
688        request.validate().map_err(ChromaValidationError::from)?;
689        Ok(request)
690    }
691}
692
693pub type GetCollectionResponse = Collection;
694
695#[derive(Debug, Error)]
696pub enum GetCollectionError {
697    #[error("Failed to reconcile schema: {0}")]
698    InvalidSchema(#[from] SchemaError),
699    #[error(transparent)]
700    Internal(#[from] Box<dyn ChromaError>),
701    #[error("Collection [{0}] does not exist")]
702    NotFound(String),
703}
704
705impl ChromaError for GetCollectionError {
706    fn code(&self) -> ErrorCodes {
707        match self {
708            GetCollectionError::InvalidSchema(e) => e.code(),
709            GetCollectionError::Internal(err) => err.code(),
710            GetCollectionError::NotFound(_) => ErrorCodes::NotFound,
711        }
712    }
713}
714
715#[non_exhaustive]
716#[derive(Clone, Debug, Validate, Serialize)]
717#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
718pub struct CreateCollectionRequest {
719    pub tenant_id: String,
720    pub database_name: DatabaseName,
721    #[validate(custom(function = "validate_name"))]
722    pub name: String,
723    #[validate(custom(function = "validate_optional_metadata"))]
724    pub metadata: Option<Metadata>,
725    pub configuration: Option<InternalCollectionConfiguration>,
726    #[validate(custom(function = "validate_schema"))]
727    pub schema: Option<Schema>,
728    pub get_or_create: bool,
729}
730
731impl CreateCollectionRequest {
732    pub fn try_new(
733        tenant_id: String,
734        database_name: DatabaseName,
735        name: String,
736        metadata: Option<Metadata>,
737        configuration: Option<InternalCollectionConfiguration>,
738        schema: Option<Schema>,
739        get_or_create: bool,
740    ) -> Result<Self, ChromaValidationError> {
741        let request = Self {
742            tenant_id,
743            database_name,
744            name,
745            metadata,
746            configuration,
747            schema,
748            get_or_create,
749        };
750        request.validate().map_err(ChromaValidationError::from)?;
751        Ok(request)
752    }
753}
754
755pub type CreateCollectionResponse = Collection;
756
757#[derive(Debug, Error)]
758pub enum CreateCollectionError {
759    #[error("Invalid HNSW parameters: {0}")]
760    InvalidHnswParameters(#[from] HnswParametersFromSegmentError),
761    #[error("Could not parse config: {0}")]
762    InvalidConfig(#[from] CollectionConfigurationToInternalConfigurationError),
763    #[error("Invalid Spann parameters: {0}")]
764    InvalidSpannParameters(#[from] DistributedSpannParametersFromSegmentError),
765    #[error("Collection [{0}] already exists")]
766    AlreadyExists(String),
767    #[error("Database [{0}] does not exist")]
768    DatabaseNotFound(String),
769    #[error("Could not fetch collections: {0}")]
770    Get(#[from] GetCollectionsError),
771    #[error("Could not deserialize configuration: {0}")]
772    Configuration(serde_json::Error),
773    #[error("Could not serialize schema: {0}")]
774    Schema(#[source] SchemaError),
775    #[error(transparent)]
776    Internal(#[from] Box<dyn ChromaError>),
777    #[error("The operation was aborted, {0}")]
778    Aborted(String),
779    #[error("SPANN is still in development. Not allowed to created spann indexes")]
780    SpannNotImplemented,
781    #[error("HNSW is not supported on this platform")]
782    HnswNotSupported,
783    #[error("Failed to parse db id")]
784    DatabaseIdParseError,
785    #[error("Failed to reconcile schema: {0}")]
786    InvalidSchema(#[source] SchemaError),
787}
788
789impl ChromaError for CreateCollectionError {
790    fn code(&self) -> ErrorCodes {
791        match self {
792            CreateCollectionError::InvalidHnswParameters(_) => ErrorCodes::InvalidArgument,
793            CreateCollectionError::InvalidConfig(_) => ErrorCodes::InvalidArgument,
794            CreateCollectionError::InvalidSpannParameters(_) => ErrorCodes::InvalidArgument,
795            CreateCollectionError::AlreadyExists(_) => ErrorCodes::AlreadyExists,
796            CreateCollectionError::DatabaseNotFound(_) => ErrorCodes::InvalidArgument,
797            CreateCollectionError::Get(err) => err.code(),
798            CreateCollectionError::Configuration(_) => ErrorCodes::Internal,
799            CreateCollectionError::Internal(err) => err.code(),
800            CreateCollectionError::Aborted(_) => ErrorCodes::Aborted,
801            CreateCollectionError::SpannNotImplemented => ErrorCodes::InvalidArgument,
802            CreateCollectionError::HnswNotSupported => ErrorCodes::InvalidArgument,
803            CreateCollectionError::DatabaseIdParseError => ErrorCodes::Internal,
804            CreateCollectionError::InvalidSchema(e) => e.code(),
805            CreateCollectionError::Schema(e) => e.code(),
806        }
807    }
808}
809
810#[derive(Debug, Error)]
811pub enum CountCollectionsError {
812    #[error("Internal error in getting count")]
813    Internal,
814}
815
816impl ChromaError for CountCollectionsError {
817    fn code(&self) -> ErrorCodes {
818        match self {
819            CountCollectionsError::Internal => ErrorCodes::Internal,
820        }
821    }
822}
823
824#[derive(Debug, Error)]
825pub enum GetCollectionsError {
826    #[error("Failed to reconcile schema: {0}")]
827    InvalidSchema(#[from] SchemaError),
828    #[error(transparent)]
829    Internal(#[from] Box<dyn ChromaError>),
830    #[error("Could not deserialize configuration")]
831    Configuration(#[source] serde_json::Error),
832    #[error("Could not deserialize collection ID")]
833    CollectionId(#[from] uuid::Error),
834    #[error("Could not deserialize database ID")]
835    DatabaseId,
836    #[error("Could not deserialize schema")]
837    Schema(#[source] serde_json::Error),
838}
839
840impl ChromaError for GetCollectionsError {
841    fn code(&self) -> ErrorCodes {
842        match self {
843            GetCollectionsError::InvalidSchema(e) => e.code(),
844            GetCollectionsError::Internal(err) => err.code(),
845            GetCollectionsError::Configuration(_) => ErrorCodes::Internal,
846            GetCollectionsError::CollectionId(_) => ErrorCodes::Internal,
847            GetCollectionsError::DatabaseId => ErrorCodes::Internal,
848            GetCollectionsError::Schema(_) => ErrorCodes::Internal,
849        }
850    }
851}
852
853#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
854#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
855pub struct ChromaResourceName {
856    pub tenant_resource_name: String,
857    pub database_name: String,
858    pub collection_name: String,
859}
860#[non_exhaustive]
861#[derive(Clone, Serialize)]
862#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
863pub struct GetCollectionByCrnRequest {
864    pub parsed_crn: ChromaResourceName,
865}
866
867impl GetCollectionByCrnRequest {
868    pub fn try_new(crn: String) -> Result<Self, ChromaValidationError> {
869        let parsed_crn = parse_and_validate_crn(&crn)?;
870        Ok(Self { parsed_crn })
871    }
872}
873
874fn parse_and_validate_crn(crn: &str) -> Result<ChromaResourceName, ChromaValidationError> {
875    let mut parts = crn.splitn(4, ':');
876    if let (Some(p1), Some(p2), Some(p3), None) =
877        (parts.next(), parts.next(), parts.next(), parts.next())
878    {
879        if !p1.is_empty() && !p2.is_empty() && !p3.is_empty() {
880            return Ok(ChromaResourceName {
881                tenant_resource_name: p1.to_string(),
882                database_name: p2.to_string(),
883                collection_name: p3.to_string(),
884            });
885        }
886    }
887    let mut err = ValidationError::new("invalid_crn_format");
888    err.message = Some(
889        "CRN must be in the format <tenant_resource_name>:<database_name>:<collection_name> with non-empty parts"
890            .into(),
891    );
892    Err(ChromaValidationError::from(("crn", err)))
893}
894
895pub type GetCollectionByCrnResponse = Collection;
896
897#[derive(Debug, Error)]
898pub enum GetCollectionByCrnError {
899    #[error("Failed to reconcile schema: {0}")]
900    InvalidSchema(#[from] SchemaError),
901    #[error(transparent)]
902    Internal(#[from] Box<dyn ChromaError>),
903    #[error("Collection [{0}] does not exist")]
904    NotFound(String),
905}
906
907impl ChromaError for GetCollectionByCrnError {
908    fn code(&self) -> ErrorCodes {
909        match self {
910            GetCollectionByCrnError::InvalidSchema(e) => e.code(),
911            GetCollectionByCrnError::Internal(err) => err.code(),
912            GetCollectionByCrnError::NotFound(_) => ErrorCodes::NotFound,
913        }
914    }
915}
916
917#[derive(Clone, Deserialize, Serialize, Debug)]
918#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
919pub enum CollectionMetadataUpdate {
920    ResetMetadata,
921    UpdateMetadata(UpdateMetadata),
922}
923
924#[non_exhaustive]
925#[derive(Clone, Validate, Debug, Serialize)]
926#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
927pub struct UpdateCollectionRequest {
928    pub database_name: Option<DatabaseName>,
929    pub collection_id: CollectionUuid,
930    #[validate(custom(function = "validate_name"))]
931    pub new_name: Option<String>,
932    #[validate(custom(function = "validate_non_empty_collection_update_metadata"))]
933    pub new_metadata: Option<CollectionMetadataUpdate>,
934    pub new_configuration: Option<InternalUpdateCollectionConfiguration>,
935}
936
937impl UpdateCollectionRequest {
938    pub fn try_new(
939        database_name: Option<DatabaseName>,
940        collection_id: CollectionUuid,
941        new_name: Option<String>,
942        new_metadata: Option<CollectionMetadataUpdate>,
943        new_configuration: Option<InternalUpdateCollectionConfiguration>,
944    ) -> Result<Self, ChromaValidationError> {
945        let request = Self {
946            database_name,
947            collection_id,
948            new_name,
949            new_metadata,
950            new_configuration,
951        };
952        request.validate().map_err(ChromaValidationError::from)?;
953        Ok(request)
954    }
955}
956
957#[derive(Serialize)]
958#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
959pub struct UpdateCollectionResponse {}
960
961#[derive(Error, Debug)]
962pub enum UpdateCollectionError {
963    #[error("Collection [{0}] does not exist")]
964    NotFound(String),
965    #[error("Metadata reset unsupported")]
966    MetadataResetUnsupported,
967    #[error("Could not serialize configuration")]
968    Configuration(#[source] serde_json::Error),
969    #[error(transparent)]
970    Internal(#[from] Box<dyn ChromaError>),
971    #[error("Could not parse config: {0}")]
972    InvalidConfig(#[from] CollectionConfigurationToInternalConfigurationError),
973    #[error("SPANN is still in development. Not allowed to created spann indexes")]
974    SpannNotImplemented,
975    #[error("Could not serialize schema: {0}")]
976    Schema(#[source] serde_json::Error),
977}
978
979impl ChromaError for UpdateCollectionError {
980    fn code(&self) -> ErrorCodes {
981        match self {
982            UpdateCollectionError::NotFound(_) => ErrorCodes::NotFound,
983            UpdateCollectionError::MetadataResetUnsupported => ErrorCodes::InvalidArgument,
984            UpdateCollectionError::Configuration(_) => ErrorCodes::Internal,
985            UpdateCollectionError::Internal(err) => err.code(),
986            UpdateCollectionError::InvalidConfig(_) => ErrorCodes::InvalidArgument,
987            UpdateCollectionError::SpannNotImplemented => ErrorCodes::InvalidArgument,
988            UpdateCollectionError::Schema(_) => ErrorCodes::Internal,
989        }
990    }
991}
992
/// Request to delete a collection, addressed by tenant, database, and collection name.
///
/// Construct it with [`DeleteCollectionRequest::try_new`].
#[non_exhaustive]
#[derive(Clone, Validate, Serialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct DeleteCollectionRequest {
    /// Tenant identifier.
    pub tenant_id: String,
    /// Database name.
    pub database_name: String,
    /// Name of the collection to delete.
    pub collection_name: String,
}
1001
1002impl DeleteCollectionRequest {
1003    pub fn try_new(
1004        tenant_id: String,
1005        database_name: String,
1006        collection_name: String,
1007    ) -> Result<Self, ChromaValidationError> {
1008        let request = Self {
1009            tenant_id,
1010            database_name,
1011            collection_name,
1012        };
1013        request.validate().map_err(ChromaValidationError::from)?;
1014        Ok(request)
1015    }
1016}
1017
/// Response for a collection delete; it has no fields.
#[derive(Serialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct DeleteCollectionResponse {}
1021
/// Failure modes reported while deleting a collection.
#[derive(Error, Debug)]
pub enum DeleteCollectionError {
    /// The collection named in the payload does not exist.
    #[error("Collection [{0}] does not exist")]
    NotFound(String),
    /// Request validation failed; the message is forwarded from the wrapped error.
    #[error(transparent)]
    Validation(#[from] ChromaValidationError),
    /// A [`GetCollectionError`] was raised; its message is forwarded.
    #[error(transparent)]
    Get(#[from] GetCollectionError),
    /// Any other failure; the message is forwarded from the wrapped error.
    #[error(transparent)]
    Internal(#[from] Box<dyn ChromaError>),
}
1033
1034impl ChromaError for DeleteCollectionError {
1035    fn code(&self) -> ErrorCodes {
1036        match self {
1037            DeleteCollectionError::Validation(err) => err.code(),
1038            DeleteCollectionError::NotFound(_) => ErrorCodes::NotFound,
1039            DeleteCollectionError::Get(err) => err.code(),
1040            DeleteCollectionError::Internal(err) => err.code(),
1041        }
1042    }
1043}
1044
/// Indexing status counters returned to the caller.
// NOTE(review): field meanings below are read from the names only; confirm
// units and ranges against the code that fills this struct.
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IndexStatusResponse {
    /// Indexing progress as a float (presumably a fraction of `total_ops` — TODO confirm).
    pub op_indexing_progress: f32,
    /// Number of operations not yet indexed.
    pub num_unindexed_ops: u64,
    /// Number of operations already indexed.
    pub num_indexed_ops: u64,
    /// Total number of operations.
    pub total_ops: u64,
}
1053
/// Failure modes reported while fetching index status.
#[derive(Error, Debug)]
pub enum IndexStatusError {
    /// The collection named in the payload does not exist.
    #[error("Collection [{0}] does not exist")]
    NotFound(String),
    /// Any other failure; the message is forwarded from the wrapped error.
    #[error(transparent)]
    Internal(#[from] Box<dyn ChromaError>),
}
1061
1062impl From<GetCollectionError> for IndexStatusError {
1063    fn from(err: GetCollectionError) -> Self {
1064        match err {
1065            GetCollectionError::NotFound(msg) => IndexStatusError::NotFound(msg),
1066            other => IndexStatusError::Internal(Box::new(other)),
1067        }
1068    }
1069}
1070
1071impl ChromaError for IndexStatusError {
1072    fn code(&self) -> ErrorCodes {
1073        match self {
1074            IndexStatusError::NotFound(_) => ErrorCodes::NotFound,
1075            IndexStatusError::Internal(err) => err.code(),
1076        }
1077    }
1078}
1079
/// Request to fork an existing collection under a new name.
///
/// Construct it with [`ForkCollectionRequest::try_new`].
#[non_exhaustive]
#[derive(Clone, Validate, Serialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct ForkCollectionRequest {
    /// Tenant identifier.
    pub tenant_id: String,
    /// Database name.
    pub database_name: String,
    /// Identifier of the collection to fork from.
    pub source_collection_id: CollectionUuid,
    /// Name for the forked collection.
    pub target_collection_name: String,
}
1089
1090impl ForkCollectionRequest {
1091    pub fn try_new(
1092        tenant_id: String,
1093        database_name: String,
1094        source_collection_id: CollectionUuid,
1095        target_collection_name: String,
1096    ) -> Result<Self, ChromaValidationError> {
1097        let request = Self {
1098            tenant_id,
1099            database_name,
1100            source_collection_id,
1101            target_collection_name,
1102        };
1103        request.validate().map_err(ChromaValidationError::from)?;
1104        Ok(request)
1105    }
1106}
1107
/// A fork response is simply a [`Collection`].
pub type ForkCollectionResponse = Collection;
1109
/// Pair of log offsets returned from a log fork.
// NOTE(review): the exact semantics of both offsets are not visible in this
// file; confirm against the log service before relying on them.
#[derive(Clone, Debug)]
pub struct ForkLogsResponse {
    /// Compaction offset of the log.
    pub compaction_offset: u64,
    /// Enumeration offset of the log.
    pub enumeration_offset: u64,
}
1115
/// Failure modes reported while forking a collection.
#[derive(Error, Debug)]
pub enum ForkCollectionError {
    /// A collection with the given name already exists.
    #[error("Collection [{0}] already exists")]
    AlreadyExists(String),
    /// A proto collection could not be converted.
    #[error("Failed to convert proto collection")]
    CollectionConversionError(#[from] CollectionConversionError),
    /// A duplicate segment was encountered.
    #[error("Duplicate segment")]
    DuplicateSegment,
    /// A required field, named in the payload, was missing.
    #[error("Missing field: [{0}]")]
    Field(String),
    /// An argument was invalid; the payload carries the explanation.
    #[error("Invalid argument: {0}")]
    InvalidArgument(String),
    /// Forking was attempted on local chroma, where it is unsupported.
    #[error("Collection forking is unsupported for local chroma")]
    Local,
    /// Any other failure; the message is forwarded from the wrapped error.
    #[error(transparent)]
    Internal(#[from] Box<dyn ChromaError>),
    /// The collection named in the payload does not exist.
    #[error("Collection [{0}] does not exist")]
    NotFound(String),
    /// A proto segment could not be converted.
    #[error("Failed to convert proto segment")]
    SegmentConversionError(#[from] SegmentConversionError),
    /// Schema reconciliation failed with the wrapped [`SchemaError`].
    #[error("Failed to reconcile schema: {0}")]
    InvalidSchema(#[from] SchemaError),
}
1139
1140impl ChromaError for ForkCollectionError {
1141    fn code(&self) -> ErrorCodes {
1142        match self {
1143            ForkCollectionError::NotFound(_) => ErrorCodes::NotFound,
1144            ForkCollectionError::AlreadyExists(_) => ErrorCodes::AlreadyExists,
1145            ForkCollectionError::CollectionConversionError(e) => e.code(),
1146            ForkCollectionError::DuplicateSegment => ErrorCodes::Internal,
1147            ForkCollectionError::Field(_) => ErrorCodes::FailedPrecondition,
1148            ForkCollectionError::InvalidArgument(_) => ErrorCodes::InvalidArgument,
1149            ForkCollectionError::Local => ErrorCodes::Unimplemented,
1150            ForkCollectionError::Internal(e) => e.code(),
1151            ForkCollectionError::SegmentConversionError(e) => e.code(),
1152            ForkCollectionError::InvalidSchema(e) => e.code(),
1153        }
1154    }
1155}
1156
/// Failure modes reported while counting forks of a collection.
#[derive(Debug, Error)]
pub enum CountForksError {
    /// The collection named in the payload does not exist.
    #[error("Collection [{0}] does not exist")]
    NotFound(String),
    /// Any other failure; the message is forwarded from the wrapped error.
    #[error(transparent)]
    Internal(#[from] Box<dyn ChromaError>),
    /// Counting forks was attempted on local chroma, where it is unsupported.
    #[error("Count forks is unsupported for local chroma")]
    Local,
}
1166
1167impl ChromaError for CountForksError {
1168    fn code(&self) -> ErrorCodes {
1169        match self {
1170            CountForksError::NotFound(_) => ErrorCodes::NotFound,
1171            CountForksError::Internal(chroma_error) => chroma_error.code(),
1172            CountForksError::Local => ErrorCodes::Unimplemented,
1173        }
1174    }
1175}
1176
/// Failure modes reported while listing attached functions.
#[derive(Debug, Error)]
pub enum ListAttachedFunctionsError {
    /// The collection named in the payload does not exist.
    #[error("Collection [{0}] does not exist")]
    NotFound(String),
    /// Any other failure; the message is forwarded from the wrapped error.
    #[error(transparent)]
    Internal(#[from] Box<dyn ChromaError>),
    /// Listing attached functions is not implemented.
    #[error("List attached functions is not implemented")]
    NotImplemented,
}
1186
1187impl ChromaError for ListAttachedFunctionsError {
1188    fn code(&self) -> ErrorCodes {
1189        match self {
1190            ListAttachedFunctionsError::NotFound(_) => ErrorCodes::NotFound,
1191            ListAttachedFunctionsError::Internal(chroma_error) => chroma_error.code(),
1192            ListAttachedFunctionsError::NotImplemented => ErrorCodes::Unimplemented,
1193        }
1194    }
1195}
1196
/// Failure modes reported while fetching a collection's size.
#[derive(Debug, Error)]
pub enum GetCollectionSizeError {
    /// Any other failure; the message is forwarded from the wrapped error.
    #[error(transparent)]
    Internal(#[from] Box<dyn ChromaError>),
    /// The collection named in the payload does not exist.
    #[error("Collection [{0}] does not exist")]
    NotFound(String),
}
1204
1205impl ChromaError for GetCollectionSizeError {
1206    fn code(&self) -> ErrorCodes {
1207        match self {
1208            GetCollectionSizeError::Internal(err) => err.code(),
1209            GetCollectionSizeError::NotFound(_) => ErrorCodes::NotFound,
1210        }
1211    }
1212}
1213
/// Failure modes reported while listing collection versions.
#[derive(Error, Debug)]
pub enum ListCollectionVersionsError {
    /// Any other failure; the message is forwarded from the wrapped error.
    #[error(transparent)]
    Internal(#[from] Box<dyn ChromaError>),
    /// The collection named in the payload does not exist.
    #[error("Collection [{0}] does not exist")]
    NotFound(String),
}
1221
1222impl ChromaError for ListCollectionVersionsError {
1223    fn code(&self) -> ErrorCodes {
1224        match self {
1225            ListCollectionVersionsError::Internal(err) => err.code(),
1226            ListCollectionVersionsError::NotFound(_) => ErrorCodes::NotFound,
1227        }
1228    }
1229}
1230
1231////////////////////////// Metadata Key Constants //////////////////////////
1232
/// Common prefix shared by the `chroma:`-namespaced metadata keys below.
pub const CHROMA_KEY: &str = "chroma:";
/// Metadata key `chroma:document`.
pub const CHROMA_DOCUMENT_KEY: &str = "chroma:document";
/// Metadata key `chroma:uri`.
pub const CHROMA_URI_KEY: &str = "chroma:uri";
1236
1237////////////////////////// AddCollectionRecords //////////////////////////
1238
/// Payload for adding records to a collection.
///
/// Records are added in batches. All arrays must have the same length, with each index
/// representing a single record. For example, `ids[0]`, `embeddings[0]`, `documents[0]`, etc.
/// all belong to the same record.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct AddCollectionRecordsPayload {
    /// Unique identifiers for each record.
    pub ids: Vec<String>,
    /// Embeddings for each record. Can contain the raw f32 arrays or base64 encoded strings.
    pub embeddings: EmbeddingsPayload,
    /// Optional documents, one optional entry per record.
    pub documents: Option<Vec<Option<String>>>,
    /// Optional URIs, one optional entry per record.
    pub uris: Option<Vec<Option<String>>>,
    /// Optional metadata, one optional entry per record.
    pub metadatas: Option<Vec<Option<Metadata>>>,
}
1255
1256impl AddCollectionRecordsPayload {
1257    pub fn new(
1258        ids: Vec<String>,
1259        embeddings: Vec<Vec<f32>>,
1260        documents: Option<Vec<Option<String>>>,
1261        uris: Option<Vec<Option<String>>>,
1262        metadatas: Option<Vec<Option<Metadata>>>,
1263    ) -> Self {
1264        Self {
1265            ids,
1266            embeddings: EmbeddingsPayload::JsonArrays(embeddings),
1267            documents,
1268            uris,
1269            metadatas,
1270        }
1271    }
1272}
1273
/// Request to add records to a collection.
///
/// Construct it with [`AddCollectionRecordsRequest::try_new`], which runs the
/// validators declared on the fields below.
#[non_exhaustive]
#[derive(Debug, Clone, Validate, Serialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct AddCollectionRecordsRequest {
    /// Tenant identifier.
    pub tenant_id: String,
    /// Database name.
    pub database_name: String,
    /// Identifier of the target collection.
    pub collection_id: CollectionUuid,
    /// Record identifiers.
    pub ids: Vec<String>,
    /// Raw embeddings; `validate_embeddings` rejects any empty vector.
    #[validate(custom(function = "validate_embeddings"))]
    pub embeddings: Vec<Vec<f32>>,
    /// Optional documents, one optional entry per record.
    pub documents: Option<Vec<Option<String>>>,
    /// Optional URIs, one optional entry per record.
    pub uris: Option<Vec<Option<String>>>,
    /// Optional metadata; checked by `validate_metadata_vec`.
    #[validate(custom(function = "validate_metadata_vec"))]
    pub metadatas: Option<Vec<Option<Metadata>>>,
}
1289
1290impl AddCollectionRecordsRequest {
1291    #[allow(clippy::too_many_arguments)]
1292    pub fn try_new(
1293        tenant_id: String,
1294        database_name: String,
1295        collection_id: CollectionUuid,
1296        ids: Vec<String>,
1297        embeddings: Vec<Vec<f32>>,
1298        documents: Option<Vec<Option<String>>>,
1299        uris: Option<Vec<Option<String>>>,
1300        metadatas: Option<Vec<Option<Metadata>>>,
1301    ) -> Result<Self, ChromaValidationError> {
1302        let request = Self {
1303            tenant_id,
1304            database_name,
1305            collection_id,
1306            ids,
1307            embeddings,
1308            documents,
1309            uris,
1310            metadatas,
1311        };
1312        request.validate().map_err(ChromaValidationError::from)?;
1313        Ok(request)
1314    }
1315
1316    pub fn into_payload(self) -> AddCollectionRecordsPayload {
1317        AddCollectionRecordsPayload {
1318            ids: self.ids,
1319            embeddings: EmbeddingsPayload::JsonArrays(self.embeddings),
1320            documents: self.documents,
1321            uris: self.uris,
1322            metadatas: self.metadatas,
1323        }
1324    }
1325}
1326
1327fn validate_embeddings(embeddings: &[Vec<f32>]) -> Result<(), ValidationError> {
1328    if embeddings.iter().any(|e| e.is_empty()) {
1329        return Err(ValidationError::new("embedding_minimum_dimensions")
1330            .with_message("Each embedding must have at least 1 dimension".into()));
1331    }
1332    Ok(())
1333}
1334
/// Response for an add-records call; it has no fields.
#[derive(Serialize, Default, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct AddCollectionRecordsResponse {}
1338
/// Failure modes reported while adding records to a collection.
#[derive(Error, Debug)]
pub enum AddCollectionRecordsError {
    /// Looking up the collection failed with the wrapped [`GetCollectionError`].
    #[error("Failed to get collection: {0}")]
    Collection(#[from] GetCollectionError),
    /// The caller should back off and retry.
    #[error("Backoff and retry")]
    Backoff,
    /// The database name was invalid.
    #[error("Invalid database name")]
    InvalidDatabaseName,
    /// Any other failure; the message is forwarded from the wrapped error.
    #[error(transparent)]
    Other(#[from] Box<dyn ChromaError>),
}
1350
1351impl ChromaError for AddCollectionRecordsError {
1352    fn code(&self) -> ErrorCodes {
1353        match self {
1354            AddCollectionRecordsError::Collection(err) => err.code(),
1355            AddCollectionRecordsError::Backoff => ErrorCodes::ResourceExhausted,
1356            AddCollectionRecordsError::InvalidDatabaseName => ErrorCodes::InvalidArgument,
1357            AddCollectionRecordsError::Other(err) => err.code(),
1358        }
1359    }
1360}
1361
1362////////////////////////// UpdateCollectionRecords //////////////////////////
1363
/// Payload for updating existing records in a collection.
///
/// Records are updated in batches. All arrays must have the same length, with each index
/// representing a single record. For example, `ids[0]`, `embeddings[0]`, `documents[0]`, etc.
/// all belong to the same record.
#[derive(Deserialize, Debug, Clone, Serialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct UpdateCollectionRecordsPayload {
    /// Identifiers of the records to update.
    pub ids: Vec<String>,
    /// Updated embeddings for each record. Can contain the raw f32 arrays or base64 encoded strings.
    pub embeddings: Option<UpdateEmbeddingsPayload>,
    /// Optional documents, one optional entry per record.
    pub documents: Option<Vec<Option<String>>>,
    /// Optional URIs, one optional entry per record.
    pub uris: Option<Vec<Option<String>>>,
    /// Optional metadata updates, one optional entry per record.
    pub metadatas: Option<Vec<Option<UpdateMetadata>>>,
}
1379
/// Request to update existing records in a collection.
///
/// Construct it with [`UpdateCollectionRecordsRequest::try_new`], which runs the
/// validators declared on the fields below.
#[non_exhaustive]
#[derive(Debug, Clone, Validate, Serialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct UpdateCollectionRecordsRequest {
    /// Tenant identifier.
    pub tenant_id: String,
    /// Database name.
    pub database_name: String,
    /// Identifier of the target collection.
    pub collection_id: CollectionUuid,
    /// Identifiers of the records to update.
    pub ids: Vec<String>,
    /// Optional raw embeddings; individual entries may be `None`.
    pub embeddings: Option<Vec<Option<Vec<f32>>>>,
    /// Optional documents, one optional entry per record.
    pub documents: Option<Vec<Option<String>>>,
    /// Optional URIs, one optional entry per record.
    pub uris: Option<Vec<Option<String>>>,
    /// Optional metadata updates; checked by `validate_update_metadata_vec`.
    #[validate(custom(function = "validate_update_metadata_vec"))]
    pub metadatas: Option<Vec<Option<UpdateMetadata>>>,
}
1394
1395impl UpdateCollectionRecordsRequest {
1396    #[allow(clippy::too_many_arguments)]
1397    pub fn try_new(
1398        tenant_id: String,
1399        database_name: String,
1400        collection_id: CollectionUuid,
1401        ids: Vec<String>,
1402        embeddings: Option<Vec<Option<Vec<f32>>>>,
1403        documents: Option<Vec<Option<String>>>,
1404        uris: Option<Vec<Option<String>>>,
1405        metadatas: Option<Vec<Option<UpdateMetadata>>>,
1406    ) -> Result<Self, ChromaValidationError> {
1407        let request = Self {
1408            tenant_id,
1409            database_name,
1410            collection_id,
1411            ids,
1412            embeddings,
1413            documents,
1414            uris,
1415            metadatas,
1416        };
1417        request.validate().map_err(ChromaValidationError::from)?;
1418        Ok(request)
1419    }
1420
1421    pub fn into_payload(self) -> UpdateCollectionRecordsPayload {
1422        UpdateCollectionRecordsPayload {
1423            ids: self.ids,
1424            embeddings: self.embeddings.map(UpdateEmbeddingsPayload::JsonArrays),
1425            documents: self.documents,
1426            uris: self.uris,
1427            metadatas: self.metadatas,
1428        }
1429    }
1430}
1431
/// Response for an update-records call; it has no fields.
#[derive(Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct UpdateCollectionRecordsResponse {}
1435
/// Failure modes reported while updating records in a collection.
#[derive(Error, Debug)]
pub enum UpdateCollectionRecordsError {
    /// The caller should back off and retry.
    #[error("Backoff and retry")]
    Backoff,
    /// The database name was invalid.
    #[error("Invalid database name")]
    InvalidDatabaseName,
    /// Any other failure; the message is forwarded from the wrapped error.
    #[error(transparent)]
    Other(#[from] Box<dyn ChromaError>),
}
1445
1446impl ChromaError for UpdateCollectionRecordsError {
1447    fn code(&self) -> ErrorCodes {
1448        match self {
1449            UpdateCollectionRecordsError::Backoff => ErrorCodes::ResourceExhausted,
1450            UpdateCollectionRecordsError::InvalidDatabaseName => ErrorCodes::InvalidArgument,
1451            UpdateCollectionRecordsError::Other(err) => err.code(),
1452        }
1453    }
1454}
1455
1456////////////////////////// UpsertCollectionRecords //////////////////////////
1457
/// Payload for upserting records in a collection.
///
/// Upsert creates records if they don't exist, or updates them if they do.
/// Records are upserted in batches. All arrays must have the same length, with each index
/// representing a single record. For example, `ids[0]`, `embeddings[0]`, `documents[0]`, etc.
/// all belong to the same record.
#[derive(Deserialize, Debug, Clone, Serialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct UpsertCollectionRecordsPayload {
    /// Identifiers of the records to upsert.
    pub ids: Vec<String>,
    /// Embeddings for each record. Can contain the raw f32 arrays or base64 encoded strings.
    pub embeddings: EmbeddingsPayload,
    /// Optional documents, one optional entry per record.
    pub documents: Option<Vec<Option<String>>>,
    /// Optional URIs, one optional entry per record.
    pub uris: Option<Vec<Option<String>>>,
    /// Optional metadata updates, one optional entry per record.
    pub metadatas: Option<Vec<Option<UpdateMetadata>>>,
}
1474
/// Request to upsert records in a collection.
///
/// Construct it with [`UpsertCollectionRecordsRequest::try_new`], which runs the
/// validators declared on the fields below.
#[non_exhaustive]
#[derive(Debug, Clone, Validate, Serialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct UpsertCollectionRecordsRequest {
    /// Tenant identifier.
    pub tenant_id: String,
    /// Database name.
    pub database_name: String,
    /// Identifier of the target collection.
    pub collection_id: CollectionUuid,
    /// Identifiers of the records to upsert.
    pub ids: Vec<String>,
    /// Raw embeddings; `validate_embeddings` rejects any empty vector.
    #[validate(custom(function = "validate_embeddings"))]
    pub embeddings: Vec<Vec<f32>>,
    /// Optional documents, one optional entry per record.
    pub documents: Option<Vec<Option<String>>>,
    /// Optional URIs, one optional entry per record.
    pub uris: Option<Vec<Option<String>>>,
    /// Optional metadata updates; checked by `validate_update_metadata_vec`.
    #[validate(custom(function = "validate_update_metadata_vec"))]
    pub metadatas: Option<Vec<Option<UpdateMetadata>>>,
}
1490
1491impl UpsertCollectionRecordsRequest {
1492    #[allow(clippy::too_many_arguments)]
1493    pub fn try_new(
1494        tenant_id: String,
1495        database_name: String,
1496        collection_id: CollectionUuid,
1497        ids: Vec<String>,
1498        embeddings: Vec<Vec<f32>>,
1499        documents: Option<Vec<Option<String>>>,
1500        uris: Option<Vec<Option<String>>>,
1501        metadatas: Option<Vec<Option<UpdateMetadata>>>,
1502    ) -> Result<Self, ChromaValidationError> {
1503        let request = Self {
1504            tenant_id,
1505            database_name,
1506            collection_id,
1507            ids,
1508            embeddings,
1509            documents,
1510            uris,
1511            metadatas,
1512        };
1513        request.validate().map_err(ChromaValidationError::from)?;
1514        Ok(request)
1515    }
1516
1517    pub fn into_payload(self) -> UpsertCollectionRecordsPayload {
1518        UpsertCollectionRecordsPayload {
1519            ids: self.ids.clone(),
1520            embeddings: EmbeddingsPayload::JsonArrays(self.embeddings.clone()),
1521            documents: self.documents.clone(),
1522            uris: self.uris.clone(),
1523            metadatas: self.metadatas.clone(),
1524        }
1525    }
1526}
1527
/// Response for an upsert-records call; it has no fields.
#[derive(Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct UpsertCollectionRecordsResponse {}
1531
/// Failure modes reported while upserting records in a collection.
#[derive(Error, Debug)]
pub enum UpsertCollectionRecordsError {
    /// The caller should back off and retry.
    #[error("Backoff and retry")]
    Backoff,
    /// The database name was invalid.
    #[error("Invalid database name")]
    InvalidDatabaseName,
    /// Any other failure; the message is forwarded from the wrapped error.
    #[error(transparent)]
    Other(#[from] Box<dyn ChromaError>),
}
1541
1542impl ChromaError for UpsertCollectionRecordsError {
1543    fn code(&self) -> ErrorCodes {
1544        match self {
1545            UpsertCollectionRecordsError::Backoff => ErrorCodes::ResourceExhausted,
1546            UpsertCollectionRecordsError::InvalidDatabaseName => ErrorCodes::InvalidArgument,
1547            UpsertCollectionRecordsError::Other(err) => err.code(),
1548        }
1549    }
1550}
1551
1552////////////////////////// DeleteCollectionRecords //////////////////////////
1553
/// Payload for deleting records from a collection.
///
/// Records can be deleted by their IDs or by a metadata filter. At least one of `ids` or `where`
/// must be provided.
#[derive(Deserialize, Debug, Clone, Serialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct DeleteCollectionRecordsPayload {
    /// Identifiers of the records to delete, if deleting by ID.
    pub ids: Option<Vec<String>>,
    /// Optional limit; deserializes to `None` when the key is absent.
    #[serde(default)]
    pub limit: Option<u32>,
    /// Raw filter fields, flattened into this object's own keys when (de)serialized.
    #[serde(flatten)]
    pub where_fields: RawWhereFields,
}
1567
/// Request to delete records from a collection.
///
/// Construct it with [`DeleteCollectionRecordsRequest::try_new`], which checks
/// the combination of `ids`, `where`, and `limit`.
#[non_exhaustive]
#[derive(Debug, Clone, Validate, Serialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct DeleteCollectionRecordsRequest {
    /// Tenant identifier.
    pub tenant_id: String,
    /// Database name.
    pub database_name: String,
    /// Identifier of the target collection.
    pub collection_id: CollectionUuid,
    /// Identifiers of the records to delete, if deleting by ID.
    pub ids: Option<Vec<String>>,
    /// Optional parsed filter.
    pub r#where: Option<Where>,
    /// Optional limit; `try_new` only accepts it together with `where`.
    pub limit: Option<u32>,
}
1579
1580impl DeleteCollectionRecordsRequest {
1581    pub fn try_new(
1582        tenant_id: String,
1583        database_name: String,
1584        collection_id: CollectionUuid,
1585        ids: Option<Vec<String>>,
1586        r#where: Option<Where>,
1587        limit: Option<u32>,
1588    ) -> Result<Self, ChromaValidationError> {
1589        if ids.as_ref().map(|ids| ids.is_empty()).unwrap_or(false) && r#where.is_none() {
1590            return Err(ChromaValidationError::from((
1591                ("ids, where"),
1592                ValidationError::new("filter")
1593                    .with_message("Either ids or where must be specified".into()),
1594            )));
1595        }
1596
1597        if limit.is_some() && r#where.is_none() {
1598            return Err(ChromaValidationError::from((
1599                ("limit, where"),
1600                ValidationError::new("limit").with_message(
1601                    "limit can only be specified when a where clause is provided".into(),
1602                ),
1603            )));
1604        }
1605
1606        let request = Self {
1607            tenant_id,
1608            database_name,
1609            collection_id,
1610            ids,
1611            r#where,
1612            limit,
1613        };
1614        request.validate().map_err(ChromaValidationError::from)?;
1615        Ok(request)
1616    }
1617
1618    pub fn into_payload(self) -> Result<DeleteCollectionRecordsPayload, WhereError> {
1619        let where_fields = if let Some(r#where) = self.r#where.as_ref() {
1620            RawWhereFields::from_json_str(Some(&serde_json::to_string(r#where)?), None)?
1621        } else {
1622            RawWhereFields::default()
1623        };
1624        Ok(DeleteCollectionRecordsPayload {
1625            ids: self.ids.clone(),
1626            limit: self.limit,
1627            where_fields,
1628        })
1629    }
1630}
1631
/// Response for a delete-records call.
#[derive(Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct DeleteCollectionRecordsResponse {
    /// Deleted-record count; deserializes to `0` when the key is absent.
    #[serde(default)]
    pub deleted: u32,
}
1638
/// Failure modes reported while deleting records from a collection.
#[derive(Error, Debug)]
pub enum DeleteCollectionRecordsError {
    /// Resolving the records to delete failed with the wrapped `ExecutorError`.
    #[error("Failed to resolve records for deletion: {0}")]
    Get(#[from] ExecutorError),
    /// The caller should back off and retry.
    #[error("Backoff and retry")]
    Backoff,
    /// The database name was invalid.
    #[error("Invalid database name")]
    InvalidDatabaseName,
    /// Any other failure; the message is forwarded from the wrapped error.
    #[error(transparent)]
    Internal(#[from] Box<dyn ChromaError>),
}
1650
1651impl ChromaError for DeleteCollectionRecordsError {
1652    fn code(&self) -> ErrorCodes {
1653        match self {
1654            DeleteCollectionRecordsError::Get(err) => err.code(),
1655            DeleteCollectionRecordsError::Backoff => ErrorCodes::ResourceExhausted,
1656            DeleteCollectionRecordsError::InvalidDatabaseName => ErrorCodes::InvalidArgument,
1657            DeleteCollectionRecordsError::Internal(err) => err.code(),
1658        }
1659    }
1660}
1661
1662////////////////////////// Include //////////////////////////
1663
/// Error for an unrecognized include value; the wrapped string is the rejected input.
#[derive(Error, Debug)]
#[error("Invalid include value: {0}")]
pub struct IncludeParsingError(String);
1667
impl ChromaError for IncludeParsingError {
    /// Always reports [`ErrorCodes::InvalidArgument`].
    fn code(&self) -> ErrorCodes {
        ErrorCodes::InvalidArgument
    }
}
1673
/// Use this enum to specify which fields should be returned when retrieving records.
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub enum Include {
    /// Serialized as `"distances"`.
    #[serde(rename = "distances")]
    Distance,
    /// Serialized as `"documents"`.
    #[serde(rename = "documents")]
    Document,
    /// Serialized as `"embeddings"`.
    #[serde(rename = "embeddings")]
    Embedding,
    /// Serialized as `"metadatas"`.
    #[serde(rename = "metadatas")]
    Metadata,
    /// Serialized as `"uris"`.
    #[serde(rename = "uris")]
    Uri,
}
1689
1690impl TryFrom<&str> for Include {
1691    type Error = IncludeParsingError;
1692
1693    fn try_from(value: &str) -> Result<Self, Self::Error> {
1694        match value {
1695            "distances" => Ok(Include::Distance),
1696            "documents" => Ok(Include::Document),
1697            "embeddings" => Ok(Include::Embedding),
1698            "metadatas" => Ok(Include::Metadata),
1699            "uris" => Ok(Include::Uri),
1700            _ => Err(IncludeParsingError(value.to_string())),
1701        }
1702    }
1703}
1704
/// Newtype around a list of [`Include`] values.
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[cfg_attr(feature = "pyo3", pyo3::pyclass)]
pub struct IncludeList(pub Vec<Include>);
1709
1710impl IncludeList {
1711    pub fn empty() -> Self {
1712        Self(Vec::new())
1713    }
1714
1715    pub fn default_query() -> Self {
1716        Self(vec![
1717            Include::Document,
1718            Include::Metadata,
1719            Include::Distance,
1720        ])
1721    }
1722    pub fn default_get() -> Self {
1723        Self(vec![Include::Document, Include::Metadata])
1724    }
1725    pub fn all() -> Self {
1726        Self(vec![
1727            Include::Document,
1728            Include::Metadata,
1729            Include::Distance,
1730            Include::Embedding,
1731            Include::Uri,
1732        ])
1733    }
1734}
1735
1736impl TryFrom<Vec<String>> for IncludeList {
1737    type Error = IncludeParsingError;
1738
1739    fn try_from(value: Vec<String>) -> Result<Self, Self::Error> {
1740        let mut includes = Vec::new();
1741        for v in value {
1742            // "data" is only used by single node Chroma
1743            if v == "data" {
1744                includes.push(Include::Metadata);
1745                continue;
1746            }
1747
1748            includes.push(Include::try_from(v.as_str())?);
1749        }
1750        Ok(IncludeList(includes))
1751    }
1752}
1753
1754////////////////////////// Count //////////////////////////
1755
/// Request to count the records in a collection.
///
/// Construct it with [`CountRequest::try_new`].
#[non_exhaustive]
#[derive(Clone, Deserialize, Serialize, Validate)]
pub struct CountRequest {
    /// Tenant identifier.
    pub tenant_id: String,
    /// Database name.
    pub database_name: String,
    /// Identifier of the collection to count.
    pub collection_id: CollectionUuid,
    /// Read level for the count; falls back to `ReadLevel::default()` when absent on deserialization.
    #[serde(default)]
    pub read_level: ReadLevel,
}
1765
1766impl CountRequest {
1767    pub fn try_new(
1768        tenant_id: String,
1769        database_name: String,
1770        collection_id: CollectionUuid,
1771        read_level: ReadLevel,
1772    ) -> Result<Self, ChromaValidationError> {
1773        let request = Self {
1774            tenant_id,
1775            database_name,
1776            collection_id,
1777            read_level,
1778        };
1779        request.validate().map_err(ChromaValidationError::from)?;
1780        Ok(request)
1781    }
1782}
1783
/// A count response is a plain `u32`.
pub type CountResponse = u32;
1785
1786//////////////////////// Payload Err ////////////////////
1787
/// Errors raised while turning a request's `where` filter into payload fields.
#[derive(Debug, thiserror::Error)]
pub enum WhereError {
    /// A `serde_json` error occurred.
    #[error("serialization: {0}")]
    Serialization(#[from] serde_json::Error),
    /// The filter failed validation with the wrapped [`WhereValidationError`].
    #[error("validation: {0}")]
    Validation(#[from] WhereValidationError),
}
1795
1796////////////////////////// Get //////////////////////////
1797
/// Records can be retrieved by their IDs or by a metadata filter. At least one of `ids` or `where`
/// must be provided. Use `include` to specify which fields to return in the response.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct GetRequestPayload {
    /// Identifiers of the records to fetch, if fetching by ID.
    pub ids: Option<Vec<String>>,
    /// Raw filter fields, flattened into this object's own keys when (de)serialized.
    #[serde(flatten)]
    pub where_fields: RawWhereFields,
    /// Optional maximum number of records to return.
    pub limit: Option<u32>,
    /// Optional number of records to skip.
    pub offset: Option<u32>,
    /// Fields to return; falls back to [`IncludeList::default_get`] when absent.
    #[serde(default = "IncludeList::default_get")]
    pub include: IncludeList,
}
1811
/// Request to fetch records from a collection.
///
/// Construct it with [`GetRequest::try_new`].
#[non_exhaustive]
#[derive(Debug, Clone, Validate, Serialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct GetRequest {
    /// Tenant identifier.
    pub tenant_id: String,
    /// Database name.
    pub database_name: String,
    /// Identifier of the target collection.
    pub collection_id: CollectionUuid,
    /// Identifiers of the records to fetch, if fetching by ID.
    pub ids: Option<Vec<String>>,
    /// Optional parsed filter.
    pub r#where: Option<Where>,
    /// Optional maximum number of records to return.
    pub limit: Option<u32>,
    /// Number of records to skip.
    pub offset: u32,
    /// Fields to return in the response.
    pub include: IncludeList,
}
1825
1826impl GetRequest {
1827    #[allow(clippy::too_many_arguments)]
1828    pub fn try_new(
1829        tenant_id: String,
1830        database_name: String,
1831        collection_id: CollectionUuid,
1832        ids: Option<Vec<String>>,
1833        r#where: Option<Where>,
1834        limit: Option<u32>,
1835        offset: u32,
1836        include: IncludeList,
1837    ) -> Result<Self, ChromaValidationError> {
1838        let request = Self {
1839            tenant_id,
1840            database_name,
1841            collection_id,
1842            ids,
1843            r#where,
1844            limit,
1845            offset,
1846            include,
1847        };
1848        request.validate().map_err(ChromaValidationError::from)?;
1849        Ok(request)
1850    }
1851
1852    pub fn into_payload(self) -> Result<GetRequestPayload, WhereError> {
1853        let where_fields = if let Some(r#where) = self.r#where.as_ref() {
1854            RawWhereFields::from_json_str(Some(&serde_json::to_string(r#where)?), None)?
1855        } else {
1856            RawWhereFields::default()
1857        };
1858        Ok(GetRequestPayload {
1859            ids: self.ids,
1860            where_fields,
1861            limit: self.limit,
1862            offset: Some(self.offset),
1863            include: self.include,
1864        })
1865    }
1866}
1867
1868/// All arrays have the same length, with each index representing a single record.
1869/// Only fields specified in the request's `include` parameter are populated.
1870#[derive(Clone, Deserialize, Serialize, Debug, Default)]
1871#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1872#[cfg_attr(feature = "pyo3", pyo3::pyclass)]
1873pub struct GetResponse {
1874    pub ids: Vec<String>,
1875    pub embeddings: Option<Vec<Vec<f32>>>,
1876    pub documents: Option<Vec<Option<String>>>,
1877    pub uris: Option<Vec<Option<String>>>,
1878    pub metadatas: Option<Vec<Option<Metadata>>>,
1879    /// List of fields that were included in this response.
1880    pub include: Vec<Include>,
1881}
1882
1883impl GetResponse {
1884    pub fn sort_by_ids(&mut self) {
1885        let mut indices: Vec<usize> = (0..self.ids.len()).collect();
1886        indices.sort_by(|&a, &b| self.ids[a].cmp(&self.ids[b]));
1887
1888        let sorted_ids = indices.iter().map(|&i| self.ids[i].clone()).collect();
1889        self.ids = sorted_ids;
1890
1891        if let Some(ref mut embeddings) = self.embeddings {
1892            let sorted_embeddings = indices.iter().map(|&i| embeddings[i].clone()).collect();
1893            *embeddings = sorted_embeddings;
1894        }
1895
1896        if let Some(ref mut documents) = self.documents {
1897            let sorted_docs = indices.iter().map(|&i| documents[i].clone()).collect();
1898            *documents = sorted_docs;
1899        }
1900
1901        if let Some(ref mut uris) = self.uris {
1902            let sorted_uris = indices.iter().map(|&i| uris[i].clone()).collect();
1903            *uris = sorted_uris;
1904        }
1905
1906        if let Some(ref mut metadatas) = self.metadatas {
1907            let sorted_metas = indices.iter().map(|&i| metadatas[i].clone()).collect();
1908            *metadatas = sorted_metas;
1909        }
1910    }
1911}
1912
#[cfg(feature = "pyo3")]
#[pyo3::pymethods]
impl GetResponse {
    /// IDs of the returned records (borrowed).
    #[getter]
    pub fn ids(&self) -> &Vec<String> {
        &self.ids
    }

    /// Copy of the embeddings column, or `None` when it is not populated.
    #[getter]
    pub fn embeddings(&self) -> Option<Vec<Vec<f32>>> {
        self.embeddings.clone()
    }

    /// Copy of the documents column, or `None` when it is not populated.
    #[getter]
    pub fn documents(&self) -> Option<Vec<Option<String>>> {
        self.documents.clone()
    }

    /// Copy of the URIs column, or `None` when it is not populated.
    #[getter]
    pub fn uris(&self) -> Option<Vec<Option<String>>> {
        self.uris.clone()
    }

    /// Copy of the metadata column, or `None` when it is not populated.
    #[getter]
    pub fn metadatas(&self) -> Option<Vec<Option<Metadata>>> {
        self.metadatas.clone()
    }
}
1941
1942impl From<(GetResult, IncludeList)> for GetResponse {
1943    fn from((result, IncludeList(include_vec)): (GetResult, IncludeList)) -> Self {
1944        let mut res = Self {
1945            ids: Vec::new(),
1946            embeddings: include_vec
1947                .contains(&Include::Embedding)
1948                .then_some(Vec::new()),
1949            documents: include_vec
1950                .contains(&Include::Document)
1951                .then_some(Vec::new()),
1952            uris: include_vec.contains(&Include::Uri).then_some(Vec::new()),
1953            metadatas: include_vec
1954                .contains(&Include::Metadata)
1955                .then_some(Vec::new()),
1956            include: include_vec,
1957        };
1958        for ProjectionRecord {
1959            id,
1960            document,
1961            embedding,
1962            mut metadata,
1963        } in result.result.records
1964        {
1965            res.ids.push(id);
1966            if let (Some(emb), Some(embeddings)) = (embedding, res.embeddings.as_mut()) {
1967                embeddings.push(emb);
1968            }
1969            if let Some(documents) = res.documents.as_mut() {
1970                documents.push(document);
1971            }
1972            let uri = metadata.as_mut().and_then(|meta| {
1973                meta.remove(CHROMA_URI_KEY).and_then(|v| {
1974                    if let crate::MetadataValue::Str(uri) = v {
1975                        Some(uri)
1976                    } else {
1977                        None
1978                    }
1979                })
1980            });
1981            if let Some(uris) = res.uris.as_mut() {
1982                uris.push(uri);
1983            }
1984
1985            let metadata = metadata.map(|m| {
1986                m.into_iter()
1987                    .filter(|(k, _)| !k.starts_with(CHROMA_KEY))
1988                    .collect()
1989            });
1990            if let Some(metadatas) = res.metadatas.as_mut() {
1991                metadatas.push(metadata);
1992            }
1993        }
1994        res
1995    }
1996}
1997
1998////////////////////////// Query //////////////////////////
1999
2000#[derive(Deserialize, Debug, Clone, Serialize)]
2001#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2002pub struct QueryRequestPayload {
2003    pub ids: Option<Vec<String>>,
2004    #[serde(flatten)]
2005    pub where_fields: RawWhereFields,
2006    pub query_embeddings: Vec<Vec<f32>>,
2007    pub n_results: Option<u32>,
2008    #[serde(default = "IncludeList::default_query")]
2009    pub include: IncludeList,
2010}
2011
2012#[non_exhaustive]
2013#[derive(Debug, Clone, Validate, Serialize)]
2014#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2015pub struct QueryRequest {
2016    pub tenant_id: String,
2017    pub database_name: String,
2018    pub collection_id: CollectionUuid,
2019    pub ids: Option<Vec<String>>,
2020    pub r#where: Option<Where>,
2021    pub embeddings: Vec<Vec<f32>>,
2022    pub n_results: u32,
2023    pub include: IncludeList,
2024}
2025
2026impl QueryRequest {
2027    #[allow(clippy::too_many_arguments)]
2028    pub fn try_new(
2029        tenant_id: String,
2030        database_name: String,
2031        collection_id: CollectionUuid,
2032        ids: Option<Vec<String>>,
2033        r#where: Option<Where>,
2034        embeddings: Vec<Vec<f32>>,
2035        n_results: u32,
2036        include: IncludeList,
2037    ) -> Result<Self, ChromaValidationError> {
2038        let request = Self {
2039            tenant_id,
2040            database_name,
2041            collection_id,
2042            ids,
2043            r#where,
2044            embeddings,
2045            n_results,
2046            include,
2047        };
2048        request.validate().map_err(ChromaValidationError::from)?;
2049        Ok(request)
2050    }
2051
2052    pub fn into_payload(self) -> Result<QueryRequestPayload, WhereError> {
2053        let where_fields = if let Some(r#where) = self.r#where.as_ref() {
2054            RawWhereFields::from_json_str(Some(&serde_json::to_string(r#where)?), None)?
2055        } else {
2056            RawWhereFields::default()
2057        };
2058        Ok(QueryRequestPayload {
2059            ids: self.ids,
2060            where_fields,
2061            query_embeddings: self.embeddings,
2062            n_results: Some(self.n_results),
2063            include: self.include,
2064        })
2065    }
2066}
2067
2068#[derive(Clone, Deserialize, Serialize, Debug)]
2069#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2070#[cfg_attr(feature = "pyo3", pyo3::pyclass)]
2071pub struct QueryResponse {
2072    pub ids: Vec<Vec<String>>,
2073    pub embeddings: Option<Vec<Vec<Option<Vec<f32>>>>>,
2074    pub documents: Option<Vec<Vec<Option<String>>>>,
2075    pub uris: Option<Vec<Vec<Option<String>>>>,
2076    pub metadatas: Option<Vec<Vec<Option<Metadata>>>>,
2077    pub distances: Option<Vec<Vec<Option<f32>>>>,
2078    pub include: Vec<Include>,
2079}
2080
2081impl QueryResponse {
2082    pub fn sort_by_ids(&mut self) {
2083        fn reorder<T: Clone>(v: &mut [T], indices: &[usize]) {
2084            let old = v.to_owned();
2085            for (new_pos, &i) in indices.iter().enumerate() {
2086                v[new_pos] = old[i].clone();
2087            }
2088        }
2089
2090        for i in 0..self.ids.len() {
2091            let mut indices: Vec<usize> = (0..self.ids[i].len()).collect();
2092
2093            indices.sort_unstable_by(|&a, &b| self.ids[i][a].cmp(&self.ids[i][b]));
2094
2095            reorder(&mut self.ids[i], &indices);
2096
2097            if let Some(embeddings) = &mut self.embeddings {
2098                reorder(&mut embeddings[i], &indices);
2099            }
2100
2101            if let Some(documents) = &mut self.documents {
2102                reorder(&mut documents[i], &indices);
2103            }
2104
2105            if let Some(uris) = &mut self.uris {
2106                reorder(&mut uris[i], &indices);
2107            }
2108
2109            if let Some(metadatas) = &mut self.metadatas {
2110                reorder(&mut metadatas[i], &indices);
2111            }
2112
2113            if let Some(distances) = &mut self.distances {
2114                reorder(&mut distances[i], &indices);
2115            }
2116        }
2117    }
2118}
2119
#[cfg(feature = "pyo3")]
#[pyo3::pymethods]
impl QueryResponse {
    /// IDs of the returned records, grouped by query (borrowed).
    #[getter]
    pub fn ids(&self) -> &Vec<Vec<String>> {
        &self.ids
    }

    /// Copy of the embeddings column, or `None` when it is not populated.
    #[getter]
    pub fn embeddings(&self) -> Option<Vec<Vec<Option<Vec<f32>>>>> {
        self.embeddings.clone()
    }

    /// Copy of the documents column, or `None` when it is not populated.
    #[getter]
    pub fn documents(&self) -> Option<Vec<Vec<Option<String>>>> {
        self.documents.clone()
    }

    /// Copy of the URIs column, or `None` when it is not populated.
    #[getter]
    pub fn uris(&self) -> Option<Vec<Vec<Option<String>>>> {
        self.uris.clone()
    }

    /// Copy of the metadata column, or `None` when it is not populated.
    #[getter]
    pub fn metadatas(&self) -> Option<Vec<Vec<Option<Metadata>>>> {
        self.metadatas.clone()
    }

    /// Copy of the distances column, or `None` when it is not populated.
    #[getter]
    pub fn distances(&self) -> Option<Vec<Vec<Option<f32>>>> {
        self.distances.clone()
    }
}
2153
2154impl From<(KnnBatchResult, IncludeList)> for QueryResponse {
2155    fn from((result, IncludeList(include_vec)): (KnnBatchResult, IncludeList)) -> Self {
2156        let mut res = Self {
2157            ids: Vec::new(),
2158            embeddings: include_vec
2159                .contains(&Include::Embedding)
2160                .then_some(Vec::new()),
2161            documents: include_vec
2162                .contains(&Include::Document)
2163                .then_some(Vec::new()),
2164            uris: include_vec.contains(&Include::Uri).then_some(Vec::new()),
2165            metadatas: include_vec
2166                .contains(&Include::Metadata)
2167                .then_some(Vec::new()),
2168            distances: include_vec
2169                .contains(&Include::Distance)
2170                .then_some(Vec::new()),
2171            include: include_vec,
2172        };
2173        for query_result in result.results {
2174            let mut ids = Vec::new();
2175            let mut embeddings = Vec::new();
2176            let mut documents = Vec::new();
2177            let mut uris = Vec::new();
2178            let mut metadatas = Vec::new();
2179            let mut distances = Vec::new();
2180            for KnnProjectionRecord {
2181                record:
2182                    ProjectionRecord {
2183                        id,
2184                        document,
2185                        embedding,
2186                        mut metadata,
2187                    },
2188                distance,
2189            } in query_result.records
2190            {
2191                ids.push(id);
2192                embeddings.push(embedding);
2193                documents.push(document);
2194
2195                let uri = metadata.as_mut().and_then(|meta| {
2196                    meta.remove(CHROMA_URI_KEY).and_then(|v| {
2197                        if let crate::MetadataValue::Str(uri) = v {
2198                            Some(uri)
2199                        } else {
2200                            None
2201                        }
2202                    })
2203                });
2204                uris.push(uri);
2205
2206                let metadata = metadata.map(|m| {
2207                    m.into_iter()
2208                        .filter(|(k, _)| !k.starts_with(CHROMA_KEY))
2209                        .collect()
2210                });
2211                metadatas.push(metadata);
2212
2213                distances.push(distance);
2214            }
2215            res.ids.push(ids);
2216
2217            if let Some(res_embs) = res.embeddings.as_mut() {
2218                res_embs.push(embeddings);
2219            }
2220            if let Some(res_docs) = res.documents.as_mut() {
2221                res_docs.push(documents);
2222            }
2223            if let Some(res_uri) = res.uris.as_mut() {
2224                res_uri.push(uris);
2225            }
2226            if let Some(res_metas) = res.metadatas.as_mut() {
2227                res_metas.push(metadatas);
2228            }
2229            if let Some(res_dists) = res.distances.as_mut() {
2230                res_dists.push(distances);
2231            }
2232        }
2233        res
2234    }
2235}
2236
2237#[derive(Debug, Clone, Deserialize, Serialize)]
2238#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2239pub struct SearchRequestPayload {
2240    pub searches: Vec<SearchPayload>,
2241    /// Specifies whether to include unindexed data in the search results.
2242    #[serde(default)]
2243    pub read_level: ReadLevel,
2244}
2245
2246#[non_exhaustive]
2247#[derive(Clone, Debug, Serialize, Validate)]
2248#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2249pub struct SearchRequest {
2250    pub tenant_id: String,
2251    pub database_name: String,
2252    pub collection_id: CollectionUuid,
2253    #[validate(nested)]
2254    pub searches: Vec<SearchPayload>,
2255    /// Specifies the read level for consistency vs performance tradeoffs.
2256    pub read_level: ReadLevel,
2257}
2258
2259impl SearchRequest {
2260    pub fn try_new(
2261        tenant_id: String,
2262        database_name: String,
2263        collection_id: CollectionUuid,
2264        searches: Vec<SearchPayload>,
2265        read_level: ReadLevel,
2266    ) -> Result<Self, ChromaValidationError> {
2267        let request = Self {
2268            tenant_id,
2269            database_name,
2270            collection_id,
2271            searches,
2272            read_level,
2273        };
2274        request.validate().map_err(ChromaValidationError::from)?;
2275        Ok(request)
2276    }
2277
2278    pub fn into_payload(self) -> SearchRequestPayload {
2279        SearchRequestPayload {
2280            searches: self.searches,
2281            read_level: self.read_level,
2282        }
2283    }
2284}
2285
2286#[derive(Clone, Deserialize, Serialize, Debug)]
2287#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2288pub struct SearchResponse {
2289    pub ids: Vec<Vec<String>>,
2290    pub documents: Vec<Option<Vec<Option<String>>>>,
2291    pub embeddings: Vec<Option<Vec<Option<Vec<f32>>>>>,
2292    pub metadatas: Vec<Option<Vec<Option<Metadata>>>>,
2293    pub scores: Vec<Option<Vec<Option<f32>>>>,
2294    pub select: Vec<Vec<Key>>,
2295}
2296
2297impl From<(SearchResult, Vec<SearchPayload>)> for SearchResponse {
2298    fn from((result, payloads): (SearchResult, Vec<SearchPayload>)) -> Self {
2299        let num_payloads = payloads.len();
2300        let mut res = Self {
2301            ids: Vec::with_capacity(num_payloads),
2302            documents: Vec::with_capacity(num_payloads),
2303            embeddings: Vec::with_capacity(num_payloads),
2304            metadatas: Vec::with_capacity(num_payloads),
2305            scores: Vec::with_capacity(num_payloads),
2306            select: Vec::with_capacity(num_payloads),
2307        };
2308
2309        for (payload_result, payload) in result.results.into_iter().zip(payloads) {
2310            // Get the sorted keys for this payload
2311            let mut payload_select = Vec::from_iter(payload.select.keys.iter().cloned());
2312            payload_select.sort();
2313
2314            let num_records = payload_result.records.len();
2315            let mut ids = Vec::with_capacity(num_records);
2316            let mut documents = Vec::with_capacity(num_records);
2317            let mut embeddings = Vec::with_capacity(num_records);
2318            let mut metadatas = Vec::with_capacity(num_records);
2319            let mut scores = Vec::with_capacity(num_records);
2320
2321            for record in payload_result.records {
2322                ids.push(record.id);
2323                documents.push(record.document);
2324                embeddings.push(record.embedding);
2325                metadatas.push(record.metadata);
2326                scores.push(record.score);
2327            }
2328
2329            res.ids.push(ids);
2330            res.select.push(payload_select.clone());
2331
2332            // Push documents if requested by this payload, otherwise None
2333            res.documents.push(
2334                payload_select
2335                    .binary_search(&Key::Document)
2336                    .is_ok()
2337                    .then_some(documents),
2338            );
2339
2340            // Push embeddings if requested by this payload, otherwise None
2341            res.embeddings.push(
2342                payload_select
2343                    .binary_search(&Key::Embedding)
2344                    .is_ok()
2345                    .then_some(embeddings),
2346            );
2347
2348            // Push metadatas if requested by this payload, otherwise None
2349            // Include if either Key::Metadata is present or any Key::MetadataField(_)
2350            let has_metadata = payload_select.binary_search(&Key::Metadata).is_ok()
2351                || payload_select
2352                    .last()
2353                    .is_some_and(|field| matches!(field, Key::MetadataField(_)));
2354            res.metadatas.push(has_metadata.then_some(metadatas));
2355
2356            // Push scores if requested by this payload, otherwise None
2357            res.scores.push(
2358                payload_select
2359                    .binary_search(&Key::Score)
2360                    .is_ok()
2361                    .then_some(scores),
2362            );
2363        }
2364
2365        res
2366    }
2367}
2368
2369#[derive(Error, Debug)]
2370pub enum QueryError {
2371    #[error("Error executing plan: {0}")]
2372    Executor(#[from] ExecutorError),
2373    #[error(transparent)]
2374    Other(#[from] Box<dyn ChromaError>),
2375}
2376
2377impl ChromaError for QueryError {
2378    fn code(&self) -> ErrorCodes {
2379        match self {
2380            QueryError::Executor(e) => e.code(),
2381            QueryError::Other(err) => err.code(),
2382        }
2383    }
2384}
2385
2386#[derive(Serialize)]
2387#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2388pub struct HealthCheckResponse {
2389    pub is_executor_ready: bool,
2390    pub is_log_client_ready: bool,
2391}
2392
2393impl HealthCheckResponse {
2394    pub fn get_status_code(&self) -> tonic::Code {
2395        if self.is_executor_ready && self.is_log_client_ready {
2396            tonic::Code::Ok
2397        } else {
2398            tonic::Code::Unavailable
2399        }
2400    }
2401}
2402
2403#[derive(Debug, Error)]
2404pub enum ExecutorError {
2405    #[error("Error converting: {0}")]
2406    Conversion(#[from] QueryConversionError),
2407    #[error("Error converting plan to proto: {0}")]
2408    PlanToProto(#[from] PlanToProtoError),
2409    #[error(transparent)]
2410    Grpc(#[from] Status),
2411    #[error("Inconsistent data")]
2412    InconsistentData,
2413    #[error("Collection is missing HNSW configuration")]
2414    CollectionMissingHnswConfiguration,
2415    #[error("Internal error: {0}")]
2416    Internal(Box<dyn ChromaError>),
2417    #[error("Error sending backfill request to compactor: {0}")]
2418    BackfillError(Box<dyn ChromaError>),
2419    #[error("Not implemented: {0}")]
2420    NotImplemented(String),
2421}
2422
2423impl ChromaError for ExecutorError {
2424    fn code(&self) -> ErrorCodes {
2425        match self {
2426            ExecutorError::Conversion(_) => ErrorCodes::InvalidArgument,
2427            ExecutorError::PlanToProto(_) => ErrorCodes::Internal,
2428            ExecutorError::Grpc(e) => e.code().into(),
2429            ExecutorError::InconsistentData => ErrorCodes::Internal,
2430            ExecutorError::CollectionMissingHnswConfiguration => ErrorCodes::Internal,
2431            ExecutorError::Internal(e) => e.code(),
2432            ExecutorError::BackfillError(e) => e.code(),
2433            ExecutorError::NotImplemented(_) => ErrorCodes::Unimplemented,
2434        }
2435    }
2436}
2437
////////////////////////// Attached Function Operations //////////////////////////
2439
2440#[non_exhaustive]
2441#[derive(Clone, Debug, Deserialize, Serialize, Validate)]
2442#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2443pub struct AttachFunctionRequest {
2444    #[validate(length(min = 1))]
2445    pub name: String,
2446    pub function_id: String,
2447    pub output_collection: String,
2448    #[serde(default = "default_empty_json_object")]
2449    pub params: serde_json::Value,
2450}
2451
2452fn default_empty_json_object() -> serde_json::Value {
2453    serde_json::json!({})
2454}
2455
2456impl AttachFunctionRequest {
2457    pub fn try_new(
2458        name: String,
2459        function_id: String,
2460        output_collection: String,
2461        params: serde_json::Value,
2462    ) -> Result<Self, ChromaValidationError> {
2463        let request = Self {
2464            name,
2465            function_id,
2466            output_collection,
2467            params,
2468        };
2469        request.validate().map_err(ChromaValidationError::from)?;
2470        Ok(request)
2471    }
2472}
2473
2474#[derive(Clone, Debug, Serialize)]
2475#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2476pub struct AttachedFunctionInfo {
2477    /// Unique identifier for the attached function.
2478    pub id: String,
2479    /// Human-readable name for the attached function instance.
2480    pub name: String,
2481    /// Name of the function (e.g., "record_counter", "statistics").
2482    pub function_name: String,
2483}
2484
2485#[derive(Clone, Debug, Serialize)]
2486#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2487pub struct AttachFunctionResponse {
2488    pub attached_function: AttachedFunctionInfo,
2489    /// True if newly created, false if already existed (idempotent request).
2490    pub created: bool,
2491}
2492
2493/// API response struct for attached function with function_name instead of function_id
2494#[derive(Clone, Debug, Serialize)]
2495#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2496pub struct AttachedFunctionApiResponse {
2497    /// Unique identifier for the attached function
2498    pub id: AttachedFunctionUuid,
2499    /// Human-readable name for the attached function instance
2500    pub name: String,
2501    /// Name of the function (e.g., "record_counter", "statistics")
2502    pub function_name: String,
2503    /// Source collection that triggers the attached function
2504    pub input_collection_id: CollectionUuid,
2505    /// Name of target collection where attached function output is stored
2506    #[serde(rename = "output_collection")]
2507    pub output_collection_name: String,
2508    /// ID of the output collection (lazily filled in after creation)
2509    pub output_collection_id: Option<CollectionUuid>,
2510    /// Optional JSON parameters for the function
2511    pub params: Option<String>,
2512    /// Tenant name this attached function belongs to
2513    pub tenant_id: String,
2514    /// Database name this attached function belongs to
2515    pub database_id: String,
2516    /// Completion offset: the WAL position up to which the attached function has processed records
2517    pub completion_offset: u64,
2518    /// Minimum number of new records required before the attached function runs again
2519    pub min_records_for_invocation: u64,
2520}
2521
2522impl AttachedFunctionApiResponse {
2523    /// Convert an AttachedFunction to the API response format, mapping function_id UUID to function_name
2524    pub fn from_attached_function(af: AttachedFunction) -> Result<Self, GetAttachedFunctionError> {
2525        let function_name = match af.function_id {
2526            id if id == FUNCTION_RECORD_COUNTER_ID => FUNCTION_RECORD_COUNTER_NAME.to_string(),
2527            id if id == FUNCTION_STATISTICS_ID => FUNCTION_STATISTICS_NAME.to_string(),
2528            _ => {
2529                return Err(GetAttachedFunctionError::UnknownFunctionId(af.function_id));
2530            }
2531        };
2532
2533        Ok(Self {
2534            id: af.id,
2535            name: af.name,
2536            function_name,
2537            input_collection_id: af.input_collection_id,
2538            output_collection_name: af.output_collection_name,
2539            output_collection_id: af.output_collection_id,
2540            params: af.params,
2541            tenant_id: af.tenant_id,
2542            database_id: af.database_id,
2543            completion_offset: af.completion_offset,
2544            min_records_for_invocation: af.min_records_for_invocation,
2545        })
2546    }
2547}
2548
2549#[derive(Clone, Debug, Serialize)]
2550#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2551pub struct GetAttachedFunctionResponse {
2552    pub attached_function: AttachedFunctionApiResponse,
2553}
2554
2555#[derive(Error, Debug)]
2556pub enum AttachFunctionError {
2557    #[error("{0}")]
2558    AlreadyExists(String),
2559    #[error("{0}")]
2560    CollectionAlreadyHasFunction(String),
2561    #[error("Failed to get collection and segments")]
2562    GetCollectionError(#[from] GetCollectionError),
2563    #[error("Input collection [{0}] does not exist")]
2564    InputCollectionNotFound(String),
2565    #[error("Output collection [{0}] already exists")]
2566    OutputCollectionExists(String),
2567    #[error("{0}")]
2568    InvalidArgument(String),
2569    #[error("{0}")]
2570    FunctionNotFound(String),
2571    #[error(transparent)]
2572    Validation(#[from] ChromaValidationError),
2573    #[error(transparent)]
2574    FinishCreate(#[from] crate::FinishCreateAttachedFunctionError),
2575    #[error(transparent)]
2576    Internal(#[from] Box<dyn ChromaError>),
2577}
2578
2579impl ChromaError for AttachFunctionError {
2580    fn code(&self) -> ErrorCodes {
2581        match self {
2582            AttachFunctionError::AlreadyExists(_) => ErrorCodes::AlreadyExists,
2583            AttachFunctionError::CollectionAlreadyHasFunction(_) => ErrorCodes::FailedPrecondition,
2584            AttachFunctionError::GetCollectionError(err) => err.code(),
2585            AttachFunctionError::InputCollectionNotFound(_) => ErrorCodes::NotFound,
2586            AttachFunctionError::OutputCollectionExists(_) => ErrorCodes::AlreadyExists,
2587            AttachFunctionError::InvalidArgument(_) => ErrorCodes::InvalidArgument,
2588            AttachFunctionError::FunctionNotFound(_) => ErrorCodes::NotFound,
2589            AttachFunctionError::Validation(err) => err.code(),
2590            AttachFunctionError::FinishCreate(err) => err.code(),
2591            AttachFunctionError::Internal(err) => err.code(),
2592        }
2593    }
2594}
2595
2596#[derive(Error, Debug)]
2597pub enum GetAttachedFunctionError {
2598    #[error("Attached Function not found")]
2599    NotFound(String),
2600    #[error("Unknown function ID [{0}]. Function may not be registered in the system.")]
2601    UnknownFunctionId(Uuid),
2602    #[error(transparent)]
2603    Internal(#[from] Box<dyn ChromaError>),
2604}
2605
2606impl ChromaError for GetAttachedFunctionError {
2607    fn code(&self) -> ErrorCodes {
2608        match self {
2609            GetAttachedFunctionError::NotFound(_) => ErrorCodes::NotFound,
2610            GetAttachedFunctionError::UnknownFunctionId(_) => ErrorCodes::Internal,
2611            GetAttachedFunctionError::Internal(err) => err.code(),
2612        }
2613    }
2614}
2615
2616#[non_exhaustive]
2617#[derive(Clone, Debug, Deserialize, Validate, Serialize)]
2618#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2619pub struct DetachFunctionRequest {
2620    /// Whether to delete the output collection as well when detaching the function.
2621    #[serde(default)]
2622    pub delete_output: bool,
2623}
2624
2625impl DetachFunctionRequest {
2626    pub fn try_new(delete_output: bool) -> Result<Self, ChromaValidationError> {
2627        let request = Self { delete_output };
2628        request.validate().map_err(ChromaValidationError::from)?;
2629        Ok(request)
2630    }
2631}
2632
2633#[derive(Clone, Debug, Serialize)]
2634#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2635pub struct DetachFunctionResponse {
2636    pub success: bool,
2637}
2638
2639#[derive(Error, Debug)]
2640pub enum DetachFunctionError {
2641    #[error(" Attached Function with ID [{0}] does not exist")]
2642    NotFound(String),
2643    #[error(transparent)]
2644    Validation(#[from] ChromaValidationError),
2645    #[error(transparent)]
2646    Internal(#[from] Box<dyn ChromaError>),
2647}
2648
2649impl ChromaError for DetachFunctionError {
2650    fn code(&self) -> ErrorCodes {
2651        match self {
2652            DetachFunctionError::NotFound(_) => ErrorCodes::NotFound,
2653            DetachFunctionError::Validation(err) => err.code(),
2654            DetachFunctionError::Internal(err) => err.code(),
2655        }
2656    }
2657}
2658
#[cfg(test)]
mod test {
    use super::*;
    use crate::{MetadataValue, SparseVector, UpdateMetadataValue};
    use std::collections::HashMap;

    /// Names shorter than three characters are rejected; three characters are accepted.
    #[test]
    fn test_create_database_min_length() {
        // DatabaseName requires at least 3 characters
        for too_short in ["a", "ab"] {
            assert!(DatabaseName::new(too_short).is_none());
        }
        assert!(DatabaseName::new("abc").is_some());
    }

    /// A one-character tenant name is rejected.
    #[test]
    fn test_create_tenant_min_length() {
        assert!(CreateTenantRequest::try_new("a".to_string()).is_err());
    }

    /// Adding a record whose metadata holds an unsorted sparse vector must fail validation.
    #[test]
    fn test_add_request_validates_sparse_vectors() {
        // Indices 3, 1, 2 are out of order on purpose.
        let metadata = HashMap::from([(
            "sparse".to_string(),
            MetadataValue::SparseVector(
                SparseVector::new(vec![3, 1, 2], vec![0.3, 0.1, 0.2]).unwrap(),
            ),
        )]);

        let outcome = AddCollectionRecordsRequest::try_new(
            "tenant".to_string(),
            "database".to_string(),
            CollectionUuid(uuid::Uuid::new_v4()),
            vec!["id1".to_string()],
            vec![vec![0.1, 0.2]],
            None,
            None,
            Some(vec![Some(metadata)]),
        );

        // Rejected because the sparse vector is not sorted.
        assert!(outcome.is_err());
    }

    /// Updating a record whose metadata holds an unsorted sparse vector must fail validation.
    #[test]
    fn test_update_request_validates_sparse_vectors() {
        // Indices 3, 1, 2 are out of order on purpose.
        let metadata = HashMap::from([(
            "sparse".to_string(),
            UpdateMetadataValue::SparseVector(
                SparseVector::new(vec![3, 1, 2], vec![0.3, 0.1, 0.2]).unwrap(),
            ),
        )]);

        let outcome = UpdateCollectionRecordsRequest::try_new(
            "tenant".to_string(),
            "database".to_string(),
            CollectionUuid(uuid::Uuid::new_v4()),
            vec!["id1".to_string()],
            None,
            None,
            None,
            Some(vec![Some(metadata)]),
        );

        // Rejected because the sparse vector is not sorted.
        assert!(outcome.is_err());
    }

    /// Upserting a record whose metadata holds an unsorted sparse vector must fail validation.
    #[test]
    fn test_upsert_request_validates_sparse_vectors() {
        // Indices 3, 1, 2 are out of order on purpose.
        let metadata = HashMap::from([(
            "sparse".to_string(),
            UpdateMetadataValue::SparseVector(
                SparseVector::new(vec![3, 1, 2], vec![0.3, 0.1, 0.2]).unwrap(),
            ),
        )]);

        let outcome = UpsertCollectionRecordsRequest::try_new(
            "tenant".to_string(),
            "database".to_string(),
            CollectionUuid(uuid::Uuid::new_v4()),
            vec!["id1".to_string()],
            vec![vec![0.1, 0.2]],
            None,
            None,
            Some(vec![Some(metadata)]),
        );

        // Rejected because the sparse vector is not sorted.
        assert!(outcome.is_err());
    }
}