1use std::borrow::Cow;
2use std::cmp::Ordering;
3use std::collections::{BTreeMap, HashMap, HashSet};
4use std::fmt::{self, Display, Formatter};
5use std::hash::{self, Hash, Hasher};
6use std::mem;
7use std::ops::Deref;
8use std::rc::Rc;
9use std::str::FromStr;
10use std::sync::Arc;
11
12use ahash::AHashSet;
13use bytemuck::{Pod, Zeroable};
14use crate::common::stable_hash::StableHash;
15use crate::common::types::{PointOffsetType, ScoreType};
16use ecow::EcoString;
17use fnv::FnvBuildHasher;
18use geo::{Contains, Coord, Distance as GeoDistance, Haversine, LineString, Point, Polygon};
19use indexmap::IndexSet;
20use itertools::Itertools;
21use num_derive::FromPrimitive;
22use ordered_float::OrderedFloat;
23use schemars::JsonSchema;
24use serde::{Deserialize, Deserializer, Serialize};
25use serde_json::{Map, Value};
26use strum::{EnumIter, EnumString};
27use uuid::Uuid;
28use validator::{Validate, ValidationError, ValidationErrors};
29use zerocopy::native_endian::U64;
30
31use crate::segment::common::anonymize::Anonymize;
32use crate::segment::common::operation_error::{OperationError, OperationResult};
33use crate::segment::common::utils::{self, MaybeOneOrMany, MultiValue};
34use crate::segment::data_types::index::{
35 BoolIndexParams, DatetimeIndexParams, FloatIndexParams, GeoIndexParams, IntegerIndexParams,
36 KeywordIndexParams, TextIndexParams, UuidIndexParams,
37};
38use crate::segment::data_types::modifier::Modifier;
39use crate::segment::data_types::order_by::OrderValue;
40use crate::segment::data_types::primitive::PrimitiveVectorElement;
41use crate::segment::data_types::tiny_map::TinyMap;
42use crate::segment::data_types::vectors::{DenseVector, VectorStructInternal};
43use crate::segment::index::field_index::CardinalityEstimation;
44use crate::segment::index::sparse_index::sparse_index_config::SparseIndexConfig;
45use crate::segment::json_path::JsonPath;
46use crate::segment::spaces::metric::{Metric, MetricPostProcessing};
47use crate::segment::spaces::simple::{CosineMetric, DotProductMetric, EuclidMetric, ManhattanMetric};
48use crate::segment::types::utils::unordered_hash_unique;
49use crate::segment::utils::maybe_arc::MaybeArc;
50
/// Key addressing a payload field; an alias for [`JsonPath`].
pub type PayloadKeyType = JsonPath;
/// Borrowed form of [`PayloadKeyType`].
pub type PayloadKeyTypeRef<'a> = &'a JsonPath;
/// Sequence number type; used in this file as the `version` of a [`ScoredPoint`].
pub type SeqNumberType = u64;
/// Representation of floating-point payload values (per the alias name).
pub type FloatPayloadType = f64;
/// Representation of integer payload values (per the alias name).
pub type IntPayloadType = i64;
/// Representation of datetime payload values; an alias for [`DateTimeWrapper`].
pub type DateTimePayloadType = DateTimeWrapper;
/// Representation of UUID payload values (per the alias name).
pub type UuidPayloadType = Uuid;
/// 128-bit unsigned integer, wide enough to hold a UUID.
/// NOTE(review): presumably the integer form of [`UuidPayloadType`] - confirm at use sites.
pub type UuidIntType = u128;
/// Borrowed vector name.
pub type VectorName = str;
/// Owned vector name.
pub type VectorNameBuf = String;
69
// Newtype around a UTC `chrono::DateTime`; also reachable through the
// `DateTimePayloadType` alias.
//
// `#[serde(transparent)]` makes it serialize exactly like the wrapped value.
// `Deserialize` is not derived here: it is implemented by hand elsewhere in
// this file.
#[derive(Clone, Copy, Serialize, JsonSchema, Debug, PartialEq, Eq, PartialOrd, Hash)]
#[serde(transparent)]
pub struct DateTimeWrapper(pub chrono::DateTime<chrono::Utc>);
74
75impl DateTimeWrapper {
76 pub fn timestamp(&self) -> i64 {
78 self.0.timestamp_micros()
79 }
80
81 pub fn from_timestamp(ts: i64) -> Option<Self> {
82 Some(Self(chrono::DateTime::from_timestamp_micros(ts)?))
83 }
84}
85
86impl<'de> Deserialize<'de> for DateTimePayloadType {
87 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
94 where
95 D: Deserializer<'de>,
96 {
97 let str_datetime: Cow<'de, str> = Cow::deserialize(deserializer)?;
98
99 match DateTimePayloadType::from_str(str_datetime.as_ref()) {
100 Ok(datetime) => Ok(datetime),
101 Err(_) => Err(serde::de::Error::custom(format!(
102 "'{str_datetime}' does not match accepted datetime format (RFC3339). Example: 2014-01-01T00:00:00Z"
103 ))),
104 }
105 }
106}
107
108impl FromStr for DateTimePayloadType {
109 type Err = chrono::ParseError;
110
111 fn from_str(s: &str) -> Result<Self, Self::Err> {
112 if let Ok(datetime) = chrono::DateTime::parse_from_rfc3339(s)
114 .or_else(|_| chrono::DateTime::from_str(s))
116 .or_else(|_| chrono::DateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S%.f%#z"))
120 .or_else(|_| chrono::DateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S%.f%#z"))
121 .map(|dt| chrono::DateTime::<chrono::Utc>::from(dt).into())
122 {
123 return Ok(datetime);
124 }
125
126 let datetime = chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S%.f")
133 .or_else(|_| chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S%.f"))
134 .or_else(|_| chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M"))
135 .or_else(|_| chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").map(Into::into))?;
136
137 let datetime_utc = datetime.and_utc().into();
139 Ok(datetime_utc)
140 }
141}
142
143impl Display for DateTimePayloadType {
144 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
145 write!(f, "{}", self.0)
146 }
147}
148
149impl From<chrono::DateTime<chrono::Utc>> for DateTimePayloadType {
150 fn from(dt: chrono::DateTime<chrono::Utc>) -> Self {
151 DateTimeWrapper(dt)
152 }
153}
154
/// Sample numeric point ID; referenced by name from the `schemars(example)`
/// attribute on `ExtendedPointId::NumId`.
fn id_num_example() -> u64 {
    42_u64
}
158
/// Sample UUID point ID in textual form; referenced by name from the
/// `schemars(example)` attribute on `ExtendedPointId::Uuid`.
fn id_uuid_example() -> String {
    String::from("550e8400-e29b-41d4-a716-446655440000")
}
162
// Point identifier: either an unsigned 64-bit number or a UUID.
//
// Serialized untagged, i.e. as the bare number or the bare UUID value.
// `Deserialize` is implemented by hand elsewhere in this file.
// The derived `Ord` follows variant declaration order, so every `NumId`
// sorts before every `Uuid`.
#[derive(Debug, Serialize, Copy, Clone, PartialEq, Eq, Hash, Ord, PartialOrd, JsonSchema)]
#[serde(untagged)]
pub enum ExtendedPointId {
    // Numeric identifier.
    #[schemars(example = "id_num_example")]
    NumId(u64),
    // UUID identifier.
    #[schemars(example = "id_uuid_example")]
    Uuid(Uuid),
}
172
173impl StableHash for ExtendedPointId {
174 fn stable_hash<W: FnMut(&[u8])>(&self, write: &mut W) {
175 match self {
176 ExtendedPointId::NumId(num) => {
177 0u64.stable_hash(write); num.stable_hash(write);
179 }
180 ExtendedPointId::Uuid(uuid) => {
181 1u64.stable_hash(write); uuid.as_bytes().len().stable_hash(write); write(uuid.as_bytes());
185 }
186 }
187 }
188}
189
190impl ExtendedPointId {
191 #[cfg(any(test, feature = "testing"))]
192 pub fn as_u64(&self) -> u64 {
193 match self {
194 ExtendedPointId::NumId(num) => *num,
195 ExtendedPointId::Uuid(_) => panic!("Cannot convert UUID to u64"),
196 }
197 }
198
199 pub fn is_num_id(&self) -> bool {
200 matches!(self, ExtendedPointId::NumId(..))
201 }
202
203 pub fn is_uuid(&self) -> bool {
204 matches!(self, ExtendedPointId::Uuid(..))
205 }
206}
207
208impl std::fmt::Display for ExtendedPointId {
209 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
210 match self {
211 ExtendedPointId::NumId(idx) => write!(f, "{idx}"),
212 ExtendedPointId::Uuid(uuid) => write!(f, "{uuid}"),
213 }
214 }
215}
216
217impl From<u64> for ExtendedPointId {
218 fn from(idx: u64) -> Self {
219 ExtendedPointId::NumId(idx)
220 }
221}
222
223impl FromStr for ExtendedPointId {
224 type Err = ();
225
226 fn from_str(s: &str) -> Result<Self, Self::Err> {
227 let try_num: Result<u64, _> = s.parse();
228 if let Ok(num) = try_num {
229 return Ok(Self::NumId(num));
230 }
231 let try_uuid = Uuid::from_str(s);
232 if let Ok(uuid) = try_uuid {
233 return Ok(Self::Uuid(uuid));
234 }
235 Err(())
236 }
237}
238
239impl<'de> serde::Deserialize<'de> for ExtendedPointId {
240 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
241 where
242 D: serde::Deserializer<'de>,
243 {
244 let value = serde_value::Value::deserialize(deserializer)?;
245
246 if let Ok(num) = value.clone().deserialize_into() {
247 return Ok(ExtendedPointId::NumId(num));
248 }
249
250 if let Ok(uuid) = value.clone().deserialize_into() {
251 return Ok(ExtendedPointId::Uuid(uuid));
252 }
253
254 let value = crate::segment::utils::fmt::SerdeValue(&value);
255
256 Err(serde::de::Error::custom(format!(
257 "value {value} is not a valid point ID, \
258 valid values are either an unsigned integer or a UUID",
259 )))
260 }
261}
262
/// Point ID type used in this file (e.g. by `ScoredPoint::id`); an alias for
/// [`ExtendedPointId`].
pub type PointIdType = ExtendedPointId;
265
/// Counterpart of [`ExtendedPointId`] whose numeric variant stores a
/// `zerocopy::native_endian::U64` instead of a plain `u64`.
///
/// Converts losslessly to and from [`ExtendedPointId`] via the `From` impls
/// in this file.
/// NOTE(review): presumably exists to shrink the in-memory layout (the
/// `zerocopy` integer has no alignment requirement) - confirm.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub enum CompactExtendedPointId {
    /// Numeric identifier.
    NumId(U64),
    /// UUID identifier.
    Uuid(Uuid),
}
273
274impl From<ExtendedPointId> for CompactExtendedPointId {
275 fn from(id: ExtendedPointId) -> Self {
276 match id {
277 ExtendedPointId::NumId(num) => CompactExtendedPointId::NumId(U64::new(num)),
278 ExtendedPointId::Uuid(uuid) => CompactExtendedPointId::Uuid(uuid),
279 }
280 }
281}
282
283impl From<CompactExtendedPointId> for ExtendedPointId {
284 fn from(id: CompactExtendedPointId) -> Self {
285 match id {
286 CompactExtendedPointId::NumId(num) => ExtendedPointId::NumId(num.get()),
287 CompactExtendedPointId::Uuid(uuid) => ExtendedPointId::Uuid(uuid),
288 }
289 }
290}
291
// Distance / similarity function selector.
//
// Each variant is dispatched to a metric implementation in `impl Distance`:
// `CosineMetric`, `EuclidMetric`, `DotProductMetric`, `ManhattanMetric`.
// No serde renaming is applied, so variants (de)serialize under their exact
// Rust names.
#[derive(
    Debug,
    Deserialize,
    Serialize,
    JsonSchema,

    Clone,
    Copy,
    FromPrimitive,
    PartialEq,
    Eq,
    Hash,
    EnumString,
    EnumIter,
)]
pub enum Distance {
    // Larger score is better (see `distance_order`).
    Cosine,
    // Smaller score is better.
    Euclid,
    // Larger score is better.
    Dot,
    // Smaller score is better.
    Manhattan,
}
319
320impl Distance {
321 pub fn postprocess_score(&self, score: ScoreType) -> ScoreType {
322 match self {
323 Distance::Cosine => CosineMetric::postprocess(score),
324 Distance::Euclid => EuclidMetric::postprocess(score),
325 Distance::Dot => DotProductMetric::postprocess(score),
326 Distance::Manhattan => ManhattanMetric::postprocess(score),
327 }
328 }
329
330 pub fn preprocess_vector<T: PrimitiveVectorElement>(&self, vector: DenseVector) -> DenseVector
331 where
332 CosineMetric: Metric<T>,
333 EuclidMetric: Metric<T>,
334 DotProductMetric: Metric<T>,
335 ManhattanMetric: Metric<T>,
336 {
337 match self {
338 Distance::Cosine => CosineMetric::preprocess(vector),
339 Distance::Euclid => EuclidMetric::preprocess(vector),
340 Distance::Dot => DotProductMetric::preprocess(vector),
341 Distance::Manhattan => ManhattanMetric::preprocess(vector),
342 }
343 }
344
345 pub fn distance_order(&self) -> Order {
346 match self {
347 Distance::Cosine | Distance::Dot => Order::LargeBetter,
348 Distance::Euclid | Distance::Manhattan => Order::SmallBetter,
349 }
350 }
351
352 pub fn is_ordered(&self, left: ScoreType, right: ScoreType) -> bool {
353 match self.distance_order() {
354 Order::LargeBetter => left >= right,
355 Order::SmallBetter => left <= right,
356 }
357 }
358
359 pub fn check_threshold(&self, score: ScoreType, threshold: ScoreType) -> bool {
361 match self.distance_order() {
362 Order::LargeBetter => score > threshold,
363 Order::SmallBetter => score < threshold,
364 }
365 }
366}
367
/// Direction in which scores of a [`Distance`] improve.
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum Order {
    /// A larger score means a better match.
    LargeBetter,
    /// A smaller score means a better match.
    SmallBetter,
}
373
/// A point together with its score.
///
/// Equality and ordering are hand-written elsewhere in this file:
/// `PartialEq` compares `id` and `score` only; `Ord` compares `order_value`
/// when present and falls back to `score` when both are absent.
#[derive(Clone, Debug)]
pub struct ScoredPoint {
    /// Point identifier.
    pub id: PointIdType,
    /// Version of the point (a sequence number).
    pub version: SeqNumberType,
    /// Score of the point.
    pub score: ScoreType,
    /// Payload, if present.
    pub payload: Option<Payload>,
    /// Vector data, if present.
    pub vector: Option<VectorStructInternal>,
    /// Shard key, if present.
    pub shard_key: Option<ShardKey>,
    /// Ordering value; when set it takes precedence over `score` in `Ord`.
    pub order_value: Option<OrderValue>,
}
392
// Marker impl required by `Ord`. Equality itself comes from the hand-written
// `PartialEq`, which compares `id` and `score`.
// NOTE(review): `score` looks like a float (it is wrapped in `OrderedFloat`
// for ordering); a NaN score would make `==` non-reflexive - confirm that
// cannot occur.
impl Eq for ScoredPoint {}
394
395impl Ord for ScoredPoint {
396 fn cmp(&self, other: &Self) -> Ordering {
398 match (&self.order_value, &other.order_value) {
399 (None, None) => OrderedFloat(self.score).cmp(&OrderedFloat(other.score)),
400 (Some(_), None) => Ordering::Greater,
401 (None, Some(_)) => Ordering::Less,
402 (Some(self_order), Some(other_order)) => self_order.cmp(other_order),
403 }
404 }
405}
406
407impl PartialOrd for ScoredPoint {
408 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
409 Some(self.cmp(other))
410 }
411}
412
413impl PartialEq for ScoredPoint {
414 fn eq(&self, other: &Self) -> bool {
415 (self.id, &self.score) == (other.id, &other.score)
416 }
417}
418
// Kind of a segment. Serialized in snake_case: "plain", "indexed", "special".
#[derive(Debug, Serialize, JsonSchema, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum SegmentType {
    // NOTE(review): presumably a segment without a vector index - confirm.
    Plain,
    // NOTE(review): presumably a segment with a vector index - confirm.
    Indexed,
    // NOTE(review): meaning not visible in this part of the file.
    Special,
}
430
// Description of one payload index, built by `PayloadIndexInfo::new`.
#[derive(Debug, Serialize, JsonSchema, Clone, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub struct PayloadIndexInfo {
    // Schema type of the indexed field.
    pub data_type: PayloadSchemaType,
    // Index parameters; `None` when the schema was given as a bare type.
    // Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub params: Option<PayloadSchemaParams>,
    // Point count supplied to `new`.
    pub points: usize,
}
441
442impl PayloadIndexInfo {
443 pub fn new(field_type: PayloadFieldSchema, points_count: usize) -> Self {
444 match field_type {
445 PayloadFieldSchema::FieldType(data_type) => PayloadIndexInfo {
446 data_type,
447 params: None,
448 points: points_count,
449 },
450 PayloadFieldSchema::FieldParams(schema_params) => PayloadIndexInfo {
451 data_type: schema_params.kind(),
452 params: Some(schema_params),
453 points: points_count,
454 },
455 }
456 }
457}
458
// Vector counters for one named vector (used as the value type of
// `SegmentInfo::vector_data`).
#[derive(Debug, Serialize, JsonSchema, Clone, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub struct VectorDataInfo {
    // Number of vectors.
    pub num_vectors: usize,
    // Number of indexed vectors.
    pub num_indexed_vectors: usize,
    // Number of deleted vectors.
    pub num_deleted_vectors: usize,
}
466
// Aggregated, serializable information about one segment.
#[derive(Debug, Serialize, JsonSchema, Clone, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub struct SegmentInfo {
    // Identifier of the segment.
    pub uuid: Uuid,
    // Kind of the segment.
    pub segment_type: SegmentType,
    // Vector and point counters.
    pub num_vectors: usize,
    pub num_points: usize,
    // Deferred-point counters. There is no `skip_serializing_if` here, so
    // `None` is serialized explicitly.
    pub num_deferred_points: Option<usize>,
    pub num_deleted_deferred_points: Option<usize>,
    pub num_indexed_vectors: usize,
    pub num_deleted_vectors: usize,
    // Size figures, in bytes (per the field names).
    pub vectors_size_bytes: usize,
    pub payloads_size_bytes: usize,
    pub ram_usage_bytes: usize,
    pub disk_usage_bytes: usize,
    // NOTE(review): presumably whether the segment accepts new points - confirm.
    pub is_appendable: bool,
    // Payload index description per payload key.
    pub index_schema: HashMap<PayloadKeyType, PayloadIndexInfo>,
    // Vector counters per vector name.
    pub vector_data: HashMap<String, VectorDataInfo>,
    // Omitted from serialized output when `None`.
    // NOTE(review): semantics of the deferred internal id are not visible here.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub deferred_internal_id: Option<PointOffsetType>,
}
495
/// Plain container of size counters; `Default` yields all zeros and an empty map.
#[derive(Debug, Default)]
pub struct SizeStats {
    /// Total number of vectors.
    pub num_vectors: usize,
    /// Number of vectors per vector name.
    pub num_vectors_by_name: TinyMap<VectorNameBuf, usize>,
    /// Size of vectors, in bytes.
    pub vectors_size_bytes: usize,
    /// Size of payloads, in bytes.
    pub payloads_size_bytes: usize,
    /// Number of points.
    pub num_points: usize,
}
504
// Search-time quantization parameters. `Hash` is implemented by hand
// elsewhere in this file because `f64` does not implement `Hash`.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Validate, Clone, Copy, PartialEq, Default)]
#[serde(rename_all = "snake_case")]
pub struct QuantizationSearchParams {
    // Defaults to `false` (via `default_quantization_ignore_value`) when
    // missing from the input.
    // NOTE(review): presumably "ignore quantized vectors during search" - confirm.
    #[serde(default = "default_quantization_ignore_value")]
    pub ignore: bool,

    // Defaults to `None`; omitted from serialized output when `None`.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rescore: Option<bool>,

    // Defaults to `None` (via `default_quantization_oversampling_value`);
    // validated to be >= 1.0; omitted from serialized output when `None`.
    #[serde(default = "default_quantization_oversampling_value")]
    #[validate(range(min = 1.0))]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub oversampling: Option<f64>,
}
532
533impl Hash for QuantizationSearchParams {
534 fn hash<H: Hasher>(&self, state: &mut H) {
535 let Self {
536 ignore,
537 rescore,
538 oversampling,
539 } = self;
540 ignore.hash(state);
541 rescore.hash(state);
542 oversampling.map(OrderedFloat).hash(state);
543 }
544}
545
/// Serde default for `QuantizationSearchParams::ignore`: `false`.
pub const fn default_quantization_ignore_value() -> bool {
    const IGNORE_BY_DEFAULT: bool = false;
    IGNORE_BY_DEFAULT
}
549
/// Serde default for `QuantizationSearchParams::oversampling`: `None`.
pub const fn default_quantization_oversampling_value() -> Option<f64> {
    Option::<f64>::None
}
553
/// Default maximum selectivity for ACORN search.
/// NOTE(review): presumably the fallback used when
/// `AcornSearchParams::max_selectivity` is `None` - no use site is visible in
/// this part of the file; confirm.
pub const ACORN_MAX_SELECTIVITY_DEFAULT: f64 = 0.4;
558
// ACORN-related search parameters.
#[derive(
    Debug, Deserialize, Serialize, JsonSchema, Validate, Clone, Copy, PartialEq, Default, Hash,
)]
#[serde(rename_all = "snake_case")]
pub struct AcornSearchParams {
    // Defaults to `false` when missing from the input.
    #[serde(default)]
    pub enable: bool,

    // Defaults to `None`; omitted from serialized output when `None`;
    // validated to lie within [0.0, 1.0]. Stored as `OrderedFloat` so the
    // struct can derive `Hash`.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 0.0, max = 1.0))]
    pub max_selectivity: Option<OrderedFloat<f64>>,
}
585
// Additional parameters of a search request. Every field is optional in the
// input; `Default` yields `None` / `false` throughout.
#[derive(
    Debug, Deserialize, Serialize, JsonSchema, Validate, Copy, Clone, PartialEq, Default, Hash,
)]
#[serde(rename_all = "snake_case")]
pub struct SearchParams {
    // Omitted from serialized output when `None`.
    // NOTE(review): presumably the HNSW `ef` value to use at search time - confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hnsw_ef: Option<usize>,

    // Defaults to `false`.
    // NOTE(review): presumably requests exact (non-approximate) search - confirm.
    #[serde(default)]
    pub exact: bool,

    // Quantization parameters; validated recursively; omitted when `None`.
    #[serde(default)]
    #[validate(nested)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub quantization: Option<QuantizationSearchParams>,

    // Defaults to `false`.
    // NOTE(review): presumably restricts the search to indexed data - confirm.
    #[serde(default)]
    pub indexed_only: bool,

    // ACORN parameters; validated recursively; omitted when `None`.
    #[serde(default)]
    #[validate(nested)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub acorn: Option<AcornSearchParams>,
}
619
/// Deserialize-only defaults for vector configuration.
#[derive(Debug, Deserialize, Validate, Clone, PartialEq, Eq)]
pub struct VectorsConfigDefaults {
    /// Defaults to `None` when missing from the input.
    #[serde(default)]
    pub on_disk: Option<bool>,
}
626
// Vector index selection.
//
// Serialized adjacently tagged: the variant name (snake_case) goes under
// "type" and the variant's content under "options".
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
#[serde(tag = "type", content = "options")]
pub enum Indexes {
    // No index; `is_indexed` and `is_on_disk` both report `false`.
    Plain {},
    // HNSW index with its configuration.
    Hnsw(HnswConfig),
}
639
640impl Indexes {
641 pub fn is_indexed(&self) -> bool {
642 match self {
643 Indexes::Plain {} => false,
644 Indexes::Hnsw(_) => true,
645 }
646 }
647
648 pub fn is_on_disk(&self) -> bool {
649 match self {
650 Indexes::Plain {} => false,
651 Indexes::Hnsw(config) => config.on_disk.unwrap_or_default(),
652 }
653 }
654}
655
// Configuration of an HNSW index.
#[derive(
    Copy, Clone, Debug, Eq, PartialEq, Deserialize, Serialize, JsonSchema, Validate,
)]
#[serde(rename_all = "snake_case")]
pub struct HnswConfig {
    // NOTE(review): presumably the HNSW `M` parameter (edges per node) - confirm.
    pub m: usize,
    // Validated to be >= 4.
    // NOTE(review): presumably the HNSW `ef_construction` parameter - confirm.
    #[validate(range(min = 4))]
    pub ef_construct: usize,
    // Also accepted under the name `full_scan_threshold_kb` on input.
    #[serde(alias = "full_scan_threshold_kb")]
    pub full_scan_threshold: usize,
    // Defaults to 0 (via `default_max_indexing_threads`) when missing.
    // Ignored by `mismatch_requires_rebuild`.
    #[serde(default = "default_max_indexing_threads")]
    pub max_indexing_threads: usize,
    // Defaults to `None`; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub on_disk: Option<bool>,
    // Defaults to `None`; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub payload_m: Option<usize>,
    // Defaults to `None`; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub inline_storage: Option<bool>,
}
695
696impl HnswConfig {
697 pub fn mismatch_requires_rebuild(&self, other: &Self) -> bool {
706 let HnswConfig {
707 m,
708 ef_construct,
709 full_scan_threshold,
710 max_indexing_threads: _,
711 payload_m,
712 on_disk,
713 inline_storage,
714 } = *self;
715
716 m != other.m
717 || ef_construct != other.ef_construct
718 || full_scan_threshold != other.full_scan_threshold
719 || payload_m != other.payload_m
720 || on_disk != other.on_disk
724 || inline_storage != other.inline_storage
725 }
726}
727
// Global HNSW settings. The container-level `default` makes every missing
// field fall back to the value from `Default for HnswGlobalConfig`.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Validate, Clone)]
#[serde(rename_all = "snake_case", default)]
pub struct HnswGlobalConfig {
    // Validated to lie within [0.0, 1.0]; defaults to 0.3.
    // NOTE(review): exact meaning of the threshold is not visible in this
    // part of the file.
    #[validate(range(min = 0.0, max = 1.0))]
    pub healing_threshold: f64,
}
737
738impl Default for HnswGlobalConfig {
739 fn default() -> Self {
740 Self {
741 healing_threshold: 0.3,
742 }
743 }
744}
745
/// Serde default for `HnswConfig::max_indexing_threads`: 0.
const fn default_max_indexing_threads() -> usize {
    0_usize
}
749
// Compression ratio for product quantization (see
// `ProductQuantizationConfig::compression`).
// Serialized in lowercase: "x4", "x8", "x16", "x32", "x64".
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, Copy, PartialEq, Eq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum CompressionRatio {
    X4,
    X8,
    X16,
    X32,
    X64,
}
759
// Element type for scalar quantization. Serialized in lowercase ("int8").
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Hash, Deserialize, Serialize, JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum ScalarType {
    // The only variant, and the default.
    #[default]
    Int8,
}
766
// Scalar quantization settings. `Hash` and `Eq` are implemented by hand
// elsewhere in this file because of the `f32` field.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema, Validate)]
#[serde(rename_all = "snake_case")]
pub struct ScalarQuantizationConfig {
    // Quantized element type; (de)serialized under the key `type`.
    pub r#type: ScalarType,
    // Validated to lie within [0.5, 1.0]; omitted from serialized output
    // when `None`. Not included in the hand-written `Hash`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 0.5, max = 1.0))]
    pub quantile: Option<f32>,
    // Omitted from serialized output when `None`.
    // NOTE(review): presumably "always keep quantized vectors in RAM" - confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub always_ram: Option<bool>,
}
781
782impl ScalarQuantizationConfig {
783 pub fn mismatch_requires_rebuild(&self, other: &Self) -> bool {
789 self != other
790 }
791}
792
// Wrapper that places the scalar settings under the key `scalar`; this key is
// what distinguishes the variant inside the untagged `QuantizationConfig`.
#[derive(Clone, Debug, Eq, PartialEq, Hash, Deserialize, Serialize, JsonSchema, Validate)]
pub struct ScalarQuantization {
    // Validated recursively.
    #[validate(nested)]
    pub scalar: ScalarQuantizationConfig,
}
798
// Product quantization settings.
#[derive(Clone, Debug, Eq, PartialEq, Hash, Deserialize, Serialize, JsonSchema, Validate)]
#[serde(rename_all = "snake_case")]
pub struct ProductQuantizationConfig {
    // Required compression ratio.
    pub compression: CompressionRatio,

    // Omitted from serialized output when `None`.
    // NOTE(review): presumably "always keep quantized vectors in RAM" - confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub always_ram: Option<bool>,
}
807
808impl ProductQuantizationConfig {
809 pub fn mismatch_requires_rebuild(&self, other: &Self) -> bool {
815 self != other
816 }
817}
818
// Wrapper that places the product settings under the key `product`; this key
// is what distinguishes the variant inside the untagged `QuantizationConfig`.
#[derive(Clone, Debug, Eq, PartialEq, Hash, Deserialize, Serialize, JsonSchema, Validate)]
pub struct ProductQuantization {
    // Validated recursively.
    #[validate(nested)]
    pub product: ProductQuantizationConfig,
}
824
825impl Hash for ScalarQuantizationConfig {
826 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
827 self.always_ram.hash(state);
828 self.r#type.hash(state);
829 }
830}
831
// Marker impl on top of the derived `PartialEq`; needed because the wrapping
// `ScalarQuantization` derives `Eq`.
// NOTE(review): `quantile` is an `Option<f32>`, so a NaN quantile would make
// `==` non-reflexive. The `[0.5, 1.0]` range validation presumably rules NaN
// out for validated configs - confirm.
impl Eq for ScalarQuantizationConfig {}
833
// Encoding used for binary quantization. Serialized in snake_case:
// "one_bit", "two_bits", "one_and_half_bits".
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, Copy, PartialEq, Eq, Hash, Default)]
#[serde(rename_all = "snake_case")]
pub enum BinaryQuantizationEncoding {
    // The default encoding.
    #[default]
    OneBit,
    TwoBits,
    OneAndHalfBits,
}
842
843impl BinaryQuantizationEncoding {
844 pub fn is_one_bit(&self) -> bool {
845 matches!(self, BinaryQuantizationEncoding::OneBit)
846 }
847}
848
// Binary quantization settings. Every field is optional and omitted from
// serialized output when `None`.
#[derive(Clone, Debug, Eq, PartialEq, Hash, Deserialize, Serialize, JsonSchema, Validate)]
#[serde(rename_all = "snake_case")]
pub struct BinaryQuantizationConfig {
    // NOTE(review): presumably "always keep quantized vectors in RAM" - confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub always_ram: Option<bool>,
    // Encoding of stored vectors; defaults to `None` when missing.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub encoding: Option<BinaryQuantizationEncoding>,

    // Encoding of query vectors; defaults to `None` when missing.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub query_encoding: Option<BinaryQuantizationQueryEncoding>,
}
864
// Wrapper that places the binary settings under the key `binary`; this key is
// what distinguishes the variant inside the untagged `QuantizationConfig`.
#[derive(Clone, Debug, Eq, PartialEq, Hash, Deserialize, Serialize, JsonSchema, Validate)]
pub struct BinaryQuantization {
    // Validated recursively.
    #[validate(nested)]
    pub binary: BinaryQuantizationConfig,
}
870
// Quantization configuration: exactly one of scalar, product or binary.
//
// Untagged: on input the variants are tried in declaration order, and each is
// recognised by its wrapper's single key (`scalar`, `product`, `binary`).
// `Validate` is implemented by hand elsewhere in this file and forwards to
// the active variant.
#[derive(Clone, Debug, Eq, PartialEq, Hash, Deserialize, Serialize, JsonSchema, )]
#[serde(untagged, rename_all = "snake_case")]
pub enum QuantizationConfig {
    Scalar(ScalarQuantization),
    Product(ProductQuantization),
    // The only variant for which `supports_appendable` returns `true`.
    Binary(BinaryQuantization),
}
879
880impl QuantizationConfig {
881 pub fn for_appendable_segment(opt: Option<&Self>) -> Option<Self> {
884 let appendable = crate::common::flags::feature_flags().appendable_quantization;
885 opt.filter(|q| appendable && q.supports_appendable())
886 .cloned()
887 }
888
889 pub fn mismatch_requires_rebuild(&self, other: &Self) -> bool {
895 self != other
896 }
897
898 pub fn supports_appendable(&self) -> bool {
899 matches!(self, QuantizationConfig::Binary(_))
900 }
901}
902
903impl Validate for QuantizationConfig {
904 fn validate(&self) -> Result<(), ValidationErrors> {
905 match self {
906 QuantizationConfig::Scalar(scalar) => scalar.validate(),
907 QuantizationConfig::Product(product) => product.validate(),
908 QuantizationConfig::Binary(binary) => binary.validate(),
909 }
910 }
911}
912
// Query-side encoding for binary quantization. Serialized in lowercase with
// no separators: "default", "binary", "scalar4bits", "scalar8bits".
#[derive(
    Default, Debug, Deserialize, Serialize, JsonSchema, Clone, Copy, PartialEq, Eq, Hash,
)]
#[serde(rename_all = "lowercase")]
pub enum BinaryQuantizationQueryEncoding {
    // Used when nothing else is selected.
    #[default]
    Default,
    Binary,
    Scalar4Bits,
    Scalar8Bits,
}
925
926impl From<ScalarQuantizationConfig> for QuantizationConfig {
927 fn from(config: ScalarQuantizationConfig) -> Self {
928 QuantizationConfig::Scalar(ScalarQuantization { scalar: config })
929 }
930}
931
932impl From<ProductQuantizationConfig> for QuantizationConfig {
933 fn from(config: ProductQuantizationConfig) -> Self {
934 QuantizationConfig::Product(ProductQuantization { product: config })
935 }
936}
937
938impl From<BinaryQuantizationConfig> for QuantizationConfig {
939 fn from(config: BinaryQuantizationConfig) -> Self {
940 QuantizationConfig::Binary(BinaryQuantization { binary: config })
941 }
942}
943
// Strict-mode limits for one sparse vector.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Validate, Clone, PartialEq, Default, Hash)]
pub struct StrictModeSparse {
    // Validated to be >= 1 when set; omitted from serialized output when `None`.
    // NOTE(review): presumably the maximum sparse vector length - confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1))]
    pub max_length: Option<usize>,
}
951
// Strict-mode sparse limits keyed by vector name.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Validate, Clone, PartialEq, Default, Hash)]
#[schemars(deny_unknown_fields)]
pub struct StrictModeSparseConfig {
    // Flattened: the map entries appear directly as keys of this object.
    // Each value is validated recursively.
    #[validate(nested)]
    #[serde(flatten)]
    pub config: BTreeMap<VectorNameBuf, StrictModeSparse>,
}
959
// Output counterpart of `StrictModeSparseConfig` (no `Validate`, no `Hash`);
// produced from it through the `From` impl in this file.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Default)]
#[schemars(deny_unknown_fields)]
pub struct StrictModeSparseConfigOutput {
    // Flattened: the map entries appear directly as keys of this object.
    #[serde(flatten)]
    pub config: BTreeMap<VectorNameBuf, StrictModeSparseOutput>,
}
966
// Output counterpart of `StrictModeSparse` (no validation attributes).
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Default)]
pub struct StrictModeSparseOutput {
    // Copied from `StrictModeSparse::max_length`; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_length: Option<usize>,
}
974
975impl From<StrictModeSparseConfig> for StrictModeSparseConfigOutput {
976 fn from(config: StrictModeSparseConfig) -> Self {
977 let StrictModeSparseConfig { config } = config;
978 let mut new_config = StrictModeSparseConfigOutput::default();
979 for (key, value) in config {
980 new_config
981 .config
982 .insert(key, StrictModeSparseOutput::from(value));
983 }
984 new_config
985 }
986}
987
988impl From<StrictModeSparse> for StrictModeSparseOutput {
989 fn from(config: StrictModeSparse) -> Self {
990 let StrictModeSparse { max_length } = config;
991 StrictModeSparseOutput { max_length }
992 }
993}
994
// Strict-mode limits for one multivector.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Validate, Clone, PartialEq, Default, Hash)]
pub struct StrictModeMultivector {
    // Validated to be >= 1 when set; omitted from serialized output when `None`.
    // NOTE(review): presumably the maximum number of vectors per multivector - confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1))]
    pub max_vectors: Option<usize>,
}
1002
// Strict-mode multivector limits keyed by vector name.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Validate, Clone, PartialEq, Default, Hash)]
#[schemars(deny_unknown_fields)]
pub struct StrictModeMultivectorConfig {
    // Flattened: the map entries appear directly as keys of this object.
    // Each value is validated recursively.
    #[validate(nested)]
    #[serde(flatten)]
    pub config: BTreeMap<VectorNameBuf, StrictModeMultivector>,
}
1010
// Output counterpart of `StrictModeMultivectorConfig` (no `Validate`, no
// `Hash`); produced from it through the `From` impl in this file.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Default)]
#[schemars(deny_unknown_fields)]
pub struct StrictModeMultivectorConfigOutput {
    // Flattened: the map entries appear directly as keys of this object.
    #[serde(flatten)]
    pub config: BTreeMap<VectorNameBuf, StrictModeMultivectorOutput>,
}
1017
1018impl From<StrictModeMultivectorConfig> for StrictModeMultivectorConfigOutput {
1019 fn from(config: StrictModeMultivectorConfig) -> Self {
1020 let StrictModeMultivectorConfig { config } = config;
1021 let mut new_config = StrictModeMultivectorConfigOutput::default();
1022 for (key, value) in config {
1023 new_config
1024 .config
1025 .insert(key, StrictModeMultivectorOutput::from(value));
1026 }
1027 new_config
1028 }
1029}
1030
// Output counterpart of `StrictModeMultivector` (no validation attributes).
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Default)]
pub struct StrictModeMultivectorOutput {
    // Copied from `StrictModeMultivector::max_vectors`; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_vectors: Option<usize>,
}
1038
1039impl From<StrictModeMultivector> for StrictModeMultivectorOutput {
1040 fn from(config: StrictModeMultivector) -> Self {
1041 let StrictModeMultivector { max_vectors } = config;
1042 StrictModeMultivectorOutput { max_vectors }
1043 }
1044}
1045
// Strict-mode configuration (input form).
//
// Every field is optional and omitted from serialized output when `None`.
// `Eq` and `Hash` are implemented by hand elsewhere in this file because of
// the `f64` field. Field meanings below are taken from the field names; only
// the validation rules are established by the attributes.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Validate, Clone, PartialEq, Default)]
pub struct StrictModeConfig {
    // Whether strict mode is enabled.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub enabled: Option<bool>,

    // Validated to be >= 1 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1))]
    pub max_query_limit: Option<usize>,

    // Validated to be >= 1 when set.
    // NOTE(review): the time unit is not visible here - confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1))]
    pub max_timeout: Option<usize>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub unindexed_filtering_retrieve: Option<bool>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub unindexed_filtering_update: Option<bool>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub search_max_hnsw_ef: Option<usize>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub search_allow_exact: Option<bool>,

    // The only floating-point field; excluded from the hand-written `Hash`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub search_max_oversampling: Option<f64>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub upsert_max_batchsize: Option<usize>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub search_max_batchsize: Option<usize>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_collection_vector_size_bytes: Option<usize>,

    // Validated to be >= 1 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1))]
    pub read_rate_limit: Option<usize>,

    // Validated to be >= 1 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1))]
    pub write_rate_limit: Option<usize>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_collection_payload_size_bytes: Option<usize>,

    // Validated to be >= 1 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1))]
    pub max_points_count: Option<usize>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub filter_max_conditions: Option<usize>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub condition_max_size: Option<usize>,

    // Per-vector multivector limits; validated recursively.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(nested)]
    pub multivector_config: Option<StrictModeMultivectorConfig>,

    // Per-vector sparse limits; validated recursively.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(nested)]
    pub sparse_config: Option<StrictModeSparseConfig>,

    // NOTE(review): `min = 0` can never reject a `usize`, so this range check
    // only affects what the attribute communicates (e.g. to the schema).
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 0))]
    pub max_payload_index_count: Option<usize>,
}
1138
// Marker impl on top of the derived `PartialEq`.
// NOTE(review): `search_max_oversampling` is an `Option<f64>` with no range
// validation, so a NaN value would make `==` non-reflexive - confirm NaN
// cannot reach this field.
impl Eq for StrictModeConfig {}
1140
impl Hash for StrictModeConfig {
    /// Hashes every field except `search_max_oversampling`, in declaration order.
    ///
    /// `search_max_oversampling` is an `f64`, which does not implement `Hash`.
    /// Skipping it keeps the `Hash`/`Eq` contract intact (equal values hash
    /// equally); configs that differ only in that field collide.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        // Exhaustive destructuring: adding a field to the struct without
        // listing it here is a compile error, forcing a decision on whether
        // it takes part in the hash.
        let Self {
            enabled,
            max_query_limit,
            max_timeout,
            unindexed_filtering_retrieve,
            unindexed_filtering_update,
            search_max_hnsw_ef,
            search_allow_exact,
            // Deliberately ignored, see above.
            search_max_oversampling: _,
            upsert_max_batchsize,
            search_max_batchsize,
            max_collection_vector_size_bytes,
            read_rate_limit,
            write_rate_limit,
            max_collection_payload_size_bytes,
            max_points_count,
            filter_max_conditions,
            condition_max_size,
            multivector_config,
            sparse_config,
            max_payload_index_count,
        } = self;
        enabled.hash(state);
        max_query_limit.hash(state);
        max_timeout.hash(state);
        unindexed_filtering_retrieve.hash(state);
        unindexed_filtering_update.hash(state);
        search_max_hnsw_ef.hash(state);
        search_allow_exact.hash(state);
        upsert_max_batchsize.hash(state);
        search_max_batchsize.hash(state);
        max_collection_vector_size_bytes.hash(state);
        read_rate_limit.hash(state);
        write_rate_limit.hash(state);
        max_collection_payload_size_bytes.hash(state);
        max_points_count.hash(state);
        filter_max_conditions.hash(state);
        condition_max_size.hash(state);
        multivector_config.hash(state);
        sparse_config.hash(state);
        max_payload_index_count.hash(state);
    }
}
1187
// Strict-mode configuration (output form); mirrors `StrictModeConfig` field
// for field, with the nested configs replaced by their `*Output` types.
//
// Every field is optional and omitted from serialized output when `None`.
// NOTE(review): this struct does not derive `Validate`, yet three fields carry
// `#[validate(...)]`. Those attributes are presumably consumed only by the
// `JsonSchema` derive (to emit minimum bounds) - confirm.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Default)]
pub struct StrictModeConfigOutput {
    // Whether strict mode is enabled.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub enabled: Option<bool>,

    // Declared range: >= 1.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1))]
    pub max_query_limit: Option<usize>,

    // Declared range: >= 1.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1))]
    pub max_timeout: Option<usize>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub unindexed_filtering_retrieve: Option<bool>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub unindexed_filtering_update: Option<bool>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub search_max_hnsw_ef: Option<usize>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub search_allow_exact: Option<bool>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub search_max_oversampling: Option<f64>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub upsert_max_batchsize: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub search_max_batchsize: Option<usize>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_collection_vector_size_bytes: Option<usize>,

    // Unlike the input struct, no range attribute is declared here.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub read_rate_limit: Option<usize>,

    // Unlike the input struct, no range attribute is declared here.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub write_rate_limit: Option<usize>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_collection_payload_size_bytes: Option<usize>,

    // Unlike the input struct, no range attribute is declared here.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_points_count: Option<usize>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub filter_max_conditions: Option<usize>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub condition_max_size: Option<usize>,

    // Per-vector multivector limits, in output form.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub multivector_config: Option<StrictModeMultivectorConfigOutput>,

    // Per-vector sparse limits, in output form.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sparse_config: Option<StrictModeSparseConfigOutput>,

    // Declared range: >= 0, which every `usize` satisfies.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 0))]
    pub max_payload_index_count: Option<usize>,
}
1288
1289impl From<StrictModeConfig> for StrictModeConfigOutput {
1290 fn from(config: StrictModeConfig) -> Self {
1291 let StrictModeConfig {
1292 enabled,
1293 max_query_limit,
1294 max_timeout,
1295 unindexed_filtering_retrieve,
1296 unindexed_filtering_update,
1297 search_max_hnsw_ef,
1298 search_allow_exact,
1299 search_max_oversampling,
1300 upsert_max_batchsize,
1301 search_max_batchsize,
1302 max_collection_vector_size_bytes,
1303 read_rate_limit,
1304 write_rate_limit,
1305 max_collection_payload_size_bytes,
1306 max_points_count,
1307 filter_max_conditions,
1308 condition_max_size,
1309 multivector_config,
1310 sparse_config,
1311 max_payload_index_count,
1312 } = config;
1313
1314 Self {
1315 enabled,
1316 max_query_limit,
1317 max_timeout,
1318 unindexed_filtering_retrieve,
1319 unindexed_filtering_update,
1320 search_max_hnsw_ef,
1321 search_allow_exact,
1322 search_max_oversampling,
1323 upsert_max_batchsize,
1324 search_max_batchsize,
1325 max_collection_vector_size_bytes,
1326 read_rate_limit,
1327 write_rate_limit,
1328 max_collection_payload_size_bytes,
1329 max_points_count,
1330 filter_max_conditions,
1331 condition_max_size,
1332 multivector_config: multivector_config.map(StrictModeMultivectorConfigOutput::from),
1333 sparse_config: sparse_config.map(StrictModeSparseConfigOutput::from),
1334 max_payload_index_count,
1335 }
1336 }
1337}
1338
/// Value used for `ef_construct` by `HnswConfig::default()`.
pub const DEFAULT_HNSW_EF_CONSTRUCT: usize = 100;
1340
1341impl Default for HnswConfig {
1342 fn default() -> Self {
1343 HnswConfig {
1344 m: 16,
1345 ef_construct: DEFAULT_HNSW_EF_CONSTRUCT,
1346 full_scan_threshold: DEFAULT_FULL_SCAN_THRESHOLD,
1347 max_indexing_threads: 0,
1348 on_disk: Some(false),
1349 payload_m: None,
1350 inline_storage: None,
1351 }
1352 }
1353}
1354
1355impl Default for Indexes {
1356 fn default() -> Self {
1357 Indexes::Plain {}
1358 }
1359}
1360
/// Payload storage backend selected for a segment.
///
/// Serialized with a `type` tag (content under `options`), variant names in snake_case.
#[derive( Debug, Deserialize, Serialize, JsonSchema, Copy, Clone, PartialEq, Eq)]
#[serde(tag = "type", content = "options", rename_all = "snake_case")]
pub enum PayloadStorageType {
    /// Only compiled with the `rocksdb` feature; `is_on_disk` reports `false`.
    #[cfg(feature = "rocksdb")]
    InMemory,
    /// Only compiled with the `rocksdb` feature; `is_on_disk` reports `true`.
    #[cfg(feature = "rocksdb")]
    OnDisk,
    /// `is_on_disk` reports `true`; chosen by `from_on_disk_payload(true)`.
    Mmap,
    /// `is_on_disk` reports `false`; chosen by `from_on_disk_payload(false)`.
    InRamMmap,
}
1376
// Only available in tests or with the `testing` feature.
#[cfg(any(test, feature = "testing"))]
#[allow(clippy::derivable_impls)]
impl Default for PayloadStorageType {
    /// Test-only default: the `Mmap` variant.
    fn default() -> Self {
        Self::Mmap
    }
}
1384
1385impl PayloadStorageType {
1386 pub fn from_on_disk_payload(on_disk: bool) -> Self {
1389 if on_disk { Self::Mmap } else { Self::InRamMmap }
1390 }
1391
1392 pub fn is_on_disk(&self) -> bool {
1393 match self {
1394 #[cfg(feature = "rocksdb")]
1395 PayloadStorageType::InMemory => false,
1396 #[cfg(feature = "rocksdb")]
1397 PayloadStorageType::OnDisk => true,
1398 PayloadStorageType::Mmap => true,
1399 PayloadStorageType::InRamMmap => false,
1400 }
1401 }
1402}
1403
/// Configuration of a segment: its dense vectors, sparse vectors and payload storage.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema, )]
#[serde(rename_all = "snake_case")]
pub struct SegmentConfig {
    /// Dense vector configuration per vector name; empty when absent from the input.
    #[serde(default)]
    pub vector_data: HashMap<VectorNameBuf, VectorDataConfig>,
    /// Sparse vector configuration per vector name; empty when absent from the input
    /// and not serialized while empty.
    #[serde(default)]
    #[serde(skip_serializing_if = "HashMap::is_empty")]
    pub sparse_vector_data: HashMap<VectorNameBuf, SparseVectorDataConfig>,
    /// Payload storage backend of the segment.
    pub payload_storage_type: PayloadStorageType,
}
1415
1416impl SegmentConfig {
1417 pub fn quantization_config(&self, vector_name: &VectorName) -> Option<&QuantizationConfig> {
1423 self.vector_data
1424 .get(vector_name)
1425 .and_then(|v| v.quantization_config.as_ref())
1426 }
1427
1428 pub fn is_any_vector_indexed(&self) -> bool {
1430 self.vector_data
1431 .values()
1432 .any(|config| config.index.is_indexed())
1433 || self
1434 .sparse_vector_data
1435 .values()
1436 .any(|config| config.is_indexed())
1437 }
1438
1439 pub fn are_all_vectors_indexed(&self) -> bool {
1441 self.vector_data
1442 .values()
1443 .all(|config| config.index.is_indexed())
1444 && self
1445 .sparse_vector_data
1446 .values()
1447 .all(|config| config.is_indexed())
1448 }
1449
1450 pub fn is_any_on_disk(&self) -> bool {
1452 self.vector_data
1453 .values()
1454 .any(|config| config.storage_type.is_on_disk())
1455 || self
1456 .sparse_vector_data
1457 .values()
1458 .any(|config| config.index.index_type.is_on_disk())
1459 }
1460
1461 pub fn is_appendable(&self) -> bool {
1462 self.vector_data
1463 .values()
1464 .map(|vector_config| vector_config.is_appendable())
1465 .chain(
1466 self.sparse_vector_data
1467 .values()
1468 .map(|sparse_vector_config| {
1469 sparse_vector_config.index.index_type.is_appendable()
1470 }),
1471 )
1472 .all(|v| v)
1473 }
1474
1475 pub fn check_compatible(&self, other: &Self) -> Result<(), String> {
1476 let Self {
1483 vector_data: _,
1484 sparse_vector_data: _,
1485 payload_storage_type: _,
1486 } = self;
1487
1488 check_vectors_map_compatible(
1489 &self.vector_data,
1490 &other.vector_data,
1491 VectorDataConfig::check_compatible,
1492 )?;
1493
1494 check_vectors_map_compatible(
1495 &self.sparse_vector_data,
1496 &other.sparse_vector_data,
1497 SparseVectorDataConfig::check_compatible,
1498 )?;
1499
1500 Ok(())
1501 }
1502}
1503
/// Verifies that two name-to-config maps hold the same keys with pairwise compatible configs.
///
/// `check` is invoked as `check(this_config, other_config)` for every key of `this`.
///
/// # Errors
///
/// Returns a message when the maps differ in size, when a key of `this` is missing
/// from `other`, or when `check` rejects a pair (its message is wrapped with the key).
fn check_vectors_map_compatible<C, F>(
    this: &HashMap<String, C>,
    other: &HashMap<String, C>,
    check: F,
) -> Result<(), String>
where
    F: Fn(&C, &C) -> Result<(), String>,
{
    // Each key is Debug-rendered individually before the list itself is Debug-rendered.
    let debug_keys = |map: &HashMap<String, C>| -> Vec<String> {
        map.keys().map(|key| format!("{key:?}")).collect()
    };

    if this.len() != other.len() {
        let expected_keys = debug_keys(this);
        let actual_keys = debug_keys(other);
        return Err(format!(
            "Incompatible configs: expected vector storages with keys {expected_keys:?}, but got {actual_keys:?}"
        ));
    }

    this.iter().try_for_each(|(vector_name, config)| {
        let other_config = other.get(vector_name).ok_or_else(|| {
            format!(
                "Incompatible configs: expected vector storage with key {vector_name:?} not found in other config"
            )
        })?;

        check(config, other_config)
            .map_err(|err| format!("Incompatible config for vector {vector_name:?}: {err}"))
    })
}
1533
/// Storage backend for dense vectors.
///
/// The notes on each variant summarize how `VectorStorageType::is_on_disk` and
/// `VectorDataConfig::is_appendable` classify it.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Eq, PartialEq, Copy, Clone)]
pub enum VectorStorageType {
    /// Not on disk; appendable.
    Memory,
    /// On disk; not appendable.
    Mmap,
    /// On disk; appendable. Chosen by `from_on_disk(true)`.
    ChunkedMmap,
    /// Not on disk; appendable. Chosen by `from_on_disk(false)`.
    InRamChunkedMmap,
    /// Not on disk; not appendable.
    InRamMmap,
}
1558
// Only available in tests or with the `testing` feature.
#[cfg(any(test, feature = "testing"))]
#[allow(clippy::derivable_impls)]
impl Default for VectorStorageType {
    /// Test-only default: the `InRamChunkedMmap` variant.
    fn default() -> Self {
        Self::InRamChunkedMmap
    }
}
1566
/// Element datatype used to store dense vectors; serialized in snake_case.
#[derive(
    Default, Debug, Deserialize, Serialize, JsonSchema, Eq, PartialEq, Copy, Clone,
)]
#[serde(rename_all = "snake_case")]
pub enum VectorStorageDatatype {
    /// The default datatype.
    #[default]
    Float32,
    /// Float16 storage variant.
    Float16,
    /// Uint8 storage variant.
    Uint8,
}
1581
/// Multi-vector settings of a dense vector.
#[derive(
    Debug, Default, Deserialize, Serialize, JsonSchema, Eq, PartialEq, Copy, Clone, Hash,
)]
#[serde(rename_all = "snake_case")]
pub struct MultiVectorConfig {
    /// Comparator used for multi-vectors.
    pub comparator: MultiVectorComparator,
}
1590
1591impl MultiVectorConfig {
1592 fn check_compatible(&self, other: &Self) -> Result<(), String> {
1593 let Self { comparator } = self;
1595
1596 if *comparator != other.comparator {
1597 return Err(format!(
1598 "Incompatible configs: expected multi-vector comparator {comparator:?}, but got {other_comparator:?}",
1599 other_comparator = other.comparator
1600 ));
1601 }
1602
1603 Ok(())
1604 }
1605}
1606
/// Comparator choices for multi-vectors; serialized in snake_case.
#[derive(
    Debug, Default, Deserialize, Serialize, JsonSchema, Eq, PartialEq, Copy, Clone, Hash,
)]
#[serde(rename_all = "snake_case")]
pub enum MultiVectorComparator {
    /// The default (and currently only) comparator.
    #[default]
    MaxSim,
}
1615
1616impl VectorStorageType {
1617 pub fn from_on_disk(on_disk: bool) -> Self {
1620 if on_disk {
1621 Self::ChunkedMmap
1622 } else {
1623 Self::InRamChunkedMmap
1624 }
1625 }
1626
1627 pub fn is_on_disk(&self) -> bool {
1629 match self {
1630 Self::Memory | Self::InRamChunkedMmap | Self::InRamMmap => false,
1631 Self::Mmap | Self::ChunkedMmap => true,
1632 }
1633 }
1634}
1635
/// Configuration of a single dense vector inside a segment.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema, )]
#[serde(rename_all = "snake_case")]
pub struct VectorDataConfig {
    /// Vector size; must match for two configs to be compatible.
    pub size: usize,
    /// Distance function; must match for two configs to be compatible.
    pub distance: Distance,
    /// Storage backend of the vector data.
    pub storage_type: VectorStorageType,
    /// Index kind built for this vector.
    pub index: Indexes,
    /// Optional quantization settings.
    pub quantization_config: Option<QuantizationConfig>,
    /// Optional multi-vector settings; `None` when absent and omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub multivector_config: Option<MultiVectorConfig>,
    /// Optional storage datatype; `check_compatible` treats `None` as `Float32`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub datatype: Option<VectorStorageDatatype>,
}
1657
1658impl VectorDataConfig {
1659 pub fn is_appendable(&self) -> bool {
1663 let is_index_appendable = match self.index {
1664 Indexes::Plain {} => true,
1665 Indexes::Hnsw(_) => false,
1666 };
1667 let is_storage_appendable = match self.storage_type {
1668 VectorStorageType::Memory => true,
1669 VectorStorageType::Mmap => false,
1670 VectorStorageType::ChunkedMmap => true,
1671 VectorStorageType::InRamChunkedMmap => true,
1672 VectorStorageType::InRamMmap => false,
1673 };
1674 is_index_appendable && is_storage_appendable
1675 }
1676
1677 pub fn check_compatible(&self, other: &Self) -> Result<(), String> {
1678 let Self {
1683 size,
1684 distance,
1685 storage_type: _,
1686 index: _,
1687 quantization_config: _,
1688 multivector_config,
1689 datatype,
1690 } = self;
1691
1692 if *size != other.size {
1693 return Err(format!(
1694 "Incompatible configs: expected vector size {size}, but got {other_size}",
1695 other_size = other.size
1696 ));
1697 }
1698
1699 if *distance != other.distance {
1700 return Err(format!(
1701 "Incompatible configs: expected distance {distance:?}, but got {other_distance:?}",
1702 other_distance = other.distance
1703 ));
1704 }
1705
1706 let left_datatype = datatype.unwrap_or(VectorStorageDatatype::Float32);
1707 let right_datatype = other.datatype.unwrap_or(VectorStorageDatatype::Float32);
1708 if left_datatype != right_datatype {
1709 return Err(format!(
1710 "Incompatible configs: expected vector storage datatype {left_datatype:?}, but got {right_datatype:?}",
1711 ));
1712 }
1713
1714 match (multivector_config, &other.multivector_config) {
1715 (None, None) => {}
1716 (Some(this), Some(other)) => {
1717 MultiVectorConfig::check_compatible(this, other)?;
1718 }
1719 _ => {
1720 return Err(format!(
1721 "Incompatible configs: expected multivector config {this_multivector_config:?}, but got {other_multivector_config:?}",
1722 this_multivector_config = multivector_config,
1723 other_multivector_config = other.multivector_config
1724 ));
1725 }
1726 }
1727 Ok(())
1728 }
1729}
1730
/// Storage backend for sparse vectors; serialized in snake_case.
#[derive(
    Copy, Clone, Debug, Default, Eq, PartialEq, Deserialize, Serialize, JsonSchema,
)]
#[serde(rename_all = "snake_case")]
pub enum SparseVectorStorageType {
    /// Only compiled with the `rocksdb` feature.
    #[cfg(feature = "rocksdb")]
    OnDisk,
    /// The default storage type.
    #[default]
    Mmap,
}
1743
1744impl SparseVectorStorageType {
1745 pub fn is_on_disk(&self) -> bool {
1747 match self {
1748 #[cfg(feature = "rocksdb")]
1751 Self::OnDisk => true,
1752 Self::Mmap => true,
1753 }
1754 }
1755}
1756
/// Configuration of a single sparse vector inside a segment.
#[derive(
    Copy, Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema, Validate,
)]
#[serde(rename_all = "snake_case")]
pub struct SparseVectorDataConfig {
    /// Sparse index settings.
    pub index: SparseIndexConfig,

    /// Storage backend; when absent from the input it falls back to
    /// `default_sparse_vector_storage_type_when_not_in_config`.
    #[serde(default = "default_sparse_vector_storage_type_when_not_in_config")]
    pub storage_type: SparseVectorStorageType,

    /// Optional modifier; `None` when absent and omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub modifier: Option<Modifier>,
}
1775
/// Storage type assumed when a serialized `SparseVectorDataConfig` has no `storage_type`.
///
/// With the `rocksdb` feature this is `OnDisk`; otherwise it is the enum's default.
fn default_sparse_vector_storage_type_when_not_in_config() -> SparseVectorStorageType {
    #[cfg(feature = "rocksdb")]
    {
        SparseVectorStorageType::OnDisk
    }
    #[cfg(not(feature = "rocksdb"))]
    {
        SparseVectorStorageType::default()
    }
}
1787
1788impl SparseVectorDataConfig {
1789 pub fn is_indexed(&self) -> bool {
1790 true
1791 }
1792
1793 pub fn check_compatible(&self, other: &Self) -> Result<(), String> {
1794 let Self {
1798 index: _,
1799 storage_type: _,
1800 modifier,
1801 } = self;
1802
1803 if modifier != &other.modifier {
1804 return Err(format!(
1805 "Incompatible configs: expected sparse vector modifier {modifier:?}, but got {other_modifier:?}",
1806 other_modifier = other.modifier
1807 ));
1808 }
1809
1810 Ok(())
1811 }
1812}
1813
/// Value used for `full_scan_threshold` by `HnswConfig::default()`.
pub const DEFAULT_FULL_SCAN_THRESHOLD: usize = 10_000;
1816
/// Full-scan threshold default for sparse vectors (presumably the sparse counterpart of
/// `DEFAULT_FULL_SCAN_THRESHOLD`; its users are outside this part of the file).
pub const DEFAULT_SPARSE_FULL_SCAN_THRESHOLD: usize = 5_000;
1818
/// Serializable state of a segment: its version markers and configuration.
#[derive(Debug, Deserialize, Serialize, Clone)]
#[serde(rename_all = "snake_case")]
pub struct SegmentState {
    /// Initial version; `None` when absent from the input.
    #[serde(default)]
    pub initial_version: Option<SeqNumberType>,
    /// Current version, if any.
    pub version: Option<SeqNumberType>,
    /// Segment configuration.
    pub config: SegmentConfig,
}
1828
/// Plain tuple form of a geo point, ordered as `(lon, lat)`.
pub type RawGeoPoint = (f64, f64);
1830
/// Geographic point made of a longitude and a latitude.
///
/// Deserialization goes through `GeoPointShadow`, so the coordinates are range-checked
/// by `GeoPoint::validate` before a value is produced.
#[derive(
    Debug,
    Deserialize,
    Serialize,
    JsonSchema,
    Clone,
    Copy,
    PartialEq,
    Eq,
    Hash,
    Default,
    PartialOrd,
    Ord,
    Pod,
    Zeroable,
)]
#[serde(try_from = "GeoPointShadow")]
#[repr(C)]
pub struct GeoPoint {
    /// Longitude; `validate` accepts values within [-180; 180].
    pub lon: OrderedFloat<f64>,
    /// Latitude; `validate` accepts values within [-90; 90].
    pub lat: OrderedFloat<f64>,
}
1854
/// Ordered sequence of geo points.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
pub struct GeoLineString {
    /// The points, in order.
    pub points: Vec<GeoPoint>,
}
1860
/// Unvalidated deserialization target for `GeoPoint` (see its `serde(try_from)` attribute).
#[derive(Deserialize)]
struct GeoPointShadow {
    /// Raw longitude as read from the input.
    pub lon: f64,
    /// Raw latitude as read from the input.
    pub lat: f64,
}
1866
/// Error produced when geo point coordinates fall outside their valid ranges.
///
/// Carries the offending coordinates so they can be reported back.
#[derive(Debug)]
pub struct GeoPointValidationError {
    /// Longitude that was supplied.
    pub lon: f64,
    /// Latitude that was supplied.
    pub lat: f64,
}

impl std::fmt::Display for GeoPointValidationError {
    /// Writes the message with the latitude first, followed by the longitude.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { lon, lat } = self;
        formatter.write_fmt(format_args!(
            "Wrong format of GeoPoint payload: expected `lat` = {} within [-90;90] and `lon` = {} within [-180;180]",
            lat, lon,
        ))
    }
}
1883
1884impl GeoPoint {
1885 pub fn validate(lon: f64, lat: f64) -> Result<(), GeoPointValidationError> {
1886 let max_lon = 180f64;
1887 let min_lon = -180f64;
1888 let max_lat = 90f64;
1889 let min_lat = -90f64;
1890
1891 if !(min_lon..=max_lon).contains(&lon) || !(min_lat..=max_lat).contains(&lat) {
1892 return Err(GeoPointValidationError { lon, lat });
1893 }
1894 Ok(())
1895 }
1896
1897 pub fn new(lon: f64, lat: f64) -> Result<Self, GeoPointValidationError> {
1898 Self::validate(lon, lat)?;
1899 Ok(Self::new_unchecked(lon, lat))
1900 }
1901
1902 pub const fn new_unchecked(lon: f64, lat: f64) -> Self {
1903 GeoPoint {
1904 lon: OrderedFloat(lon),
1905 lat: OrderedFloat(lat),
1906 }
1907 }
1908}
1909
1910impl TryFrom<GeoPointShadow> for GeoPoint {
1911 type Error = GeoPointValidationError;
1912
1913 fn try_from(value: GeoPointShadow) -> Result<Self, Self::Error> {
1914 let GeoPointShadow { lon, lat } = value;
1915 GeoPoint::validate(lon, lat)?;
1916
1917 Ok(Self::new_unchecked(lon, lat))
1918 }
1919}
1920
1921impl From<GeoPoint> for geo::Point {
1922 fn from(
1923 GeoPoint {
1924 lon: OrderedFloat(lon),
1925 lat: OrderedFloat(lat),
1926 }: GeoPoint,
1927 ) -> Self {
1928 Self::new(lon, lat)
1929 }
1930}
1931
1932impl From<RawGeoPoint> for GeoPoint {
1933 fn from((lon, lat): RawGeoPoint) -> Self {
1934 GeoPoint::new(lon, lat).expect("invalid GeoPoint coordinates")
1935 }
1936}
1937
1938impl From<GeoPoint> for RawGeoPoint {
1939 fn from(geo_point: GeoPoint) -> Self {
1940 (geo_point.lon.0, geo_point.lat.0)
1941 }
1942}
1943
/// Something that payload values can be read from by JSON path.
pub trait PayloadContainer {
    /// Returns references to the values selected by `path`.
    fn get_value(&self, path: &JsonPath) -> MultiValue<&Value>;

    /// Same as [`PayloadContainer::get_value`], but clones every selected value.
    fn get_value_cloned(&self, path: &JsonPath) -> MultiValue<Value> {
        self.get_value(path).into_iter().cloned().collect()
    }
}
1953
/// Builds a `Payload` from the members of a JSON object literal.
///
/// The tokens are wrapped in braces and handed to `serde_json::json!`, and the resulting
/// object map becomes the payload.
macro_rules! payload_json {
    ($($tt:tt)*) => {
        match ::serde_json::json!( { $($tt)* } ) {
            ::serde_json::Value::Object(map) => $crate::segment::types::Payload(map),
            // The braces added above mean `json!` always yields an object.
            _ => unreachable!(),
        }
    };
}
1965
/// Example payload referenced by the `schemars(example = ...)` attribute on `Payload`.
// NOTE(review): the `Option` wrapper is presumably required by how the schema example is
// consumed, hence the lint allowance; confirm before simplifying.
#[allow(clippy::unnecessary_wraps)]
fn payload_example() -> Option<Payload> {
    Some(payload_json! {
        "city": "London",
        "color": "green",
    })
}
1973
/// Payload of a point: a JSON object mapping string keys to JSON values.
#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize, JsonSchema, Hash)]
#[schemars(example = "payload_example")]
pub struct Payload(pub Map<String, Value>);
1977
1978impl Payload {
1979 pub fn merge(&mut self, value: &Payload) {
1980 utils::merge_map(&mut self.0, &value.0)
1981 }
1982
1983 pub fn merge_by_key(&mut self, value: &Payload, key: &JsonPath) {
1984 JsonPath::value_set(Some(key), &mut self.0, &value.0);
1985 }
1986
1987 pub fn remove(&mut self, path: &JsonPath) -> Vec<Value> {
1988 path.value_remove(&mut self.0).to_vec()
1989 }
1990
1991 pub fn len(&self) -> usize {
1992 self.0.len()
1993 }
1994
1995 pub fn is_empty(&self) -> bool {
1996 self.0.is_empty()
1997 }
1998
1999 pub fn contains_key(&self, key: &str) -> bool {
2000 self.0.contains_key(key)
2001 }
2002
2003 pub fn keys(&self) -> impl Iterator<Item = &String> {
2004 self.0.keys()
2005 }
2006}
2007
2008impl PayloadContainer for Map<String, Value> {
2009 fn get_value(&self, path: &JsonPath) -> MultiValue<&Value> {
2010 path.value_get(self)
2011 }
2012}
2013
2014impl PayloadContainer for Payload {
2015 fn get_value(&self, path: &JsonPath) -> MultiValue<&Value> {
2016 path.value_get(&self.0)
2017 }
2018}
2019
2020impl PayloadContainer for OwnedPayloadRef<'_> {
2021 fn get_value(&self, path: &JsonPath) -> MultiValue<&Value> {
2022 path.value_get(self.as_ref())
2023 }
2024}
2025
2026impl Default for Payload {
2027 fn default() -> Self {
2028 Payload(Map::new())
2029 }
2030}
2031
2032impl IntoIterator for Payload {
2033 type Item = (String, Value);
2034 type IntoIter = serde_json::map::IntoIter;
2035
2036 fn into_iter(self) -> serde_json::map::IntoIter {
2037 self.0.into_iter()
2038 }
2039}
2040
2041impl From<Map<String, Value>> for Payload {
2042 fn from(value: serde_json::Map<String, Value>) -> Self {
2043 Payload(value)
2044 }
2045}
2046
/// Payload map that is either borrowed or held through a reference-counted pointer.
#[derive(Clone, Debug)]
pub enum OwnedPayloadRef<'a> {
    /// Borrowed payload map.
    Ref(&'a Map<String, Value>),
    /// Payload map kept alive by an `Rc`; cloning this variant clones the `Rc`, not the map.
    Owned(Rc<Map<String, Value>>),
}
2052
2053impl Deref for OwnedPayloadRef<'_> {
2054 type Target = Map<String, Value>;
2055
2056 fn deref(&self) -> &Self::Target {
2057 match self {
2058 OwnedPayloadRef::Ref(reference) => reference,
2059 OwnedPayloadRef::Owned(owned) => owned.deref(),
2060 }
2061 }
2062}
2063
2064impl AsRef<Map<String, Value>> for OwnedPayloadRef<'_> {
2065 fn as_ref(&self) -> &Map<String, Value> {
2066 match self {
2067 OwnedPayloadRef::Ref(reference) => reference,
2068 OwnedPayloadRef::Owned(owned) => owned.deref(),
2069 }
2070 }
2071}
2072
2073impl From<Payload> for OwnedPayloadRef<'_> {
2074 fn from(payload: Payload) -> Self {
2075 OwnedPayloadRef::Owned(Rc::new(payload.0))
2076 }
2077}
2078
2079impl From<Map<String, Value>> for OwnedPayloadRef<'_> {
2080 fn from(payload: Map<String, Value>) -> Self {
2081 OwnedPayloadRef::Owned(Rc::new(payload))
2082 }
2083}
2084
2085impl<'a> From<&'a Payload> for OwnedPayloadRef<'a> {
2086 fn from(payload: &'a Payload) -> Self {
2087 OwnedPayloadRef::Ref(&payload.0)
2088 }
2089}
2090
2091impl<'a> From<&'a Map<String, Value>> for OwnedPayloadRef<'a> {
2092 fn from(payload: &'a Map<String, Value>) -> Self {
2093 OwnedPayloadRef::Ref(payload)
2094 }
2095}
2096
/// Either a list of `T` or a single `T`.
///
/// The enum is untagged, so serde tries the variants in declaration order: a list first,
/// then a single value.
#[derive(Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Clone)]
#[serde(untagged, rename_all = "snake_case")]
pub enum PayloadVariant<T> {
    /// Several values.
    List(Vec<T>),
    /// Exactly one value.
    Value(T),
}
2113
/// Kinds of payload field schema; serialized in snake_case.
///
/// Each kind expands to a `PayloadSchemaParams` variant with default parameters through
/// `PayloadSchemaType::expand`.
#[derive(
    Debug, Deserialize, Serialize, JsonSchema, Clone, Copy, PartialEq, Hash, Eq, EnumIter,
)]
#[serde(rename_all = "snake_case")]
pub enum PayloadSchemaType {
    Keyword,
    Integer,
    Float,
    Geo,
    Text,
    Bool,
    Datetime,
    Uuid,
}
2129
2130impl PayloadSchemaType {
2131 pub fn name(&self) -> &'static str {
2133 serde_variant::to_variant_name(&self).unwrap_or("unknown")
2134 }
2135
2136 pub fn expand(&self) -> PayloadSchemaParams {
2137 match self {
2138 Self::Keyword => PayloadSchemaParams::Keyword(KeywordIndexParams::default()),
2139 Self::Integer => PayloadSchemaParams::Integer(IntegerIndexParams::default()),
2140 Self::Float => PayloadSchemaParams::Float(FloatIndexParams::default()),
2141 Self::Geo => PayloadSchemaParams::Geo(GeoIndexParams::default()),
2142 Self::Text => PayloadSchemaParams::Text(TextIndexParams::default()),
2143 Self::Bool => PayloadSchemaParams::Bool(BoolIndexParams::default()),
2144 Self::Datetime => PayloadSchemaParams::Datetime(DatetimeIndexParams::default()),
2145 Self::Uuid => PayloadSchemaParams::Uuid(UuidIndexParams::default()),
2146 }
2147 }
2148}
2149
/// Payload field schema with explicit, per-kind index parameters.
///
/// The enum is untagged: serde picks the first variant whose parameter struct accepts
/// the input, in declaration order.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Hash, Eq)]
#[serde(untagged, rename_all = "snake_case")]

pub enum PayloadSchemaParams {
    Keyword(KeywordIndexParams),
    Integer(IntegerIndexParams),
    Float(FloatIndexParams),
    Geo(GeoIndexParams),
    Text(TextIndexParams),
    Bool(BoolIndexParams),
    Datetime(DatetimeIndexParams),
    Uuid(UuidIndexParams),
}
2164
2165impl PayloadSchemaParams {
2166 pub fn name(&self) -> &'static str {
2168 self.kind().name()
2169 }
2170
2171 pub fn kind(&self) -> PayloadSchemaType {
2172 match self {
2173 PayloadSchemaParams::Keyword(_) => PayloadSchemaType::Keyword,
2174 PayloadSchemaParams::Integer(_) => PayloadSchemaType::Integer,
2175 PayloadSchemaParams::Float(_) => PayloadSchemaType::Float,
2176 PayloadSchemaParams::Geo(_) => PayloadSchemaType::Geo,
2177 PayloadSchemaParams::Text(_) => PayloadSchemaType::Text,
2178 PayloadSchemaParams::Bool(_) => PayloadSchemaType::Bool,
2179 PayloadSchemaParams::Datetime(_) => PayloadSchemaType::Datetime,
2180 PayloadSchemaParams::Uuid(_) => PayloadSchemaType::Uuid,
2181 }
2182 }
2183
2184 pub fn tenant_optimization(&self) -> bool {
2185 match self {
2186 PayloadSchemaParams::Keyword(keyword) => keyword.is_tenant.unwrap_or_default(),
2187 PayloadSchemaParams::Integer(integer) => integer.is_principal.unwrap_or_default(),
2188 PayloadSchemaParams::Float(float) => float.is_principal.unwrap_or_default(),
2189 PayloadSchemaParams::Datetime(datetime) => datetime.is_principal.unwrap_or_default(),
2190 PayloadSchemaParams::Uuid(uuid) => uuid.is_tenant.unwrap_or_default(),
2191 PayloadSchemaParams::Geo(_)
2192 | PayloadSchemaParams::Text(_)
2193 | PayloadSchemaParams::Bool(_) => false,
2194 }
2195 }
2196
2197 pub fn is_on_disk(&self) -> bool {
2198 match self {
2199 PayloadSchemaParams::Keyword(i) => i.on_disk.unwrap_or_default(),
2200 PayloadSchemaParams::Integer(i) => i.on_disk.unwrap_or_default(),
2201 PayloadSchemaParams::Float(i) => i.on_disk.unwrap_or_default(),
2202 PayloadSchemaParams::Datetime(i) => i.on_disk.unwrap_or_default(),
2203 PayloadSchemaParams::Uuid(i) => i.on_disk.unwrap_or_default(),
2204 PayloadSchemaParams::Text(i) => i.on_disk.unwrap_or_default(),
2205 PayloadSchemaParams::Geo(i) => i.on_disk.unwrap_or_default(),
2206 PayloadSchemaParams::Bool(i) => i.on_disk.unwrap_or_default(),
2207 }
2208 }
2209
2210 pub fn enable_hnsw(&self) -> bool {
2211 match self {
2212 PayloadSchemaParams::Keyword(params) => params.enable_hnsw.unwrap_or(true),
2213 PayloadSchemaParams::Integer(params) => params.enable_hnsw.unwrap_or(true),
2214 PayloadSchemaParams::Float(params) => params.enable_hnsw.unwrap_or(true),
2215 PayloadSchemaParams::Datetime(params) => params.enable_hnsw.unwrap_or(true),
2216 PayloadSchemaParams::Uuid(params) => params.enable_hnsw.unwrap_or(true),
2217 PayloadSchemaParams::Text(params) => params.enable_hnsw.unwrap_or(true),
2218 PayloadSchemaParams::Geo(params) => params.enable_hnsw.unwrap_or(true),
2219 PayloadSchemaParams::Bool(params) => params.enable_hnsw.unwrap_or(true),
2220 }
2221 }
2222}
2223
2224impl Validate for PayloadSchemaParams {
2225 fn validate(&self) -> Result<(), ValidationErrors> {
2226 match self {
2227 PayloadSchemaParams::Keyword(_) => Ok(()),
2228 PayloadSchemaParams::Integer(integer_index_params) => integer_index_params.validate(),
2229 PayloadSchemaParams::Float(_) => Ok(()),
2230 PayloadSchemaParams::Geo(_) => Ok(()),
2231 PayloadSchemaParams::Text(_) => Ok(()),
2232 PayloadSchemaParams::Bool(_) => Ok(()),
2233 PayloadSchemaParams::Datetime(_) => Ok(()),
2234 PayloadSchemaParams::Uuid(_) => Ok(()),
2235 }
2236 }
2237}
2238
/// Schema of a payload field: either a bare type or a type with explicit parameters.
///
/// Equality and hashing treat a bare type like its default-parameter expansion (see the
/// manual `PartialEq` and `Hash` impls). The enum is untagged, so serde tries `FieldType`
/// before `FieldParams`.
#[derive(Clone, Debug, Eq, Deserialize, Serialize, JsonSchema)]
#[serde(untagged, rename_all = "snake_case")]
pub enum PayloadFieldSchema {
    /// Bare schema type, standing for default parameters.
    FieldType(PayloadSchemaType),
    /// Schema type together with explicit parameters.
    FieldParams(PayloadSchemaParams),
}
2245
2246impl PartialEq for PayloadFieldSchema {
2247 fn eq(&self, other: &Self) -> bool {
2248 match (self, other) {
2249 (Self::FieldType(this), Self::FieldType(other)) => this == other,
2250 (Self::FieldParams(this), Self::FieldParams(other)) => this == other,
2251 (Self::FieldType(this), Self::FieldParams(other)) => &this.expand() == other,
2252 (Self::FieldParams(this), Self::FieldType(other)) => this == &other.expand(),
2253 }
2254 }
2255}
2256
2257impl hash::Hash for PayloadFieldSchema {
2258 fn hash<H: hash::Hasher>(&self, state: &mut H) {
2259 match self {
2260 PayloadFieldSchema::FieldType(default) => default.expand().hash(state),
2261 PayloadFieldSchema::FieldParams(params) => params.hash(state),
2262 }
2263 }
2264}
2265
2266impl Validate for PayloadFieldSchema {
2267 fn validate(&self) -> Result<(), ValidationErrors> {
2268 match self {
2269 PayloadFieldSchema::FieldType(_) => Ok(()), PayloadFieldSchema::FieldParams(payload_schema_params) => {
2271 payload_schema_params.validate()
2272 }
2273 }
2274 }
2275}
2276
2277impl Display for PayloadFieldSchema {
2278 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
2279 match self {
2280 PayloadFieldSchema::FieldType(t) => write!(f, "{}", t.name()),
2281 PayloadFieldSchema::FieldParams(params) => match params {
2282 PayloadSchemaParams::Keyword(_)
2283 | PayloadSchemaParams::Float(_)
2284 | PayloadSchemaParams::Geo(_)
2285 | PayloadSchemaParams::Bool(_)
2286 | PayloadSchemaParams::Datetime(_)
2287 | PayloadSchemaParams::Uuid(_) => write!(f, "{}", params.name()),
2288 PayloadSchemaParams::Integer(integer_params) => {
2289 let range = integer_params.range.unwrap_or(true);
2290 let lookup = integer_params.lookup.unwrap_or(true);
2291 if range && lookup {
2292 write!(f, "integer")
2293 } else {
2294 write!(f, "integer (with range: {range}, lookup: {lookup})")
2295 }
2296 }
2297 PayloadSchemaParams::Text(text_params) => {
2298 if text_params.phrase_matching.unwrap_or_default() {
2299 write!(f, "text (with phrase_matching: true)")
2300 } else {
2301 write!(f, "text")
2302 }
2303 }
2304 },
2305 }
2306 }
2307}
2308
2309impl PayloadFieldSchema {
2310 pub fn expand(&self) -> Cow<'_, PayloadSchemaParams> {
2311 match self {
2312 PayloadFieldSchema::FieldType(t) => Cow::Owned(t.expand()),
2313 PayloadFieldSchema::FieldParams(p) => Cow::Borrowed(p),
2314 }
2315 }
2316
2317 pub fn name(&self) -> &'static str {
2319 match self {
2320 PayloadFieldSchema::FieldType(field_type) => field_type.name(),
2321 PayloadFieldSchema::FieldParams(field_params) => field_params.name(),
2322 }
2323 }
2324
2325 pub fn is_tenant(&self) -> bool {
2326 match self {
2327 PayloadFieldSchema::FieldType(_) => false,
2328 PayloadFieldSchema::FieldParams(params) => params.tenant_optimization(),
2329 }
2330 }
2331
2332 pub fn is_on_disk(&self) -> bool {
2333 match self {
2334 PayloadFieldSchema::FieldType(_) => false,
2335 PayloadFieldSchema::FieldParams(params) => params.is_on_disk(),
2336 }
2337 }
2338
2339 pub fn kind(&self) -> PayloadSchemaType {
2340 match self {
2341 PayloadFieldSchema::FieldType(t) => *t,
2342 PayloadFieldSchema::FieldParams(p) => p.kind(),
2343 }
2344 }
2345
2346 pub fn supports_match(&self) -> bool {
2348 match self {
2349 PayloadFieldSchema::FieldType(payload_schema_type) => match payload_schema_type {
2350 PayloadSchemaType::Keyword => true,
2351 PayloadSchemaType::Integer => true,
2352 PayloadSchemaType::Uuid => true,
2353 PayloadSchemaType::Bool => true,
2354 PayloadSchemaType::Float => false,
2355 PayloadSchemaType::Geo => false,
2356 PayloadSchemaType::Text => false,
2357 PayloadSchemaType::Datetime => false,
2358 },
2359 PayloadFieldSchema::FieldParams(payload_schema_params) => match payload_schema_params {
2360 PayloadSchemaParams::Keyword(_) => true,
2361 PayloadSchemaParams::Integer(integer_index_params) => {
2362 integer_index_params.lookup == Some(true)
2363 }
2364 PayloadSchemaParams::Uuid(_) => true,
2365 PayloadSchemaParams::Bool(_) => true,
2366 PayloadSchemaParams::Float(_) => false,
2367 PayloadSchemaParams::Geo(_) => false,
2368 PayloadSchemaParams::Text(_) => false,
2369 PayloadSchemaParams::Datetime(_) => false,
2370 },
2371 }
2372 }
2373
2374 pub fn enable_hnsw(&self) -> bool {
2375 match self {
2376 PayloadFieldSchema::FieldType(_) => true,
2377 PayloadFieldSchema::FieldParams(p) => p.enable_hnsw(),
2378 }
2379 }
2380}
2381
2382impl From<PayloadSchemaType> for PayloadFieldSchema {
2383 fn from(payload_schema_type: PayloadSchemaType) -> Self {
2384 PayloadFieldSchema::FieldType(payload_schema_type)
2385 }
2386}
2387
2388impl TryFrom<PayloadIndexInfo> for PayloadFieldSchema {
2389 type Error = String;
2390
2391 fn try_from(index_info: PayloadIndexInfo) -> Result<Self, Self::Error> {
2392 let PayloadIndexInfo {
2393 data_type,
2394 params,
2395 points: _,
2396 } = index_info;
2397
2398 match params {
2399 None => Ok(PayloadFieldSchema::FieldType(data_type)),
2400
2401 Some(params) if data_type == params.kind() => {
2402 Ok(PayloadFieldSchema::FieldParams(params))
2403 }
2404
2405 Some(params) => Err(format!(
2406 "payload field with type {data_type:?} has parameters of type {:?}",
2407 params.kind(),
2408 )),
2409 }
2410 }
2411}
2412
2413pub fn value_type(value: &Value) -> Option<PayloadSchemaType> {
2414 match value {
2415 Value::Null => None,
2416 Value::Bool(_) => None,
2417 Value::Number(num) => {
2418 if num.is_i64() {
2419 Some(PayloadSchemaType::Integer)
2420 } else if num.is_f64() {
2421 Some(PayloadSchemaType::Float)
2422 } else {
2423 None
2424 }
2425 }
2426 Value::String(_) => Some(PayloadSchemaType::Keyword),
2427 Value::Array(_) => None,
2428 Value::Object(obj) => {
2429 let lon_op = obj.get("lon").and_then(|x| x.as_f64());
2430 let lat_op = obj.get("lat").and_then(|x| x.as_f64());
2431
2432 if let (Some(_), Some(_)) = (lon_op, lat_op) {
2433 return Some(PayloadSchemaType::Geo);
2434 }
2435 None
2436 }
2437 }
2438}
2439
/// A single value to match against: a string, an integer or a boolean.
///
/// Untagged: serde tries the variants in declaration order.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(untagged)]
pub enum ValueVariants {
    String(String),
    Integer(IntPayloadType),
    Bool(bool),
}
2447
2448impl ValueVariants {
2449 pub fn to_value(&self) -> Value {
2450 match self {
2451 ValueVariants::String(keyword) => Value::String(keyword.clone()),
2452 &ValueVariants::Integer(integer) => Value::Number(integer.into()),
2453 &ValueVariants::Bool(flag) => Value::Bool(flag),
2454 }
2455 }
2456}
2457
/// A set of candidate values: either all strings or all integers.
///
/// Untagged: serde tries `Strings` first, then `Integers`.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq)]
#[serde(untagged)]
pub enum AnyVariants {
    /// Set of string values.
    Strings(IndexSet<String, FnvBuildHasher>),
    /// Set of integer values.
    Integers(IndexSet<IntPayloadType, FnvBuildHasher>),
}
2464
2465impl Hash for AnyVariants {
2466 fn hash<H: Hasher>(&self, state: &mut H) {
2467 mem::discriminant(self).hash(state);
2468 match self {
2469 AnyVariants::Strings(index_set) => {
2470 for item in index_set.iter() {
2471 item.hash(state);
2472 }
2473 }
2474 AnyVariants::Integers(index_set) => {
2475 for item in index_set.iter() {
2476 item.hash(state);
2477 }
2478 }
2479 }
2480 }
2481}
2482
2483impl AnyVariants {
2484 pub fn len(&self) -> usize {
2485 match self {
2486 AnyVariants::Strings(index_set) => index_set.len(),
2487 AnyVariants::Integers(index_set) => index_set.len(),
2488 }
2489 }
2490
2491 pub fn is_empty(&self) -> bool {
2492 match self {
2493 AnyVariants::Strings(index_set) => index_set.is_empty(),
2494 AnyVariants::Integers(index_set) => index_set.is_empty(),
2495 }
2496 }
2497}
2498
// Match condition carrying one scalar value under the `value` key.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub struct MatchValue {
    // The value to match against.
    pub value: ValueVariants,
}
2505
// Match condition carrying a text query under the `text` key.
// NOTE(review): how the text is matched is decided by the consumer of this
// type, not here.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub struct MatchText {
    // The text query.
    pub text: String,
}
2512
// Match condition carrying a text query under the `text_any` key.
// NOTE(review): presumably matches when any term of the query is present;
// confirm against the code that evaluates this condition.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub struct MatchTextAny {
    // The text query.
    pub text_any: String,
}
2519
2520impl<S: Into<String>> From<S> for MatchText {
2521 fn from(text: S) -> Self {
2522 MatchText { text: text.into() }
2523 }
2524}
2525
// Match condition carrying a phrase under the `phrase` key.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub struct MatchPhrase {
    // The phrase to look for.
    pub phrase: String,
}
2532
2533impl<S: Into<String>> From<S> for MatchPhrase {
2534 fn from(text: S) -> Self {
2535 MatchPhrase {
2536 phrase: text.into(),
2537 }
2538 }
2539}
2540
// Match condition carrying a set of candidate values under the `any` key.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub struct MatchAny {
    // The candidate values.
    pub any: AnyVariants,
}
2547
// Match condition carrying a set of excluded values under the `except` key.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub struct MatchExcept {
    // The values to exclude.
    pub except: AnyVariants,
}
2554
// Wire-format mirror of `Match`, with the same variants. `Match` declares
// `#[serde(from = "MatchInterface")]`, so deserializing a `Match` parses this
// type first and then converts it.
// `untagged`: serde tries the variants in declaration order and picks the
// first one whose shape parses.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq)]
#[serde(untagged, rename_all = "snake_case")]
pub enum MatchInterface {
    Value(MatchValue),
    Text(MatchText),
    TextAny(MatchTextAny),
    Phrase(MatchPhrase),
    Any(MatchAny),
    Except(MatchExcept),
}
2566
// The match part of a field condition. Serialized untagged; deserialized by
// first parsing a `MatchInterface` and converting it (`from = "MatchInterface"`).
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(untagged, from = "MatchInterface")]
pub enum Match {
    // Single scalar value.
    Value(MatchValue),
    // Text query.
    Text(MatchText),
    // Text query carried under `text_any`.
    TextAny(MatchTextAny),
    // Phrase query.
    Phrase(MatchPhrase),
    // Set of candidate values.
    Any(MatchAny),
    // Set of excluded values.
    Except(MatchExcept),
}
2578
2579impl Match {
2580 pub fn new_value(value: ValueVariants) -> Self {
2581 Self::Value(MatchValue { value })
2582 }
2583
2584 pub fn new_text(text: &str) -> Self {
2585 Self::Text(MatchText { text: text.into() })
2586 }
2587
2588 pub fn new_phrase(phrase: &str) -> Self {
2589 Self::Phrase(MatchPhrase {
2590 phrase: phrase.into(),
2591 })
2592 }
2593
2594 pub fn new_any(any: AnyVariants) -> Self {
2595 Self::Any(MatchAny { any })
2596 }
2597
2598 pub fn new_except(except: AnyVariants) -> Self {
2599 Self::Except(MatchExcept { except })
2600 }
2601}
2602
2603impl From<AnyVariants> for Match {
2604 fn from(any: AnyVariants) -> Self {
2605 Self::Any(MatchAny { any })
2606 }
2607}
2608
2609impl From<MatchInterface> for Match {
2610 fn from(value: MatchInterface) -> Self {
2611 match value {
2612 MatchInterface::Value(value) => Self::Value(MatchValue { value: value.value }),
2613 MatchInterface::Text(text) => Self::Text(MatchText { text: text.text }),
2614 MatchInterface::TextAny(text_any) => Self::TextAny(MatchTextAny {
2615 text_any: text_any.text_any,
2616 }),
2617 MatchInterface::Any(any) => Self::Any(MatchAny { any: any.any }),
2618 MatchInterface::Except(except) => Self::Except(MatchExcept {
2619 except: except.except,
2620 }),
2621 MatchInterface::Phrase(MatchPhrase { phrase }) => Self::Phrase(MatchPhrase { phrase }),
2622 }
2623 }
2624}
2625
2626impl From<bool> for Match {
2627 fn from(flag: bool) -> Self {
2628 Self::Value(MatchValue {
2629 value: ValueVariants::Bool(flag),
2630 })
2631 }
2632}
2633
2634impl From<String> for Match {
2635 fn from(keyword: String) -> Self {
2636 Self::Value(MatchValue {
2637 value: ValueVariants::String(keyword),
2638 })
2639 }
2640}
2641
2642impl From<EcoString> for Match {
2643 fn from(keyword: EcoString) -> Self {
2644 Self::Value(MatchValue {
2645 value: ValueVariants::String(keyword.into()),
2646 })
2647 }
2648}
2649
2650impl From<IntPayloadType> for Match {
2651 fn from(integer: IntPayloadType) -> Self {
2652 Self::Value(MatchValue {
2653 value: ValueVariants::Integer(integer),
2654 })
2655 }
2656}
2657
2658impl From<Vec<String>> for Match {
2659 fn from(keywords: Vec<String>) -> Self {
2660 let keywords: IndexSet<String, FnvBuildHasher> = keywords.into_iter().collect();
2661 Self::Any(MatchAny {
2662 any: AnyVariants::Strings(keywords),
2663 })
2664 }
2665}
2666
2667impl From<ValueVariants> for Match {
2668 fn from(value: ValueVariants) -> Self {
2669 Self::Value(MatchValue { value })
2670 }
2671}
2672
2673impl From<Vec<String>> for MatchExcept {
2674 fn from(keywords: Vec<String>) -> Self {
2675 let keywords: IndexSet<String, FnvBuildHasher> = keywords.into_iter().collect();
2676 MatchExcept {
2677 except: AnyVariants::Strings(keywords),
2678 }
2679 }
2680}
2681
2682impl From<Vec<IntPayloadType>> for Match {
2683 fn from(integers: Vec<IntPayloadType>) -> Self {
2684 let integers: IndexSet<_, FnvBuildHasher> = integers.into_iter().collect();
2685 Self::Any(MatchAny {
2686 any: AnyVariants::Integers(integers),
2687 })
2688 }
2689}
2690
2691impl From<Vec<IntPayloadType>> for MatchExcept {
2692 fn from(integers: Vec<IntPayloadType>) -> Self {
2693 let integers: IndexSet<_, FnvBuildHasher> = integers.into_iter().collect();
2694 MatchExcept {
2695 except: AnyVariants::Integers(integers),
2696 }
2697 }
2698}
2699
// A range condition over either floats or datetimes.
// Serialized untagged; `Deserialize` and `Hash` are implemented by hand for
// this type rather than derived.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Serialize, JsonSchema)]
#[serde(untagged)]
pub enum RangeInterface {
    // Range with float bounds.
    Float(Range<OrderedFloat<FloatPayloadType>>),
    // Range with datetime bounds.
    DateTime(Range<DateTimePayloadType>),
}
2706
2707impl Hash for RangeInterface {
2708 fn hash<H: hash::Hasher>(&self, state: &mut H) {
2709 match self {
2710 RangeInterface::Float(range) => {
2711 let Range { lt, gt, gte, lte } = range;
2712 lt.hash(state);
2713 gt.hash(state);
2714 gte.hash(state);
2715 lte.hash(state);
2716 }
2717 RangeInterface::DateTime(range) => {
2718 let Range { lt, gt, gte, lte } = range;
2719 lt.hash(state);
2720 gt.hash(state);
2721 gte.hash(state);
2722 lte.hash(state);
2723 }
2724 }
2725 }
2726}
2727
/// Private helper with a derived untagged `Deserialize`; it has the same
/// variants as `RangeInterface`. Serde tries `Float` first, then `DateTime`.
#[derive(serde::Deserialize)]
#[serde(untagged)]
enum RangeInterfaceUntagged {
    /// Range with float bounds.
    Float(Range<OrderedFloatPayloadType>),
    /// Range with datetime bounds.
    DateTime(Range<DateTimePayloadType>),
}
2734
2735impl<'de> serde::Deserialize<'de> for RangeInterface {
2736 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
2740 where
2741 D: serde::Deserializer<'de>,
2742 {
2743 if !deserializer.is_human_readable() {
2744 return RangeInterfaceUntagged::deserialize(deserializer).map(|parsed| match parsed {
2745 RangeInterfaceUntagged::Float(r) => RangeInterface::Float(r),
2746 RangeInterfaceUntagged::DateTime(r) => RangeInterface::DateTime(r),
2747 });
2748 }
2749
2750 let value = serde_json::Value::deserialize(deserializer)?;
2751
2752 if let Some(obj) = value.as_object() {
2754 let keys = ["lt", "gt", "lte", "gte"];
2755 let has_string_bound = keys
2756 .iter()
2757 .any(|k| obj.get(*k).map(|v| v.is_string()).unwrap_or(false));
2758
2759 if has_string_bound {
2760 return serde_json::from_value::<Range<DateTimePayloadType>>(value)
2761 .map(RangeInterface::DateTime)
2762 .map_err(serde::de::Error::custom);
2763 }
2764 }
2765
2766 let parsed = serde_json::from_value::<RangeInterfaceUntagged>(value)
2768 .map_err(serde::de::Error::custom)?;
2769
2770 Ok(match parsed {
2771 RangeInterfaceUntagged::Float(r) => RangeInterface::Float(r),
2772 RangeInterfaceUntagged::DateTime(r) => RangeInterface::DateTime(r),
2773 })
2774 }
2775}
2776
/// Shorthand for the payload float type wrapped in `OrderedFloat`.
type OrderedFloatPayloadType = OrderedFloat<FloatPayloadType>;
2778
// Range with four optional bounds; an unset bound places no restriction.
// `derive_args` names the schema "Range" for the ordered-float instantiation
// and "DatetimeRange" for the datetime one.
#[macro_rules_attribute::macro_rules_derive(crate::segment::common::macros::schemars_rename_generics)]
#[derive_args(< OrderedFloatPayloadType > => "Range", < DateTimePayloadType > => "DatetimeRange")]
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub struct Range<T> {
    // Exclusive upper bound: value < lt.
    pub lt: Option<T>,
    // Exclusive lower bound: value > gt.
    pub gt: Option<T>,
    // Inclusive lower bound: value >= gte.
    pub gte: Option<T>,
    // Inclusive upper bound: value <= lte.
    pub lte: Option<T>,
}
2794
2795impl<T: Copy> Range<T> {
2796 pub fn map<U, F: Fn(T) -> U>(&self, f: F) -> Range<U> {
2798 let Self { lt, gt, gte, lte } = self;
2799 Range {
2800 lt: lt.map(&f),
2801 gt: gt.map(&f),
2802 gte: gte.map(&f),
2803 lte: lte.map(&f),
2804 }
2805 }
2806}
2807
2808impl<T: Copy + PartialOrd> Range<T> {
2809 pub fn check_range(&self, number: T) -> bool {
2810 let Self { lt, gt, gte, lte } = self;
2811 lt.is_none_or(|x| number < x)
2812 && gt.is_none_or(|x| number > x)
2813 && lte.is_none_or(|x| number <= x)
2814 && gte.is_none_or(|x| number >= x)
2815 }
2816}
2817
// Condition on the number of values in a field. An unset bound places no restriction.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Copy, Clone, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub struct ValuesCount {
    // Exclusive upper bound: count < lt.
    pub lt: Option<usize>,
    // Exclusive lower bound: count > gt.
    pub gt: Option<usize>,
    // Inclusive lower bound: count >= gte.
    pub gte: Option<usize>,
    // Inclusive upper bound: count <= lte.
    pub lte: Option<usize>,
}
2831
2832impl ValuesCount {
2833 pub fn check_count(&self, count: usize) -> bool {
2834 let Self { lt, gt, gte, lte } = self;
2835 lt.is_none_or(|x| count < x)
2836 && gt.is_none_or(|x| count > x)
2837 && lte.is_none_or(|x| count <= x)
2838 && gte.is_none_or(|x| count >= x)
2839 }
2840
2841 pub fn check_count_from(&self, value: &Value) -> bool {
2842 let count = match value {
2843 Value::Null => 0,
2844 Value::Array(array) => array.len(),
2845 _ => 1,
2846 };
2847
2848 self.check_count(count)
2849 }
2850}
2851
#[cfg(test)]
impl From<std::ops::Range<usize>> for ValuesCount {
    /// Test-only: a half-open `start..end` range becomes `gte = start`, `lt = end`.
    fn from(range: std::ops::Range<usize>) -> Self {
        let std::ops::Range { start, end } = range;
        Self {
            lt: Some(end),
            gt: None,
            gte: Some(start),
            lte: None,
        }
    }
}
2863
// Geo filter: a rectangle given by its top-left and bottom-right corners.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Deserialize, Serialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub struct GeoBoundingBox {
    // Top-left corner of the rectangle.
    pub top_left: GeoPoint,
    // Bottom-right corner of the rectangle.
    pub bottom_right: GeoPoint,
}
2875
2876impl GeoBoundingBox {
2877 pub fn check_point(&self, point: &GeoPoint) -> bool {
2878 let longitude_check = if self.top_left.lon > self.bottom_right.lon {
2879 point.lon > self.top_left.lon || point.lon < self.bottom_right.lon
2881 } else {
2882 self.top_left.lon < point.lon && point.lon < self.bottom_right.lon
2883 };
2884
2885 let latitude_check = self.bottom_right.lat < point.lat && point.lat < self.top_left.lat;
2886
2887 longitude_check && latitude_check
2888 }
2889}
2890
// Geo filter: a circle given by a center point and a radius.
// `Hash` is implemented by hand for this type rather than derived.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Deserialize, Serialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub struct GeoRadius {
    // Center of the circle.
    pub center: GeoPoint,
    // Radius of the circle, compared against the haversine distance from `center`.
    // NOTE(review): unit is whatever `geo`'s `Haversine` returns (meters per the
    // geo docs) - confirm.
    pub radius: OrderedFloat<f64>,
}
2902
2903impl Hash for GeoRadius {
2904 fn hash<H: hash::Hasher>(&self, state: &mut H) {
2905 let GeoRadius { center, radius } = self;
2906 center.hash(state);
2907 OrderedFloat(*radius).hash(state);
2909 }
2910}
2911
2912impl GeoRadius {
2913 pub fn check_point(&self, point: &GeoPoint) -> bool {
2914 let query_center = Point::from(self.center);
2915 Haversine.distance(query_center, Point::from(*point)) < self.radius.0
2916 }
2917}
2918
/// Unvalidated, deserialize-only twin of `GeoPolygon`. `GeoPolygon` declares
/// `#[serde(try_from = "GeoPolygonShadow")]`, so input is parsed into this
/// type first and validated during the conversion.
#[derive(Deserialize)]
pub struct GeoPolygonShadow {
    /// Outer ring of the polygon.
    pub exterior: GeoLineString,
    /// Optional inner rings.
    pub interiors: Option<Vec<GeoLineString>>,
}
2924
/// Holds a `geo` `Polygon` ready for point-containment checks.
pub struct PolygonWrapper {
    /// The wrapped polygon.
    pub polygon: Polygon,
}
2928
2929impl PolygonWrapper {
2930 pub fn check_point(&self, point: &GeoPoint) -> bool {
2931 let point_new = Point::new(point.lon.0, point.lat.0);
2932 self.polygon.contains(&point_new)
2933 }
2934}
2935
// Geo filter: a polygon with an outer ring and optional inner rings.
// Deserialized through `GeoPolygonShadow` (`try_from`), which validates every
// ring during the conversion.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(try_from = "GeoPolygonShadow", rename_all = "snake_case")]
pub struct GeoPolygon {
    // Outer ring of the polygon.
    pub exterior: GeoLineString,
    // Optional inner rings.
    pub interiors: Option<Vec<GeoLineString>>,
}
2951
2952impl GeoPolygon {
2953 pub fn validate_line_string(line: &GeoLineString) -> OperationResult<()> {
2954 if line.points.len() <= 3 {
2955 return Err(OperationError::ValidationError {
2956 description: format!(
2957 "polygon invalid, the size must be at least 4, got {}",
2958 line.points.len()
2959 ),
2960 });
2961 }
2962
2963 if let (Some(first), Some(last)) = (line.points.first(), line.points.last())
2964 && ((first.lat - last.lat).abs() > f64::EPSILON
2965 || (first.lon - last.lon).abs() > f64::EPSILON)
2966 {
2967 return Err(OperationError::ValidationError {
2968 description: String::from(
2969 "polygon invalid, the first and the last points should be the same to form a closed line",
2970 ),
2971 });
2972 }
2973
2974 Ok(())
2975 }
2976
2977 pub fn convert(&self) -> PolygonWrapper {
2979 let exterior_line: LineString = LineString(
2980 self.exterior
2981 .points
2982 .iter()
2983 .map(|p| Coord {
2984 x: p.lon.0,
2985 y: p.lat.0,
2986 })
2987 .collect(),
2988 );
2989
2990 let interior_lines: Vec<LineString> = match &self.interiors {
2992 None => vec![],
2993 Some(interiors) => interiors
2994 .iter()
2995 .map(|interior_points| {
2996 interior_points
2997 .points
2998 .iter()
2999 .map(|p| Coord {
3000 x: p.lon.0,
3001 y: p.lat.0,
3002 })
3003 .collect()
3004 })
3005 .map(LineString)
3006 .collect(),
3007 };
3008 PolygonWrapper {
3009 polygon: Polygon::new(exterior_line, interior_lines),
3010 }
3011 }
3012}
3013
3014impl TryFrom<GeoPolygonShadow> for GeoPolygon {
3015 type Error = OperationError;
3016
3017 fn try_from(value: GeoPolygonShadow) -> OperationResult<Self> {
3018 let GeoPolygonShadow {
3019 exterior,
3020 interiors,
3021 } = value;
3022 Self::validate_line_string(&exterior)?;
3023
3024 if let Some(interiors) = &interiors {
3025 for interior in interiors {
3026 Self::validate_line_string(interior)?;
3027 }
3028 }
3029
3030 Ok(GeoPolygon {
3031 exterior,
3032 interiors,
3033 })
3034 }
3035}
3036
// A condition on one payload field: the `key` plus optional checks.
// Schema-level validation (`validate_field_condition`) rejects a condition in
// which every check is unset. Unset checks are omitted when serializing.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Validate, Clone, PartialEq, Eq, Hash)]
#[validate(schema(function = "validate_field_condition"))]
#[serde(rename_all = "snake_case")]
pub struct FieldCondition {
    // Payload key the checks apply to.
    pub key: PayloadKeyType,
    // Value / text / set match.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub r#match: Option<Match>,
    // Float or datetime range.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub range: Option<RangeInterface>,
    // Geo rectangle.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub geo_bounding_box: Option<GeoBoundingBox>,
    // Geo circle.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub geo_radius: Option<GeoRadius>,
    // Geo polygon.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub geo_polygon: Option<GeoPolygon>,
    // Bounds on the number of values in the field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub values_count: Option<ValuesCount>,
    // Emptiness flag for the field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_empty: Option<bool>,
    // Null flag for the field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_null: Option<bool>,
}
3069
3070impl FieldCondition {
3071 pub fn new_match(key: PayloadKeyType, r#match: Match) -> Self {
3072 Self {
3073 key,
3074 r#match: Some(r#match),
3075 range: None,
3076 geo_bounding_box: None,
3077 geo_radius: None,
3078 geo_polygon: None,
3079 values_count: None,
3080 is_empty: None,
3081 is_null: None,
3082 }
3083 }
3084
3085 pub fn new_range(key: PayloadKeyType, range: Range<OrderedFloat<FloatPayloadType>>) -> Self {
3086 Self {
3087 key,
3088 r#match: None,
3089 range: Some(RangeInterface::Float(range)),
3090 geo_bounding_box: None,
3091 geo_radius: None,
3092 geo_polygon: None,
3093 values_count: None,
3094 is_empty: None,
3095 is_null: None,
3096 }
3097 }
3098
3099 pub fn new_datetime_range(
3100 key: PayloadKeyType,
3101 datetime_range: Range<DateTimePayloadType>,
3102 ) -> Self {
3103 Self {
3104 key,
3105 r#match: None,
3106 range: Some(RangeInterface::DateTime(datetime_range)),
3107 geo_bounding_box: None,
3108 geo_radius: None,
3109 geo_polygon: None,
3110 values_count: None,
3111 is_empty: None,
3112 is_null: None,
3113 }
3114 }
3115
3116 pub fn new_geo_bounding_box(key: PayloadKeyType, geo_bounding_box: GeoBoundingBox) -> Self {
3117 Self {
3118 key,
3119 r#match: None,
3120 range: None,
3121 geo_bounding_box: Some(geo_bounding_box),
3122 geo_radius: None,
3123 geo_polygon: None,
3124 values_count: None,
3125 is_empty: None,
3126 is_null: None,
3127 }
3128 }
3129
3130 pub fn new_geo_radius(key: PayloadKeyType, geo_radius: GeoRadius) -> Self {
3131 Self {
3132 key,
3133 r#match: None,
3134 range: None,
3135 geo_bounding_box: None,
3136 geo_radius: Some(geo_radius),
3137 geo_polygon: None,
3138 values_count: None,
3139 is_empty: None,
3140 is_null: None,
3141 }
3142 }
3143
3144 pub fn new_geo_polygon(key: PayloadKeyType, geo_polygon: GeoPolygon) -> Self {
3145 Self {
3146 key,
3147 r#match: None,
3148 range: None,
3149 geo_bounding_box: None,
3150 geo_radius: None,
3151 geo_polygon: Some(geo_polygon),
3152 values_count: None,
3153 is_empty: None,
3154 is_null: None,
3155 }
3156 }
3157
3158 pub fn new_values_count(key: PayloadKeyType, values_count: ValuesCount) -> Self {
3159 Self {
3160 key,
3161 r#match: None,
3162 range: None,
3163 geo_bounding_box: None,
3164 geo_radius: None,
3165 geo_polygon: None,
3166 values_count: Some(values_count),
3167 is_empty: None,
3168 is_null: None,
3169 }
3170 }
3171
3172 pub fn new_is_empty(key: PayloadKeyType, is_empty: bool) -> Self {
3173 Self {
3174 key,
3175 r#match: None,
3176 range: None,
3177 geo_bounding_box: None,
3178 geo_radius: None,
3179 geo_polygon: None,
3180 values_count: None,
3181 is_empty: Some(is_empty),
3182 is_null: None,
3183 }
3184 }
3185
3186 pub fn new_is_null(key: PayloadKeyType, is_null: bool) -> Self {
3187 Self {
3188 key,
3189 r#match: None,
3190 range: None,
3191 geo_bounding_box: None,
3192 geo_radius: None,
3193 geo_polygon: None,
3194 values_count: None,
3195 is_empty: None,
3196 is_null: Some(is_null),
3197 }
3198 }
3199
3200 pub fn all_fields_none(&self) -> bool {
3201 matches!(
3202 self,
3203 FieldCondition {
3204 r#match: None,
3205 range: None,
3206 geo_bounding_box: None,
3207 geo_radius: None,
3208 geo_polygon: None,
3209 values_count: None,
3210 key: _,
3211 is_empty: None,
3212 is_null: None,
3213 }
3214 )
3215 }
3216
3217 fn input_size(&self) -> usize {
3218 if self.r#match.is_none() {
3219 return 0;
3220 }
3221
3222 match self.r#match.as_ref().unwrap() {
3223 Match::Any(match_any) => match_any.any.len(),
3224 Match::Except(match_except) => match_except.except.len(),
3225 Match::Value(_) => 0,
3226 Match::Text(_) => 0,
3227 Match::Phrase(_) => 0,
3228 Match::TextAny(_) => 0,
3229 }
3230 }
3231}
3232
3233pub fn validate_field_condition(field_condition: &FieldCondition) -> Result<(), ValidationError> {
3234 if field_condition.all_fields_none() {
3235 Err(ValidationError::new(
3236 "At least one field condition must be specified",
3237 ))
3238 } else {
3239 Ok(())
3240 }
3241}
3242
// Reference to a payload field by key.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
pub struct PayloadField {
    // Key of the payload field.
    pub key: PayloadKeyType,
}
3249
// Condition naming the payload field to test for emptiness, under the `is_empty` key.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
pub struct IsEmptyCondition {
    // The field to test.
    pub is_empty: PayloadField,
}
3255
// Condition naming the payload field to test for null, under the `is_null` key.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
pub struct IsNullCondition {
    // The field to test.
    pub is_null: PayloadField,
}
3261
3262impl From<JsonPath> for IsNullCondition {
3263 fn from(key: PayloadKeyType) -> Self {
3264 IsNullCondition {
3265 is_null: PayloadField { key },
3266 }
3267 }
3268}
3269
3270impl From<JsonPath> for IsEmptyCondition {
3271 fn from(key: PayloadKeyType) -> Self {
3272 IsEmptyCondition {
3273 is_empty: PayloadField { key },
3274 }
3275 }
3276}
3277
// Condition holding a set of point ids under the `has_id` key.
// `Hash` is implemented by hand for this type rather than derived.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq)]
pub struct HasIdCondition {
    // The id set; the schema is taken from `HashSet<PointIdType>`.
    #[schemars(schema_with = "HashSet::<PointIdType>::json_schema")]
    pub has_id: MaybeArc<AHashSet<PointIdType>>,
}
3284
3285impl Hash for HasIdCondition {
3286 fn hash<H: hash::Hasher>(&self, state: &mut H) {
3287 unordered_hash_unique(state, self.has_id.iter());
3288 }
3289}
3290
// Condition naming a vector under the `has_vector` key.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
pub struct HasVectorCondition {
    // Name of the vector.
    pub has_vector: VectorNameBuf,
}
3296
3297impl From<VectorNameBuf> for HasVectorCondition {
3298 fn from(vector: VectorNameBuf) -> Self {
3299 HasVectorCondition { has_vector: vector }
3300 }
3301}
3302
/// Id sets with more entries than this are wrapped with `MaybeArc::arc` when a
/// `HasIdCondition` is built from a set; smaller ones use `MaybeArc::no_arc`.
const HAS_ID_CONDITION_ARC_THRESHOLD: usize = 1_000;
3307
3308impl From<AHashSet<PointIdType>> for HasIdCondition {
3309 fn from(has_id: AHashSet<PointIdType>) -> Self {
3310 if has_id.len() > HAS_ID_CONDITION_ARC_THRESHOLD {
3311 HasIdCondition {
3312 has_id: MaybeArc::arc(has_id),
3313 }
3314 } else {
3315 HasIdCondition {
3316 has_id: MaybeArc::no_arc(has_id),
3317 }
3318 }
3319 }
3320}
3321
3322impl FromIterator<PointIdType> for HasIdCondition {
3323 fn from_iter<T: IntoIterator<Item = PointIdType>>(iter: T) -> Self {
3324 let items: AHashSet<_> = iter.into_iter().collect();
3325 Self::from(items)
3327 }
3328}
3329
// A payload key paired with a filter; the filter is validated recursively.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Validate, Hash)]
pub struct Nested {
    // Payload key of the nested field.
    pub key: PayloadKeyType,
    // Filter paired with the key.
    #[validate(nested)]
    pub filter: Filter,
}
3337
// Condition wrapping a `Nested` under the `nested` key; validated recursively.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Validate, Hash)]
pub struct NestedCondition {
    // The nested key and filter.
    #[validate(nested)]
    pub nested: Nested,
}
3343
3344impl NestedCondition {
3346 pub fn new(nested: Nested) -> Self {
3347 Self { nested }
3348 }
3349
3350 pub fn raw_key(&self) -> &PayloadKeyType {
3352 &self.nested.key
3353 }
3354
3355 pub fn array_key(&self) -> PayloadKeyType {
3357 self.raw_key().array_key()
3358 }
3359
3360 pub fn filter(&self) -> &Filter {
3361 &self.nested.filter
3362 }
3363}
3364
// A single filter condition. Serialized untagged, i.e. as the bare inner
// condition. `Deserialize` is implemented by hand for this type rather than
// derived.
#[derive(Clone, Debug, Serialize, JsonSchema, PartialEq, Eq, Hash)]
#[serde(untagged)]
#[serde(
    expecting = "Expected some form of condition, which can be a field condition (like {\"key\": ..., \"match\": ... }), or some other mentioned in the documentation: https://qdrant.tech/documentation/concepts/filtering/#filtering-conditions"
)]
#[allow(clippy::large_enum_variant)]
pub enum Condition {
    // Check on a payload field.
    Field(FieldCondition),
    // Payload field emptiness check.
    IsEmpty(IsEmptyCondition),
    // Payload field null check.
    IsNull(IsNullCondition),
    // Point id membership check.
    HasId(HasIdCondition),
    // Named vector presence check.
    HasVector(HasVectorCondition),
    // Filter applied under a nested payload key.
    Nested(NestedCondition),
    // A whole filter used as one condition.
    Filter(Filter),

    // Programmatic id check; excluded from serde via `skip`.
    #[serde(skip)]
    CustomIdChecker(CustomIdChecker),
}
3390
/// Private mirror of `Condition` with a derived untagged `Deserialize`; serde
/// tries the variants in declaration order. It is converted into `Condition`
/// through the `From<ConditionUntagged>` impl.
#[derive(Deserialize)]
#[serde(untagged)]
#[serde(
    expecting = "Expected some form of condition, which can be a field condition (like {\"key\": ..., \"match\": ... }), or some other mentioned in the documentation: https://qdrant.tech/documentation/concepts/filtering/#filtering-conditions"
)]
#[allow(clippy::large_enum_variant)]
#[allow(dead_code)]
enum ConditionUntagged {
    Field(FieldCondition),
    IsEmpty(IsEmptyCondition),
    IsNull(IsNullCondition),
    HasId(HasIdCondition),
    HasVector(HasVectorCondition),
    Nested(NestedCondition),
    Filter(Filter),

    /// Skipped by serde, so deserialization never produces it.
    #[serde(skip)]
    CustomIdChecker(CustomIdChecker),
}
3410
3411impl From<ConditionUntagged> for Condition {
3412 fn from(condition: ConditionUntagged) -> Self {
3413 match condition {
3414 ConditionUntagged::Field(condition) => Condition::Field(condition),
3415 ConditionUntagged::IsEmpty(condition) => Condition::IsEmpty(condition),
3416 ConditionUntagged::IsNull(condition) => Condition::IsNull(condition),
3417 ConditionUntagged::HasId(condition) => Condition::HasId(condition),
3418 ConditionUntagged::HasVector(condition) => Condition::HasVector(condition),
3419 ConditionUntagged::Nested(condition) => Condition::Nested(condition),
3420 ConditionUntagged::Filter(condition) => Condition::Filter(condition),
3421 ConditionUntagged::CustomIdChecker(condition) => Condition::CustomIdChecker(condition),
3422 }
3423 }
3424}
3425
3426impl<'de> serde::Deserialize<'de> for Condition {
3427 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
3432 where
3433 D: serde::Deserializer<'de>,
3434 {
3435 let value = serde_value::Value::deserialize(deserializer)?;
3441
3442 if let serde_value::Value::Map(obj) = &value
3445 && obj.contains_key(&serde_value::Value::String("key".into()))
3446 {
3447 return value
3448 .deserialize_into()
3449 .map(Condition::Field)
3450 .map_err(serde::de::Error::custom);
3451 }
3452
3453 value
3455 .deserialize_into::<ConditionUntagged>()
3456 .map(Condition::from)
3457 .map_err(serde::de::Error::custom)
3458 }
3459}
3460
3461impl Condition {
3462 pub fn new_custom(checker: Arc<dyn CustomIdCheckerCondition + Send + Sync + 'static>) -> Self {
3463 Condition::CustomIdChecker(CustomIdChecker(checker))
3464 }
3465}
3466
/// Shared handle to a custom id checker. `Hash` and `PartialEq` are
/// implemented by hand for this type and work on the pointer held by the `Arc`.
#[derive(Debug, Clone)]
pub struct CustomIdChecker(pub Arc<dyn CustomIdCheckerCondition + Send + Sync + 'static>);
3469
3470impl Hash for CustomIdChecker {
3471 fn hash<H: hash::Hasher>(&self, state: &mut H) {
3472 std::ptr::hash(Arc::as_ptr(&self.0), state);
3477 }
3478}
3479
3480impl PartialEq for CustomIdChecker {
3481 fn eq(&self, other: &Self) -> bool {
3482 Arc::ptr_eq(&self.0, &other.0)
3487 }
3488}
3489
// Marker impl: the pointer-identity comparison in `PartialEq` is reflexive.
impl Eq for CustomIdChecker {}
3491
3492impl Condition {
3493 pub fn new_nested(key: JsonPath, filter: Filter) -> Self {
3494 Self::Nested(NestedCondition {
3495 nested: Nested { key, filter },
3496 })
3497 }
3498
3499 pub fn size_estimation(&self) -> usize {
3500 match self {
3501 Condition::Field(field_condition) => field_condition.input_size(),
3502 Condition::HasId(has_id_condition) => has_id_condition.has_id.len(),
3503 Condition::Filter(filter) => filter.max_condition_input_size(),
3504 Condition::Nested(nested) => nested.filter().max_condition_input_size(),
3505 Condition::IsEmpty(_)
3506 | Condition::IsNull(_)
3507 | Condition::HasVector(_)
3508 | Condition::CustomIdChecker(_) => 0,
3509 }
3510 }
3511
3512 pub fn sub_conditions_count(&self) -> usize {
3513 match self {
3514 Condition::Nested(nested_condition) => {
3515 nested_condition.filter().total_conditions_count()
3516 }
3517 Condition::Filter(filter) => filter.total_conditions_count(),
3518 Condition::Field(_)
3519 | Condition::IsEmpty(_)
3520 | Condition::IsNull(_)
3521 | Condition::CustomIdChecker(_)
3522 | Condition::HasId(_)
3523 | Condition::HasVector(_) => 1,
3524 }
3525 }
3526
3527 pub fn targeted_key(&self) -> Option<PayloadKeyType> {
3528 match self {
3529 Condition::Field(field_condition) => Some(field_condition.key.clone()),
3530 Condition::IsEmpty(is_empty_condition) => Some(is_empty_condition.is_empty.key.clone()),
3531 Condition::IsNull(is_null_condition) => Some(is_null_condition.is_null.key.clone()),
3532 Condition::Nested(nested_condition) => Some(nested_condition.array_key()),
3533 Condition::Filter(filter) => filter.iter_conditions().find_map(|c| c.targeted_key()),
3534 Condition::HasId(_) | Condition::HasVector(_) | Condition::CustomIdChecker(_) => None,
3535 }
3536 }
3537}
3538
3539impl Validate for Condition {
3541 fn validate(&self) -> Result<(), ValidationErrors> {
3542 match self {
3543 Condition::HasId(_)
3544 | Condition::IsEmpty(_)
3545 | Condition::IsNull(_)
3546 | Condition::HasVector(_) => Ok(()),
3547 Condition::Field(field_condition) => field_condition.validate(),
3548 Condition::Nested(nested_condition) => nested_condition.validate(),
3549 Condition::Filter(filter) => filter.validate(),
3550 Condition::CustomIdChecker(_) => Ok(()),
3551 }
3552 }
3553}
3554
/// Interface for a programmatic check on point ids, used through
/// `CustomIdChecker`.
pub trait CustomIdCheckerCondition: fmt::Debug {
    /// Returns a cardinality estimation for this check given `points`.
    /// NOTE(review): `points` is presumably the total number of points
    /// considered - confirm against callers.
    fn estimate_cardinality(&self, points: usize) -> CardinalityEstimation;
    /// Returns the check's verdict for `point_id`.
    fn check(&self, point_id: ExtendedPointId) -> bool;
}
3559
// Request option describing which payload to return: a flag, a list of
// fields, or an include/exclude selector. Untagged, so the shape of the input
// picks the variant.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Hash)]
#[serde(untagged, rename_all = "snake_case")]
#[serde(
    expecting = "Expected a boolean, an array of strings, or an object with an include/exclude field"
)]
pub enum WithPayloadInterface {
    // Flag: payload enabled or disabled.
    Bool(bool),
    // List of payload fields; converted into an include selector.
    Fields(Vec<JsonPath>),
    // Explicit include/exclude selector.
    Selector(PayloadSelector),
}
3575
3576impl From<bool> for WithPayloadInterface {
3577 fn from(b: bool) -> Self {
3578 WithPayloadInterface::Bool(b)
3579 }
3580}
3581
3582impl Default for WithPayloadInterface {
3583 fn default() -> Self {
3584 WithPayloadInterface::Bool(false)
3585 }
3586}
3587
// Request option describing which vectors to return: a flag or a list of
// vector names. Untagged, so the shape of the input picks the variant.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(untagged, rename_all = "snake_case")]
#[serde(expecting = "Expected a boolean, or an array of strings")]
pub enum WithVector {
    // Flag: vectors enabled or disabled.
    Bool(bool),
    // Explicit list of vector names.
    Selector(Vec<VectorNameBuf>),
}
3599
3600impl WithVector {
3601 pub fn is_enabled(&self) -> bool {
3602 match self {
3603 WithVector::Bool(b) => *b,
3604 WithVector::Selector(_) => true,
3605 }
3606 }
3607
3608 pub fn merge(&self, other: &WithVector) -> WithVector {
3610 match (self, other) {
3611 (WithVector::Bool(true), _) => WithVector::Bool(true),
3613 (_, WithVector::Bool(true)) => WithVector::Bool(true),
3614
3615 (WithVector::Bool(false), WithVector::Bool(false)) => WithVector::Bool(false),
3617
3618 (WithVector::Selector(s1), WithVector::Selector(s2)) => {
3620 WithVector::Selector(s1.iter().chain(s2).unique().cloned().collect())
3621 }
3622
3623 (WithVector::Bool(false), WithVector::Selector(s)) => WithVector::Selector(s.clone()),
3625 (WithVector::Selector(s), WithVector::Bool(false)) => WithVector::Selector(s.clone()),
3626 }
3627 }
3628}
3629
3630impl From<bool> for WithVector {
3631 fn from(b: bool) -> Self {
3632 WithVector::Bool(b)
3633 }
3634}
3635
3636impl From<VectorNameBuf> for WithVector {
3637 fn from(name: VectorNameBuf) -> Self {
3638 WithVector::Selector(vec![name])
3639 }
3640}
3641
3642impl Default for WithVector {
3643 fn default() -> Self {
3644 WithVector::Bool(false)
3645 }
3646}
3647
3648impl WithPayloadInterface {
3649 pub fn is_required(&self) -> bool {
3650 match self {
3651 WithPayloadInterface::Bool(b) => *b,
3652 _ => true,
3653 }
3654 }
3655}
3656
3657impl From<bool> for WithPayload {
3658 fn from(x: bool) -> Self {
3659 WithPayload {
3660 enable: x,
3661 payload_selector: None,
3662 }
3663 }
3664}
3665
3666impl From<WithPayloadInterface> for WithPayload {
3667 fn from(interface: WithPayloadInterface) -> Self {
3668 match interface {
3669 WithPayloadInterface::Bool(enable) => WithPayload {
3670 enable,
3671 payload_selector: None,
3672 },
3673 WithPayloadInterface::Fields(fields) => WithPayload {
3674 enable: true,
3675 payload_selector: Some(PayloadSelector::new_include(fields)),
3676 },
3677 WithPayloadInterface::Selector(selector) => WithPayload {
3678 enable: true,
3679 payload_selector: Some(selector),
3680 },
3681 }
3682 }
3683}
3684
3685impl From<&WithPayloadInterface> for WithPayload {
3686 fn from(interface: &WithPayloadInterface) -> Self {
3687 WithPayload::from(interface.clone())
3688 }
3689}
3690
// Selector listing the payload keys to keep. Unknown fields are rejected on
// deserialization.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(deny_unknown_fields, rename_all = "snake_case")]
pub struct PayloadSelectorInclude {
    // Payload keys to keep.
    pub include: Vec<PayloadKeyType>,
}
3697
3698impl PayloadSelectorInclude {
3699 pub fn new(include: Vec<PayloadKeyType>) -> Self {
3700 Self { include }
3701 }
3702}
3703
// Selector holding the list of payload keys to drop.
// (Plain `//` comments on purpose: `///` docs on a `JsonSchema` type end up in the
// generated schema as descriptions.)
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(deny_unknown_fields, rename_all = "snake_case")]
pub struct PayloadSelectorExclude {
    // Key patterns; `PayloadSelector::process` keeps a value only when none of them matches its key.
    pub exclude: Vec<PayloadKeyType>,
}
3710
3711impl PayloadSelectorExclude {
3712 pub fn new(exclude: Vec<PayloadKeyType>) -> Self {
3713 Self { exclude }
3714 }
3715}
3716
// Either an include-list or an exclude-list of payload keys.
// The enum is `untagged`, so the variant is chosen by the shape of the input:
// an object with an `include` key or one with an `exclude` key.
// (Plain `//` comments on purpose: `///` docs on a `JsonSchema` type end up in the
// generated schema as descriptions.)
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(untagged, rename_all = "snake_case")]
pub enum PayloadSelector {
    // Keep only the listed keys.
    Include(PayloadSelectorInclude),
    // Keep everything except the listed keys.
    Exclude(PayloadSelectorExclude),
}
3726
3727impl From<PayloadSelectorExclude> for WithPayloadInterface {
3728 fn from(selector: PayloadSelectorExclude) -> Self {
3729 WithPayloadInterface::Selector(PayloadSelector::Exclude(selector))
3730 }
3731}
3732
3733impl From<PayloadSelectorInclude> for WithPayloadInterface {
3734 fn from(selector: PayloadSelectorInclude) -> Self {
3735 WithPayloadInterface::Selector(PayloadSelector::Include(selector))
3736 }
3737}
3738
3739impl PayloadSelector {
3740 pub fn new_include(vecs_payload_key_type: Vec<PayloadKeyType>) -> Self {
3741 PayloadSelector::Include(PayloadSelectorInclude {
3742 include: vecs_payload_key_type,
3743 })
3744 }
3745
3746 pub fn new_exclude(vecs_payload_key_type: Vec<PayloadKeyType>) -> Self {
3747 PayloadSelector::Exclude(PayloadSelectorExclude {
3748 exclude: vecs_payload_key_type,
3749 })
3750 }
3751
3752 pub fn process(&self, x: Payload) -> Payload {
3754 match self {
3755 PayloadSelector::Include(selector) => JsonPath::value_filter(&x.0, |key, _| {
3756 selector
3757 .include
3758 .iter()
3759 .any(|pattern| pattern.check_include_pattern(key))
3760 })
3761 .into(),
3762 PayloadSelector::Exclude(selector) => JsonPath::value_filter(&x.0, |key, _| {
3763 selector
3764 .exclude
3765 .iter()
3766 .all(|pattern| !pattern.check_exclude_pattern(key))
3767 })
3768 .into(),
3769 }
3770 }
3771}
3772
// Payload request made of an on/off flag and an optional selector.
// See the `From<WithPayloadInterface>` impl for how the two fields are populated.
// (Plain `//` comments on purpose: `///` docs on a `JsonSchema` type end up in the
// generated schema as descriptions.)
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, Default, PartialEq, Eq)]
#[serde(deny_unknown_fields, rename_all = "snake_case")]
pub struct WithPayload {
    // Presumably "return payload at all" — set to `true` whenever a selector is given.
    pub enable: bool,
    // Include/exclude selector; `None` when only a boolean was supplied.
    pub payload_selector: Option<PayloadSelector>,
}
3781
// `min_should` clause of a `Filter`: a list of conditions plus a count.
// NOTE(review): presumably at least `min_count` of `conditions` must match —
// the evaluation logic is not in this part of the file, confirm there.
// (Plain `//` comments on purpose: `///` docs on a `JsonSchema` type end up in the
// generated schema as descriptions.)
#[derive(
    Debug, Deserialize, Serialize, JsonSchema, Validate, Clone, PartialEq, Eq, Default, Hash,
)]
#[serde(rename_all = "snake_case")]
pub struct MinShould {
    // Validated recursively (`#[validate(nested)]`).
    #[validate(nested)]
    pub conditions: Vec<Condition>,
    pub min_count: usize,
}
3791
// A set of optional filter clauses. Unknown JSON keys are rejected
// (`deny_unknown_fields`), and clauses that are `None` are omitted on serialization.
//
// `should`, `must` and `must_not` are (de)serialized through `MaybeOneOrMany`, so the
// JSON may hold either a single condition object or a list of conditions.
// (Plain `//` comments on purpose: `///` docs on a `JsonSchema` type end up in the
// generated schema as descriptions.)
#[derive(
    Debug, Deserialize, Serialize, JsonSchema, Validate, Clone, PartialEq, Eq, Default, Hash,
)]
#[serde(deny_unknown_fields, rename_all = "snake_case")]
pub struct Filter {
    // Presumably: at least one of these conditions has to match — confirm in the evaluator.
    #[validate(nested)]
    #[serde(
        default,
        with = "MaybeOneOrMany",
        skip_serializing_if = "Option::is_none"
    )]
    #[schemars(with = "MaybeOneOrMany<Condition>")]
    pub should: Option<Vec<Condition>>,
    // Conditions combined with a minimum count, see `MinShould`.
    #[validate(nested)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub min_should: Option<MinShould>,
    // Presumably: all of these conditions have to match — confirm in the evaluator.
    #[validate(nested)]
    #[serde(
        default,
        with = "MaybeOneOrMany",
        skip_serializing_if = "Option::is_none"
    )]
    #[schemars(with = "MaybeOneOrMany<Condition>")]
    pub must: Option<Vec<Condition>>,
    // Presumably: none of these conditions may match — confirm in the evaluator.
    #[validate(nested)]
    #[serde(
        default,
        with = "MaybeOneOrMany",
        skip_serializing_if = "Option::is_none"
    )]
    #[schemars(with = "MaybeOneOrMany<Condition>")]
    pub must_not: Option<Vec<Condition>>,
}
3829
3830impl Filter {
3831 pub fn new() -> Self {
3832 Filter {
3833 should: None,
3834 min_should: None,
3835 must: None,
3836 must_not: None,
3837 }
3838 }
3839
3840 pub fn new_should(condition: Condition) -> Self {
3841 Filter {
3842 should: Some(vec![condition]),
3843 min_should: None,
3844 must: None,
3845 must_not: None,
3846 }
3847 }
3848
3849 pub fn new_min_should(min_should: MinShould) -> Self {
3850 Filter {
3851 should: None,
3852 min_should: Some(min_should),
3853 must: None,
3854 must_not: None,
3855 }
3856 }
3857
3858 pub fn new_must(condition: Condition) -> Self {
3859 Filter {
3860 should: None,
3861 min_should: None,
3862 must: Some(vec![condition]),
3863 must_not: None,
3864 }
3865 }
3866
3867 pub fn new_must_not(condition: Condition) -> Self {
3868 Filter {
3869 should: None,
3870 min_should: None,
3871 must: None,
3872 must_not: Some(vec![condition]),
3873 }
3874 }
3875
3876 pub fn with_point_ids(self, ids: impl IntoIterator<Item = PointIdType>) -> Filter {
3878 let has_id_condition: HasIdCondition = ids.into_iter().collect();
3879
3880 let Filter {
3881 should,
3882 min_should,
3883 must,
3884 must_not,
3885 } = self;
3886
3887 let new_must = match must {
3888 Some(mut must) => {
3889 must.push(Condition::HasId(has_id_condition));
3890 Some(must)
3891 }
3892 None => Some(vec![Condition::HasId(has_id_condition)]),
3893 };
3894
3895 Filter {
3896 should,
3897 min_should,
3898 must: new_must,
3899 must_not,
3900 }
3901 }
3902
3903 pub fn merge(&self, other: &Filter) -> Filter {
3904 self.clone().merge_owned(other.clone())
3905 }
3906
3907 pub fn merge_owned(self, other: Filter) -> Filter {
3908 let merge_component = |this, other| -> Option<Vec<Condition>> {
3909 match (this, other) {
3910 (None, None) => None,
3911 (Some(this), None) => Some(this),
3912 (None, Some(other)) => Some(other),
3913 (Some(mut this), Some(mut other)) => {
3914 this.append(&mut other);
3915 Some(this)
3916 }
3917 }
3918 };
3919 Filter {
3920 should: merge_component(self.should, other.should),
3921 min_should: {
3922 match (self.min_should, other.min_should) {
3923 (None, None) => None,
3924 (Some(this), None) => Some(this),
3925 (None, Some(other)) => Some(other),
3926 (Some(mut this), Some(mut other)) => {
3927 this.conditions.append(&mut other.conditions);
3928
3929 this.min_count = this.min_count.max(other.min_count);
3931
3932 Some(this)
3933 }
3934 }
3935 },
3936 must: merge_component(self.must, other.must),
3937 must_not: merge_component(self.must_not, other.must_not),
3938 }
3939 }
3940
3941 pub fn merge_opts(this: Option<Self>, other: Option<Self>) -> Option<Self> {
3942 match (this, other) {
3943 (None, None) => None,
3944 (Some(this), None) => Some(this),
3945 (None, Some(other)) => Some(other),
3946 (Some(this), Some(other)) => Some(this.merge_owned(other)),
3947 }
3948 }
3949
3950 pub fn iter_conditions(&self) -> impl Iterator<Item = &Condition> {
3951 self.must
3952 .iter()
3953 .flatten()
3954 .chain(self.must_not.iter().flatten())
3955 .chain(self.should.iter().flatten())
3956 .chain(self.min_should.iter().flat_map(|i| &i.conditions))
3957 }
3958
3959 pub fn total_conditions_count(&self) -> usize {
3961 fn count_all_conditions(field: Option<&Vec<Condition>>) -> usize {
3962 field
3963 .map(|i| i.iter().map(|j| j.sub_conditions_count()).sum::<usize>())
3964 .unwrap_or(0)
3965 }
3966
3967 count_all_conditions(self.should.as_ref())
3968 + count_all_conditions(self.min_should.as_ref().map(|i| &i.conditions))
3969 + count_all_conditions(self.must.as_ref())
3970 + count_all_conditions(self.must_not.as_ref())
3971 }
3972
3973 pub fn max_condition_input_size(&self) -> usize {
3975 self.iter_conditions()
3976 .map(|i| i.size_estimation())
3977 .max()
3978 .unwrap_or(0)
3979 }
3980}
3981
/// Supported snapshot formats.
///
/// NOTE(review): the detailed per-variant descriptions are missing from this copy of
/// the file; the notes below are inferred from the variant names only — confirm against
/// the snapshot code before relying on them.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum SnapshotFormat {
    /// Presumably the oldest, legacy layout.
    Ancient,
    /// Presumably the regular (default) layout.
    Regular,
    /// Presumably a layout that can be produced/consumed as a stream.
    Streamable,
}
4048
#[cfg(test)]
pub(crate) mod test_utils {
    use super::{GeoLineString, GeoPoint, GeoPolygon};

    /// Turns `(lon, lat)` pairs into a `GeoLineString` via `GeoPoint::new_unchecked`.
    fn line_string(points: Vec<(f64, f64)>) -> GeoLineString {
        GeoLineString {
            points: points
                .into_iter()
                .map(|(lon, lat)| GeoPoint::new_unchecked(lon, lat))
                .collect(),
        }
    }

    /// Builds a polygon from its exterior ring only (`interiors` is `None`).
    pub fn build_polygon(exterior_points: Vec<(f64, f64)>) -> GeoPolygon {
        GeoPolygon {
            exterior: line_string(exterior_points),
            interiors: None,
        }
    }

    /// Builds a polygon from an exterior ring and a list of interior rings.
    ///
    /// `interiors` is always `Some`, even when `interiors_points` is empty.
    pub fn build_polygon_with_interiors(
        exterior_points: Vec<(f64, f64)>,
        interiors_points: Vec<Vec<(f64, f64)>>,
    ) -> GeoPolygon {
        GeoPolygon {
            exterior: line_string(exterior_points),
            interiors: Some(interiors_points.into_iter().map(line_string).collect()),
        }
    }
}
4096
4097#[cfg(test)]
4098mod tests {
4099 use itertools::Itertools;
4100 use rstest::rstest;
4101 use serde::de::DeserializeOwned;
4102 use serde_json;
4103
4104 use super::test_utils::build_polygon_with_interiors;
4105 use super::*;
4106
4107 #[allow(dead_code)]
4108 fn check_rms_serialization<T: Serialize + DeserializeOwned + PartialEq + std::fmt::Debug>(
4109 record: T,
4110 ) {
4111 let binary_entity = rmp_serde::to_vec(&record).expect("serialization ok");
4112 let de_record: T = rmp_serde::from_slice(&binary_entity).expect("deserialization ok");
4113
4114 assert_eq!(record, de_record);
4115 }
4116
/// Serializes a payload with `rmp_serde` and tries to read the bytes back with
/// `serde_cbor`, printing both values.
///
/// Marked `#[ignore]`, so it only runs on explicit request.
/// NOTE(review): presumably kept as a manual experiment on cross-format behaviour — confirm.
#[test]
#[ignore]
fn test_rmp_vs_cbor_deserialize() {
    let payload = payload_json! {"payload_key": "payload_value"};
    // Encode with MessagePack ...
    let raw = rmp_serde::to_vec(&payload).unwrap();
    // ... but decode with CBOR.
    let de_record: Payload = serde_cbor::from_slice(&raw).unwrap();
    eprintln!("payload = {payload:#?}");
    eprintln!("de_record = {de_record:#?}");
}
4126
4127 #[rstest]
4128 #[case::rfc_3339("2020-03-01T00:00:00Z")]
4129 #[case::rfc_3339_custom_tz("2020-03-01T00:00:00-09:00")]
4130 #[case::rfc_3339_custom_tz_no_colon("2020-03-01 00:00:00-0900")]
4131 #[case::rfc_3339_custom_tz_no_colon_and_t("2020-03-01T00:00:00-0900")]
4132 #[case::rfc_3339_custom_tz_no_minutes("2020-03-01 00:00:00-09")]
4133 #[case::rfc_3339_and_decimals("2020-03-01T00:00:00.123456Z")]
4134 #[case::without_z("2020-03-01T00:00:00")]
4135 #[case::without_z_and_decimals("2020-03-01T00:00:00.12")]
4136 #[case::space_sep_without_z("2020-03-01 00:00:00")]
4137 #[case::space_sep_without_z_and_decimals("2020-03-01 00:00:00.123456")]
4138 fn test_datetime_deserialization(#[case] datetime: &str) {
4139 let datetime = DateTimePayloadType::from_str(datetime).unwrap();
4140 let serialized = serde_json::to_string(&datetime).unwrap();
4141 let deserialized: DateTimePayloadType = serde_json::from_str(&serialized).unwrap();
4142 assert_eq!(datetime, deserialized);
4143 }
4144
/// A datetime without an explicit zone must yield the same timestamp as the
/// same datetime with a trailing `Z`.
#[test]
fn test_datetime_deserialization_equivalency() {
    let timestamp_of =
        |text: &str| DateTimePayloadType::from_str(text).unwrap().timestamp();

    let with_z = timestamp_of("2020-03-01T01:02:03.123456Z");
    let without_z = timestamp_of("2020-03-01T01:02:03.123456");

    assert_eq!(with_z, without_z);
}
4155
/// A malformed datetime in a range condition must produce an error message that
/// mentions RFC3339, echoes the offending input and contains an example.
#[test]
fn test_invalid_datetime_range_returns_clear_rfc3339_error() {
    let json = r#"{
        "key": "created_at",
        "range": {
            "gte": "2014-01-01T00:00:00BAD"
        }
    }"#;

    let err = match serde_json::from_str::<Condition>(json) {
        Ok(condition) => panic!("expected a parse error, got {condition:?}"),
        Err(error) => error.to_string(),
    };

    for expected_fragment in ["RFC3339", "2014-01-01T00:00:00BAD", "Example"] {
        assert!(err.contains(expected_fragment), "err was: {err}");
    }
}
4173
/// `DateTimePayloadType` must survive a round trip through the binary `rmp_serde` format.
#[test]
fn test_datetime_payload_type_binary_roundtrip() {
    let source = DateTimePayloadType::from_str("2024-06-15T12:30:45Z").unwrap();

    let encoded = rmp_serde::to_vec(&source).expect("serialize");
    let decoded: DateTimePayloadType = rmp_serde::from_slice(&encoded).expect("deserialize");

    assert_eq!(source, decoded);
}
4186
/// A datetime `RangeInterface` must survive a round trip through the binary
/// `rmp_serde` format and come back as the same variant with the same bounds.
#[test]
fn test_range_interface_datetime_binary_roundtrip() {
    let parse = |text: &str| DateTimePayloadType::from_str(text).unwrap();

    let range = RangeInterface::DateTime(Range {
        lt: None,
        gt: None,
        gte: Some(parse("2024-01-01T00:00:00Z")),
        lte: Some(parse("2024-12-31T23:59:59Z")),
    });

    let encoded = rmp_serde::to_vec(&range).expect("serialize");
    let decoded: RangeInterface = rmp_serde::from_slice(&encoded).expect("deserialize");

    assert_eq!(range, decoded);
}
4207
/// Non-field conditions given as JSON must still resolve to their own
/// `Condition` variants (`IsEmpty`, `HasId`, `Nested`).
#[test]
fn test_condition_json_fallback_to_untagged() {
    let parse = |json: &str| -> Condition { serde_json::from_str(json).unwrap() };

    let is_empty = parse(r#"{"is_empty": {"key": "optional_field"}}"#);
    assert!(matches!(is_empty, Condition::IsEmpty(_)));

    let has_id = parse(r#"{"has_id": [1, 2, 3]}"#);
    assert!(matches!(has_id, Condition::HasId(_)));

    let nested = parse(r#"{"nested": {"key": "items", "filter": {"must": []}}}"#);
    assert!(matches!(nested, Condition::Nested(_)));
}
4227
/// Formatting a `DateTimeWrapper` with `to_string` and parsing it back must be lossless.
#[test]
fn test_datetime_wrapper_transcoding() {
    let now = DateTimeWrapper(chrono::Utc::now());

    let rendered = now.to_string();
    let reparsed = DateTimeWrapper::from_str(&rendered).unwrap();

    assert_eq!(now, reparsed);
}
4234
/// Datetimes carrying different UTC offsets must order by their actual instant.
///
/// The inputs are listed in the expected chronological order; after sorting by
/// timestamp the original indices must still be ascending.
#[test]
fn test_timezone_ordering() {
    let inputs = [
        "2000-06-08 00:18:53+0900",
        "2000-06-07 07:25:34-1100",
        "2000-07-10T00:18:53+0100",
        "2000-07-11 00:25:34-01:00",
        "2000-07-11 00:25:35-01",
    ];

    let by_timestamp: Vec<_> = inputs
        .iter()
        .enumerate()
        .map(|(position, text)| (position, DateTimePayloadType::from_str(text).unwrap()))
        .sorted_by_key(|(_, parsed)| parsed.timestamp())
        .collect();

    // Every neighbouring pair must keep its original relative order.
    by_timestamp
        .array_windows()
        .for_each(|[(i1, dt1), (i2, dt2)]| {
            assert!(
                i1 < i2,
                "i1: {}, dt1: {}, ts1: {}\ni2: {}, dt2: {}, ts2: {}",
                i1,
                dt1.0,
                dt1.timestamp(),
                i2,
                dt2.0,
                dt2.timestamp()
            );
        });
}
4267
/// A point close to the centre is inside the radius; a point further away is not.
#[test]
fn test_geo_radius_check_point() {
    let circle = GeoRadius {
        center: GeoPoint::new_unchecked(0.0, 0.0),
        radius: OrderedFloat(80000.0),
    };

    let near = GeoPoint::new_unchecked(0.5, 0.5);
    let far = GeoPoint::new_unchecked(1.5, 1.5);

    assert!(circle.check_point(&near));
    assert!(!circle.check_point(&far));
}
4281
/// A point within the box corners is accepted; a point beyond them is rejected.
#[test]
fn test_geo_boundingbox_check_point() {
    let bbox = GeoBoundingBox {
        top_left: GeoPoint::new_unchecked(-1.0, 1.0),
        bottom_right: GeoPoint::new_unchecked(1.0, -1.0),
    };

    let within = GeoPoint::new_unchecked(-0.5, 0.5);
    let beyond = GeoPoint::new_unchecked(1.5, 1.5);

    assert!(bbox.check_point(&within));
    assert!(!bbox.check_point(&beyond));
}
4297
/// Bounding box whose left edge (lon 167) lies east of its right edge (lon -66.9),
/// i.e. a box that wraps around the antimeridian.
#[test]
fn test_geo_boundingbox_antimeridian_check_point() {
    let bbox = GeoBoundingBox {
        top_left: GeoPoint::new_unchecked(167.0, 74.071028),
        bottom_right: GeoPoint::new_unchecked(-66.885417, 18.7763),
    };

    // lon -73.99 falls inside the wrapped longitude range.
    let within = GeoPoint::new_unchecked(-73.991516, 40.75798);
    assert!(bbox.check_point(&within));

    // lon 13.41 falls in the gap between the two edges.
    let beyond = GeoPoint::new_unchecked(13.41053, 52.52437);
    assert!(!bbox.check_point(&beyond));
}
4315
/// Table-driven check of polygon containment.
///
/// Each case is `(exterior ring, interior rings, [(point, expected)])`; points are
/// `(lon, lat)` pairs. In every case the last probe lies on an edge or vertex and is
/// expected to be reported as outside.
#[test]
fn test_geo_polygon_check_point() {
    let test_cases = [
        // Case 1: plain square, one empty interior ring.
        (
            // exterior
            vec![
                (-1.0, -1.0),
                (1.0, -1.0),
                (1.0, 1.0),
                (-1.0, 1.0),
                (-1.0, -1.0),
            ],
            // interiors
            vec![vec![]],
            // inside, outside, on the right edge
            vec![((0.5, 0.5), true), ((1.5, 1.5), false), ((1.0, 0.0), false)],
        ),
        // Case 2: self-intersecting ("bow-tie") ring whose edges cross at the origin.
        (
            // exterior
            vec![
                (-1.0, -1.0),
                (1.0, 1.0),
                (1.0, -1.0),
                (-1.0, 1.0),
                (-1.0, -1.0),
            ],
            // interiors
            vec![vec![]],
            // inside one lobe, between the lobes, on the crossing point
            vec![((0.5, 0.0), true), ((0.0, 0.5), false), ((0.0, 0.0), false)],
        ),
        // Case 3: rectangle with a square hole around the origin.
        (
            // exterior
            vec![
                (-1.0, -1.0),
                (1.5, -1.0),
                (1.5, 1.5),
                (-1.0, 1.5),
                (-1.0, -1.0),
            ],
            // interiors
            vec![vec![
                (-0.5, -0.5),
                (-0.5, 0.5),
                (0.5, 0.5),
                (0.5, -0.5),
                (-0.5, -0.5),
            ]],
            // between hole and exterior, inside the hole, on a corner of the hole
            vec![((0.6, 0.6), true), ((0.0, 0.0), false), ((0.5, 0.5), false)],
        ),
    ];

    for (exterior, interiors, points) in test_cases {
        let polygon = build_polygon_with_interiors(exterior, interiors);

        for ((lon, lat), expected_result) in points {
            let inside_result = polygon
                .convert()
                .check_point(&GeoPoint::new_unchecked(lon, lat));
            assert_eq!(inside_result, expected_result);
        }
    }
}
4383
/// Smoke test: a filter with one `must` match condition serializes to JSON
/// without error. The JSON is only printed, not asserted.
#[test]
fn test_serialize_query() {
    let hello_world = Condition::Field(FieldCondition::new_match(
        JsonPath::new("hello"),
        "world".to_owned().into(),
    ));
    let filter = Filter::new_must(hello_world);

    let json = serde_json::to_string_pretty(&filter).unwrap();
    eprintln!("{json}")
}
4398
/// A filter object with an unknown key must be rejected.
#[test]
fn test_deny_unknown_fields() {
    let query1 = r#"
    {
        "wrong": "query"
    }
    "#;

    let parsed = serde_json::from_str::<Filter>(query1);

    assert!(parsed.is_err())
}
4410
/// `match.value` must parse into the `ValueVariants` variant matching its JSON
/// type: integer, bool or string.
#[test]
fn test_parse_match_query() {
    // Parses a field condition and returns its `match` clause.
    let parse_match = |json: &str| -> Match {
        let condition: FieldCondition = serde_json::from_str(json).unwrap();
        condition.r#match.unwrap()
    };

    let integer_query = r#"
    {
        "key": "hello",
        "match": { "value": 42 }
    }
    "#;
    assert_eq!(
        parse_match(integer_query),
        Match::Value(MatchValue {
            value: ValueVariants::Integer(42)
        })
    );

    let bool_query = r#"
    {
        "key": "hello",
        "match": { "value": true }
    }
    "#;
    assert_eq!(
        parse_match(bool_query),
        Match::Value(MatchValue {
            value: ValueVariants::Bool(true)
        })
    );

    let string_query = r#"
    {
        "key": "hello",
        "match": { "value": "world" }
    }
    "#;
    assert_eq!(
        parse_match(string_query),
        Match::Value(MatchValue {
            value: ValueVariants::String("world".to_owned())
        })
    );
}
4456
/// `match.any` with a list of strings must parse into `Match::Any` holding
/// `AnyVariants::Strings` with exactly the listed keywords.
#[test]
fn test_parse_match_any() {
    let query = r#"
    {
        "should": [
            {
                "key": "Jason",
                "match": {
                    "any": [
                        "Bourne",
                        "Momoa",
                        "Statham"
                    ]
                }
            }
        ]
    }
    "#;

    let filter: Filter = serde_json::from_str(query).unwrap();
    let should = filter.should.unwrap();

    assert_eq!(should.len(), 1);
    let Some(Condition::Field(c)) = should.first() else {
        panic!("Condition::Field expected")
    };

    assert_eq!(c.key.to_string(), "Jason");

    let Match::Any(m) = c.r#match.as_ref().unwrap() else {
        panic!("Match::Any expected")
    };
    // The failure message names the variant that is actually matched here
    // (it previously referred to a non-matching `Keywords` name).
    let AnyVariants::Strings(kws) = &m.any else {
        panic!("AnyVariants::Strings expected")
    };

    assert_eq!(kws.len(), 3);
    let expect: IndexSet<_, FnvBuildHasher> = ["Bourne", "Momoa", "Statham"]
        .into_iter()
        .map(|i| i.to_string())
        .collect();
    assert_eq!(kws, &expect);
}
4500
/// `match.any` mixing a string and an integer in one list must be rejected.
#[test]
fn test_parse_match_any_mixed_types() {
    let query = r#"
    {
        "should": [
            {
                "key": "Jason",
                "match": {
                    "any": [
                        "Bourne",
                        42
                    ]
                }
            }
        ]
    }
    "#;

    let parsed = serde_json::from_str::<Filter>(query);
    assert!(parsed.is_err());
}
4522
/// Same as the plain match-value parsing test, but with a dotted key
/// (`hello.nested`): integer, bool and string values must all parse.
#[test]
fn test_parse_nested_match_query() {
    // Parses a field condition and returns its `match` clause.
    let parse_match = |json: &str| -> Match {
        let condition: FieldCondition = serde_json::from_str(json).unwrap();
        condition.r#match.unwrap()
    };

    let integer_query = r#"
    {
        "key": "hello.nested",
        "match": { "value": 42 }
    }
    "#;
    assert_eq!(
        parse_match(integer_query),
        Match::Value(MatchValue {
            value: ValueVariants::Integer(42)
        })
    );

    let bool_query = r#"
    {
        "key": "hello.nested",
        "match": { "value": true }
    }
    "#;
    assert_eq!(
        parse_match(bool_query),
        Match::Value(MatchValue {
            value: ValueVariants::Bool(true)
        })
    );

    let string_query = r#"
    {
        "key": "hello.nested",
        "match": { "value": "world" }
    }
    "#;
    assert_eq!(
        parse_match(string_query),
        Match::Value(MatchValue {
            value: ValueVariants::String("world".to_owned())
        })
    );
}
4568
/// An `is_empty` clause must parse into `Condition::IsEmpty` carrying the given key.
#[test]
fn test_parse_empty_query() {
    let query = r#"
    {
        "should": [
            {
                "is_empty" : {
                    "key" : "Jason"
                }
            }
        ]
    }
    "#;

    let filter: Filter = serde_json::from_str(query).unwrap();
    let conditions = filter.should.unwrap();
    assert_eq!(conditions.len(), 1);

    match conditions.first() {
        Some(Condition::IsEmpty(c)) => assert_eq!(c.is_empty.key.to_string(), "Jason"),
        _ => panic!("Condition::IsEmpty expected"),
    }
}
4593
/// An `is_null` clause must parse into `Condition::IsNull` carrying the given key.
#[test]
fn test_parse_null_query() {
    let query = r#"
    {
        "should": [
            {
                "is_null" : {
                    "key" : "Jason"
                }
            }
        ]
    }
    "#;

    let filter: Filter = serde_json::from_str(query).unwrap();
    let conditions = filter.should.unwrap();
    assert_eq!(conditions.len(), 1);

    match conditions.first() {
        Some(Condition::IsNull(c)) => assert_eq!(c.is_null.key.to_string(), "Jason"),
        _ => panic!("Condition::IsNull expected"),
    }
}
4618
/// A `nested` condition with a two-element inner `must` list must parse into
/// `Condition::Nested`, expose both the raw key and its array form (`key[]`), and keep
/// the inner field conditions in order.
#[test]
fn test_parse_nested_filter_query() {
    let query = r#"
    {
        "must": [
            {
                "nested": {
                    "key": "country.cities",
                    "filter": {
                        "must": [
                            {
                                "key": "population",
                                "range": {
                                    "gte": 8
                                }
                            },
                            {
                                "key": "sightseeing",
                                "values_count": {
                                    "lt": 3
                                }
                            }
                        ]
                    }
                }
            }
        ]
    }
    "#;
    let filter: Filter = serde_json::from_str(query).unwrap();
    let musts = filter.must.unwrap();
    assert_eq!(musts.len(), 1);
    match musts.first() {
        Some(Condition::Nested(nested_condition)) => {
            // `array_key()` is the raw key with `[]` appended.
            assert_eq!(nested_condition.raw_key().to_string(), "country.cities");
            assert_eq!(nested_condition.array_key().to_string(), "country.cities[]");
            let nested_musts = nested_condition.filter().must.as_ref().unwrap();
            assert_eq!(nested_musts.len(), 2);
            // First inner condition: range on `population`.
            let first_must = nested_musts.first().unwrap();
            match first_must {
                Condition::Field(c) => {
                    assert_eq!(c.key.to_string(), "population");
                    assert!(c.range.is_some());
                }
                _ => panic!("Condition::Field expected"),
            }

            // Second inner condition: values count on `sightseeing`.
            let second_must = nested_musts.get(1).unwrap();
            match second_must {
                Condition::Field(c) => {
                    assert_eq!(c.key.to_string(), "sightseeing");
                    assert!(c.values_count.is_some());
                }
                _ => panic!("Condition::Field expected"),
            }
        }
        o => panic!("Condition::Nested expected but got {o:?}"),
    };
}
4678
/// Same as the nested-filter parsing test, but both `must` clauses are given as a
/// single object instead of a list; each must still parse into a one-element list.
#[test]
fn test_parse_single_nested_filter_query() {
    let query = r#"
    {
        "must": {
            "nested": {
                "key": "country.cities",
                "filter": {
                    "must": {
                        "key": "population",
                        "range": {
                            "gte": 8
                        }
                    }
                }
            }
        }
    }
    "#;
    let filter: Filter = serde_json::from_str(query).unwrap();
    // The single object is wrapped into a list of length one.
    let musts = filter.must.unwrap();
    assert_eq!(musts.len(), 1);

    let first_must = musts.first().unwrap();
    let Condition::Nested(nested_condition) = first_must else {
        panic!("Condition::Nested expected but got {first_must:?}")
    };

    assert_eq!(nested_condition.raw_key().to_string(), "country.cities");
    assert_eq!(nested_condition.array_key().to_string(), "country.cities[]");

    // The inner single object is wrapped the same way.
    let nested_must = nested_condition.filter().must.as_ref().unwrap();
    assert_eq!(nested_must.len(), 1);

    let must = nested_must.first().unwrap();
    let Condition::Field(c) = must else {
        panic!("Condition::Field expected, got {must:?}")
    };

    assert_eq!(c.key.to_string(), "population");
    assert!(c.range.is_some());
}
4721
/// A `must` list may mix a field condition with a nested filter object; the nested
/// object must parse into `Condition::Filter` with its own two-element `must_not`.
#[test]
fn test_payload_query_parse() {
    let query1 = r#"
    {
        "must": [
            {
                "key": "hello",
                "match": {
                    "value": 42
                }
            },
            {
                "must_not": [
                    {
                        "has_id": [1, 2, 3, 4]
                    },
                    {
                        "key": "geo_field",
                        "geo_bounding_box": {
                            "top_left": {
                                "lon": 13.410146,
                                "lat": 52.519289
                            },
                            "bottom_right": {
                                "lon": 13.432683,
                                "lat": 52.505582
                            }
                        }
                    }
                ]
            }
        ]
    }
    "#;

    let filter: Filter = serde_json::from_str(query1).unwrap();
    eprintln!("{filter:?}");
    let must = filter.must.unwrap();
    // The `must_not` in the query belongs to the nested filter, not to the top level.
    assert!(filter.must_not.is_none());
    assert_eq!(must.len(), 2);

    let Some(Condition::Filter(f)) = must.get(1) else {
        panic!("Condition expected")
    };
    let Some(v) = &f.must_not else {
        panic!("Filter expected")
    };
    assert_eq!(v.len(), 2);
}
4773
/// Two `must` field conditions with dotted keys must both be parsed.
#[test]
fn test_nested_payload_query_parse() {
    let query1 = r#"
    {
        "must": [
            {
                "key": "hello.nested.world",
                "match": {
                    "value": 42
                }
            },
            {
                "key": "foo.nested.bar",
                "match": {
                    "value": 1
                }
            }
        ]
    }
    "#;

    let parsed: Filter = serde_json::from_str(query1).unwrap();
    let conditions = parsed.must.unwrap();
    assert_eq!(conditions.len(), 2);
}
4799
/// A top-level `min_should` clause must parse with both its conditions and its
/// `min_count`.
#[test]
fn test_min_should_query_parse() {
    let query1 = r#"
    {
        "min_should": {
            "conditions": [
                {
                    "key": "hello.nested.world",
                    "match": {
                        "value": 42
                    }
                },
                {
                    "key": "foo.nested.bar",
                    "match": {
                        "value": 1
                    }
                }
            ],
            "min_count": 2
        }
    }
    "#;

    let filter: Filter = serde_json::from_str(query1).unwrap();
    let min_should = filter.min_should.unwrap();
    assert_eq!(min_should.conditions.len(), 2);
    // The count is part of the clause as well and used to go unchecked.
    assert_eq!(min_should.min_count, 2);
}
4828
/// A `min_should` object placed inside a `must` list must parse into a nested
/// `Condition::Filter` that carries the `min_should` clause.
#[test]
fn test_min_should_nested_parse() {
    let query1 = r#"
    {
        "must": [
            {
                "min_should": {
                    "conditions": [
                        {
                            "key": "hello.nested.world",
                            "match": {
                                "value": 42
                            }
                        },
                        {
                            "key": "foo.nested.bar",
                            "match": {
                                "value": 1
                            }
                        }
                    ],
                    "min_count": 2
                }
            }
        ]
    }
    "#;

    let parsed: Filter = serde_json::from_str(query1).unwrap();
    let conditions = parsed.must.unwrap();
    assert_eq!(conditions.len(), 1);

    let Some(Condition::Filter(inner)) = conditions.first() else {
        panic!("Condition expected")
    };
    let Some(clause) = &inner.min_should else {
        panic!("Filter expected")
    };
    assert_eq!(clause.conditions.len(), 2);
}
4872
/// Geo conditions are validated while deserializing: six queries, each expected to
/// either fail or succeed at `serde_json::from_str`.
///
/// The stated reasons for the failures are read off the input data; the validation
/// code itself is not in this part of the file.
#[test]
fn test_geo_validation() {
    // query1: bounding box with longitude 1113.41 — expected to be rejected.
    let query1 = r#"
    {
        "must": [
            {
                "key": "geo_field",
                "geo_bounding_box": {
                    "top_left": {
                        "lon": 1113.410146,
                        "lat": 52.519289
                    },
                    "bottom_right": {
                        "lon": 13.432683,
                        "lat": 52.505582
                    }
                }
            }
        ]
    }
    "#;
    let filter: Result<Filter, _> = serde_json::from_str(query1);
    assert!(filter.is_err());

    // query2: polygon whose exterior is an empty object (no `points`) — rejected.
    let query2 = r#"
    {
        "must": [
            {
                "key": "geo_field",
                "geo_polygon": {
                    "exterior": {},
                    "interiors": []
                }
            }
        ]
    }
    "#;
    let filter: Result<Filter, _> = serde_json::from_str(query2);
    assert!(filter.is_err());

    // query3: exterior ring of three points whose last point differs from the first
    // (ring not closed) — rejected.
    let query3 = r#"
    {
        "must": [
            {
                "key": "geo_field",
                "geo_polygon": {
                    "exterior":{
                        "points": [
                            {"lon": -12.0, "lat": -34.0},
                            {"lon": 11.0, "lat": -22.0},
                            {"lon": -32.0, "lat": -14.0}
                        ]
                    },
                    "interiors": []
                }
            }
        ]
    }
    "#;
    let filter: Result<Filter, _> = serde_json::from_str(query3);
    assert!(filter.is_err());

    // query4: same ring closed with a fourth point equal to the first, no interiors — accepted.
    let query4 = r#"
    {
        "must": [
            {
                "key": "geo_field",
                "geo_polygon": {
                    "exterior": {
                        "points": [
                            {"lon": -12.0, "lat": -34.0},
                            {"lon": 11.0, "lat": -22.0},
                            {"lon": -32.0, "lat": -14.0},
                            {"lon": -12.0, "lat": -34.0}
                        ]
                    },
                    "interiors": []
                }
            }
        ]
    }
    "#;
    let filter: Result<Filter, _> = serde_json::from_str(query4);
    assert!(filter.is_ok());

    // query5: valid exterior, but the interior ring is not closed — rejected.
    let query5 = r#"
    {
        "must": [
            {
                "key": "geo_field",
                "geo_polygon": {
                    "exterior": {
                        "points": [
                            {"lon": -12.0, "lat": -34.0},
                            {"lon": 11.0, "lat": -22.0},
                            {"lon": -32.0, "lat": -14.0},
                            {"lon": -12.0, "lat": -34.0}
                        ]
                    },
                    "interiors": [
                        {
                            "points": [
                                {"lon": -12.0, "lat": -34.0},
                                {"lon": 11.0, "lat": -22.0},
                                {"lon": -32.0, "lat": -14.0}
                            ]
                        }
                    ]
                }
            }
        ]
    }
    "#;
    let filter: Result<Filter, _> = serde_json::from_str(query5);
    assert!(filter.is_err());

    // query6: exterior and interior rings both closed — accepted.
    let query6 = r#"
    {
        "must": [
            {
                "key": "geo_field",
                "geo_polygon": {
                    "exterior": {
                        "points": [
                            {"lon": -12.0, "lat": -34.0},
                            {"lon": 11.0, "lat": -22.0},
                            {"lon": -32.0, "lat": -14.0},
                            {"lon": -12.0, "lat": -34.0}
                        ]
                    },
                    "interiors": [
                        {
                            "points": [
                                {"lon": -12.0, "lat": -34.0},
                                {"lon": 11.0, "lat": -22.0},
                                {"lon": -32.0, "lat": -14.0},
                                {"lon": -12.0, "lat": -34.0}
                            ]
                        }
                    ]
                }
            }
        ]
    }
    "#;
    let filter: Result<Filter, _> = serde_json::from_str(query6);
    assert!(filter.is_ok());
}
5021
/// Smoke test for payload schema (de)serialization.
///
/// Nothing is asserted on the produced values: the test only fails if one of the
/// `unwrap` calls panics. The results are printed for manual inspection.
#[test]
fn test_payload_parsing() {
    // Schema given as a bare field type.
    let ft = PayloadFieldSchema::FieldType(PayloadSchemaType::Keyword);
    let ft_json = serde_json::to_string(&ft).unwrap();
    eprintln!("ft_json = {ft_json:?}");

    // Schema given as parameters (text index with default parameters).
    let ft = PayloadFieldSchema::FieldParams(PayloadSchemaParams::Text(Default::default()));
    let ft_json = serde_json::to_string(&ft).unwrap();
    eprintln!("ft_json = {ft_json:?}");

    // The JSON string "keyword" must parse as a `PayloadSchemaType`.
    let query = r#""keyword""#;
    let field_type: PayloadSchemaType = serde_json::from_str(query).unwrap();
    eprintln!("field_type = {field_type:?}");
}
5036
/// Merging concatenates the `must` lists of both filters, keeps a `should` clause
/// present on one side only, and leaves `must_not` unset when neither side has it.
#[test]
fn merge_filters() {
    let berlin = Condition::Field(FieldCondition::new_match(
        JsonPath::new("summary"),
        Match::new_text("Berlin"),
    ));
    let osaka = Condition::Field(FieldCondition::new_match(
        JsonPath::new("city"),
        Match::new_value(ValueVariants::String("Osaka".into())),
    ));

    // Left side: `berlin` in both `must` and `should`. Right side: `osaka` in `must` only.
    let this = Filter {
        should: Some(vec![berlin.clone()]),
        ..Filter::new_must(berlin.clone())
    };
    let other = Filter::new_must(osaka.clone());

    let merged = this.merge(&other);

    assert!(merged.must.is_some());
    assert!(merged.must_not.is_none());
    assert!(merged.should.is_some());

    let must = merged.must.as_ref().unwrap();
    let should = merged.should.as_ref().unwrap();

    assert_eq!(must.len(), 2);
    assert_eq!(should.len(), 1);

    assert!(must.contains(&berlin));
    assert!(must.contains(&osaka));
    assert!(should.contains(&berlin));
}
5064
/// An include-selector keeps only the listed paths (`a` and `b.e.f`), together with
/// the parent objects needed to reach them.
#[test]
fn test_payload_selector_include() {
    let full_payload = payload_json! {
        "a": 1,
        "b": {
            "c": 123,
            "e": {
                "f": [1,2,3],
                "g": 7,
                "h": "text",
                "i": [
                    {
                        "j": 1,
                        "k": 2

                    },
                    {
                        "j": 3,
                        "k": 4
                    }
                ]
            }
        }
    };

    let wanted_keys = vec![JsonPath::new("a"), JsonPath::new("b.e.f")];
    let selected = PayloadSelector::new_include(wanted_keys).process(full_payload);

    let expected = payload_json! {
        "a": 1,
        "b": {
            "e": {
                "f": [1,2,3],
            }
        }
    };
    assert_eq!(selected, expected);
}
5105
/// Include selector edge cases: a duplicated path, and a path that addresses
/// an array element by index.
#[test]
fn test_payload_selector_array_include() {
    let original = payload_json! {
        "a": 1,
        "b": {
            "c": 123,
            "f": [1, 2, 3, 4, 5],
        }
    };

    // Listing the same path twice gives the same result as listing it once.
    let duplicated = PayloadSelector::new_include(vec![JsonPath::new("a"), JsonPath::new("a")]);
    let only_a = duplicated.process(original);

    let expected = payload_json! { "a": 1 };
    assert_eq!(only_a, expected);

    // Note: this selector is applied to the already-filtered payload from the
    // step above (`{"a": 1}`), not to the initial payload.
    let indexed = PayloadSelector::new_include(vec![JsonPath::new("b.f[0]")]);
    let nothing_left = indexed.process(only_a);

    let expected = payload_json! {};
    assert_eq!(nothing_left, expected);
}
5133
/// Include selectors descend into array elements only when the path says so
/// with `[]`; a plain dotted path through an array selects nothing inside it.
#[test]
fn test_payload_selector_no_implicit_array_include() {
    let original = payload_json! {
        "a": 1,
        "b": {
            "c": [
                {"d": 1, "e": 2},
                {"d": 3, "e": 4}
            ],
        }
    };

    // Selecting the array itself keeps every element untouched.
    let whole_array = PayloadSelector::new_include(vec![JsonPath::new("b.c")]);
    let with_whole_array = whole_array.process(original.clone());

    let expected = payload_json! {
        "b": {
            "c": [
                {"d": 1, "e": 2},
                {"d": 3, "e": 4}
            ]
        }
    };
    assert_eq!(with_whole_array, expected);

    // Explicit `[]` projects the `d` field out of each element.
    let explicit_elements = PayloadSelector::new_include(vec![JsonPath::new("b.c[].d")]);
    let with_projected_d = explicit_elements.process(original.clone());

    let expected = payload_json! {
        "b": {
            "c": [
                {"d": 1},
                {"d": 3}
            ]
        }
    };
    assert_eq!(with_projected_d, expected);

    // Without `[]` no element field is selected and `c` comes back empty.
    let implicit_elements = PayloadSelector::new_include(vec![JsonPath::new("b.c.d")]);
    let with_empty_array = implicit_elements.process(original);

    let expected = payload_json! {
        "b": {
            "c": []
        }
    };
    assert_eq!(with_empty_array, expected);
}
5196
/// An exclude selector given a root key and a nested key removes exactly those
/// entries and leaves the rest of the payload in place.
#[test]
fn test_payload_selector_exclude() {
    let original = payload_json! {
        "a": 1,
        "b": {
            "c": 123,
            "e": {
                "f": [1, 2, 3],
                "g": 7,
                "h": "text",
                "i": [
                    {"j": 1, "k": 2},
                    {"j": 3, "k": 4}
                ]
            }
        }
    };

    // One root-level path and one deeply nested path.
    let exclude_paths = vec![JsonPath::new("a"), JsonPath::new("b.e.f")];
    let remaining = PayloadSelector::new_exclude(exclude_paths).process(original);

    // `a` and `b.e.f` are gone; everything else is unchanged.
    let expected = payload_json! {
        "b": {
            "c": 123,
            "e": {
                "g": 7,
                "h": "text",
                "i": [
                    {"j": 1, "k": 2},
                    {"j": 3, "k": 4}
                ]
            }
        }
    };
    assert_eq!(remaining, expected);
}
5250
/// Exclude selector edge cases: a duplicated path, and a path that addresses
/// an array element by index.
#[test]
fn test_payload_selector_array_exclude() {
    let original = payload_json! {
        "a": 1,
        "b": {
            "c": 123,
            "f": [1, 2, 3, 4, 5],
        }
    };

    // Listing the same path twice removes the key once, without errors.
    let duplicated = PayloadSelector::new_exclude(vec![JsonPath::new("a"), JsonPath::new("a")]);
    let without_a = duplicated.process(original);

    let expected = payload_json! {
        "b": {
            "c": 123,
            "f": [1, 2, 3, 4, 5],
        }
    };
    assert_eq!(without_a, expected);

    // An index path into the array removes nothing: `f` keeps all elements.
    let indexed = PayloadSelector::new_exclude(vec![JsonPath::new("b.f[0]")]);
    let still_without_a = indexed.process(without_a);

    let expected = payload_json! {
        "b": {
            "c": 123,
            "f": [1, 2, 3, 4, 5],
        }
    };
    assert_eq!(still_without_a, expected);
}
5288
/// Both `ExtendedPointId` variants must survive a CBOR encode/decode cycle.
#[test]
fn test_extended_point_id_cbor_roundtrip() {
    let uuid = Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000").unwrap();
    let samples = [ExtendedPointId::Uuid(uuid), ExtendedPointId::NumId(42)];

    for original in samples {
        let encoded = serde_cbor::to_vec(&original).unwrap();
        let decoded: ExtendedPointId = serde_cbor::from_slice(&encoded).unwrap();
        assert_eq!(original, decoded);
    }
}
5299
/// A filter combining a keyword match (`must`) with a UUID `HasId` condition
/// (`must_not`) must survive a CBOR encode/decode cycle.
#[test]
fn test_filter_with_match_and_has_id_uuid_cbor_roundtrip() {
    let uuid = Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000").unwrap();

    let org_match = Condition::Field(FieldCondition::new_match(
        JsonPath::new("org_id"),
        Match::new_value(ValueVariants::String(String::from("test_org"))),
    ));
    let excluded_id = Condition::HasId(HasIdCondition {
        has_id: std::iter::once(ExtendedPointId::Uuid(uuid)).collect(),
    });

    let original = Filter {
        must: Some(vec![org_match]),
        must_not: Some(vec![excluded_id]),
        should: None,
        min_should: None,
    };

    let encoded = serde_cbor::to_vec(&original).unwrap();
    let decoded: Filter = serde_cbor::from_slice(&encoded).unwrap();
    assert_eq!(original, decoded);
}
5319}
5320
/// Example string value referenced by the `schemars` `example` attribute on
/// `ShardKey::Keyword`.
fn shard_key_string_example() -> String {
    String::from("region_1")
}
5324
/// Example numeric value referenced by the `schemars` `example` attribute on
/// `ShardKey::Number`.
fn shard_key_number_example() -> u64 {
    const EXAMPLE_SHARD_NUMBER: u64 = 12;
    EXAMPLE_SHARD_NUMBER
}
5328
5329#[derive(Deserialize, Serialize, JsonSchema, Debug, Clone, PartialEq, Eq, Hash)]
5330#[serde(untagged)]
5331pub enum ShardKey {
5332 #[schemars(
5333 schema_with = "String::json_schema",
5334 example = "shard_key_string_example"
5335 )]
5336 Keyword(EcoString),
5337 #[schemars(example = "shard_key_number_example")]
5338
5339 Number(u64),
5340}
5341
5342impl From<String> for ShardKey {
5343 fn from(s: String) -> Self {
5344 ShardKey::Keyword(EcoString::from(s))
5345 }
5346}
5347
5348impl From<EcoString> for ShardKey {
5349 fn from(s: EcoString) -> Self {
5350 ShardKey::Keyword(s)
5351 }
5352}
5353
5354impl From<&str> for ShardKey {
5355 fn from(s: &str) -> Self {
5356 ShardKey::Keyword(EcoString::from(s))
5357 }
5358}
5359
5360impl From<u64> for ShardKey {
5361 fn from(n: u64) -> Self {
5362 ShardKey::Number(n)
5363 }
5364}
5365
5366impl Display for ShardKey {
5367 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
5368 match self {
5369 ShardKey::Keyword(keyword) => write!(f, "\"{keyword}\""),
5370 ShardKey::Number(number) => write!(f, "{number}"),
5371 }
5372 }
5373}