// qdrant_edge/segment/types.rs
1use std::borrow::Cow;
2use std::cmp::Ordering;
3use std::collections::{BTreeMap, HashMap, HashSet};
4use std::fmt::{self, Display, Formatter};
5use std::hash::{self, Hash, Hasher};
6use std::mem;
7use std::ops::Deref;
8use std::rc::Rc;
9use std::str::FromStr;
10use std::sync::Arc;
11
12use ahash::AHashSet;
13use bytemuck::{Pod, Zeroable};
14use crate::common::stable_hash::StableHash;
15use crate::common::types::{PointOffsetType, ScoreType};
16use ecow::EcoString;
17use fnv::FnvBuildHasher;
18use geo::{Contains, Coord, Distance as GeoDistance, Haversine, LineString, Point, Polygon};
19use indexmap::IndexSet;
20use itertools::Itertools;
21use num_derive::FromPrimitive;
22use ordered_float::OrderedFloat;
23use schemars::JsonSchema;
24use serde::{Deserialize, Deserializer, Serialize};
25use serde_json::{Map, Value};
26use strum::{EnumIter, EnumString};
27use uuid::Uuid;
28use validator::{Validate, ValidationError, ValidationErrors};
29use zerocopy::native_endian::U64;
30
31use crate::segment::common::anonymize::Anonymize;
32use crate::segment::common::operation_error::{OperationError, OperationResult};
33use crate::segment::common::utils::{self, MaybeOneOrMany, MultiValue};
34use crate::segment::data_types::index::{
35    BoolIndexParams, DatetimeIndexParams, FloatIndexParams, GeoIndexParams, IntegerIndexParams,
36    KeywordIndexParams, TextIndexParams, UuidIndexParams,
37};
38use crate::segment::data_types::modifier::Modifier;
39use crate::segment::data_types::order_by::OrderValue;
40use crate::segment::data_types::primitive::PrimitiveVectorElement;
41use crate::segment::data_types::tiny_map::TinyMap;
42use crate::segment::data_types::vectors::{DenseVector, VectorStructInternal};
43use crate::segment::index::field_index::CardinalityEstimation;
44use crate::segment::index::sparse_index::sparse_index_config::SparseIndexConfig;
45use crate::segment::json_path::JsonPath;
46use crate::segment::spaces::metric::{Metric, MetricPostProcessing};
47use crate::segment::spaces::simple::{CosineMetric, DotProductMetric, EuclidMetric, ManhattanMetric};
48use crate::segment::types::utils::unordered_hash_unique;
49use crate::segment::utils::maybe_arc::MaybeArc;
50
/// JSON path addressing a payload field
pub type PayloadKeyType = JsonPath;
/// Borrowed form of [`PayloadKeyType`]
pub type PayloadKeyTypeRef<'a> = &'a JsonPath;
/// Sequential number of modification, applied to segment
pub type SeqNumberType = u64;
/// Type of float point payload
pub type FloatPayloadType = f64;
/// Type of integer point payload
pub type IntPayloadType = i64;
/// Type of datetime point payload
pub type DateTimePayloadType = DateTimeWrapper;
/// Type of Uuid point payload
pub type UuidPayloadType = Uuid;
/// Type of Uuid point payload key
pub type UuidIntType = u128;
/// Name of a vector
pub type VectorName = str;
/// Name of a vector (owned variant)
pub type VectorNameBuf = String;
69
/// Wraps `DateTime<Utc>` to allow more flexible deserialization
/// (see the `FromStr` impl below for the accepted input formats).
#[derive(Clone, Copy, Serialize, JsonSchema, Debug, PartialEq, Eq, PartialOrd, Hash)]
#[serde(transparent)]
pub struct DateTimeWrapper(pub chrono::DateTime<chrono::Utc>);
74
75impl DateTimeWrapper {
76    /// Qdrant's representation of datetime as timestamp is an i64 of microseconds
77    pub fn timestamp(&self) -> i64 {
78        self.0.timestamp_micros()
79    }
80
81    pub fn from_timestamp(ts: i64) -> Option<Self> {
82        Some(Self(chrono::DateTime::from_timestamp_micros(ts)?))
83    }
84}
85
impl<'de> Deserialize<'de> for DateTimePayloadType {
    /// Deserializes a datetime from a string value.
    ///
    /// Parsing is delegated to the [`FromStr`] impl below, which accepts
    /// RFC 3339 plus several lenient fallback formats. On failure, a
    /// user-facing error naming the primary accepted format is returned,
    /// e.g. `2014-01-01T00:00:00Z`.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        // `Cow` borrows from the input when possible, avoiding an allocation.
        let str_datetime: Cow<'de, str> = Cow::deserialize(deserializer)?;

        match DateTimePayloadType::from_str(str_datetime.as_ref()) {
            Ok(datetime) => Ok(datetime),
            Err(_) => Err(serde::de::Error::custom(format!(
                "'{str_datetime}' does not match accepted datetime format (RFC3339). Example: 2014-01-01T00:00:00Z"
            ))),
        }
    }
}
107
impl FromStr for DateTimePayloadType {
    type Err = chrono::ParseError;

    /// Parses a datetime string, trying progressively more lenient formats.
    /// Fallback order matters: the error returned is the one from the last
    /// attempted format.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Attempt to parse the input string in RFC 3339 format
        if let Ok(datetime) = chrono::DateTime::parse_from_rfc3339(s)
            // Attempt to parse default to-string format
            .or_else(|_| chrono::DateTime::from_str(s))
            // Attempt to parse the input string in the specified formats:
            // - YYYY-MM-DD'T'HH:MM:SS-HHMM (timezone without colon)
            // - YYYY-MM-DD HH:MM:SS-HHMM (timezone without colon)
            .or_else(|_| chrono::DateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S%.f%#z"))
            .or_else(|_| chrono::DateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S%.f%#z"))
            .map(|dt| chrono::DateTime::<chrono::Utc>::from(dt).into())
        {
            return Ok(datetime);
        }

        // Attempt to parse the input string in the specified formats:
        // - YYYY-MM-DD'T'HH:MM:SS (without timezone or Z)
        // - YYYY-MM-DD HH:MM:SS (without timezone or Z)
        // - YYYY-MM-DD HH:MM
        // - YYYY-MM-DD
        // Timezone-less inputs are interpreted as UTC.
        // See: <https://github.com/qdrant/qdrant/issues/3529>
        let datetime = chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S%.f")
            .or_else(|_| chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S%.f"))
            .or_else(|_| chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M"))
            .or_else(|_| chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").map(Into::into))?;

        // Convert the parsed NaiveDateTime to a DateTime<Utc>
        let datetime_utc = datetime.and_utc().into();
        Ok(datetime_utc)
    }
}
142
143impl Display for DateTimePayloadType {
144    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
145        write!(f, "{}", self.0)
146    }
147}
148
149impl From<chrono::DateTime<chrono::Utc>> for DateTimePayloadType {
150    fn from(dt: chrono::DateTime<chrono::Utc>) -> Self {
151        DateTimeWrapper(dt)
152    }
153}
154
/// Example numeric point ID used in the generated JSON schema.
fn id_num_example() -> u64 {
    42
}
158
/// Example UUID point ID used in the generated JSON schema.
fn id_uuid_example() -> String {
    String::from("550e8400-e29b-41d4-a716-446655440000")
}
162
/// Type, used for specifying point ID in user interface
#[derive(Debug, Serialize, Copy, Clone, PartialEq, Eq, Hash, Ord, PartialOrd, JsonSchema)]
#[serde(untagged)]
pub enum ExtendedPointId {
    /// Plain unsigned integer ID
    #[schemars(example = "id_num_example")]
    NumId(u64),
    /// UUID-based ID
    #[schemars(example = "id_uuid_example")]
    Uuid(Uuid),
}
172
173impl StableHash for ExtendedPointId {
174    fn stable_hash<W: FnMut(&[u8])>(&self, write: &mut W) {
175        match self {
176            ExtendedPointId::NumId(num) => {
177                0u64.stable_hash(write); // discriminant for NumId
178                num.stable_hash(write);
179            }
180            ExtendedPointId::Uuid(uuid) => {
181                1u64.stable_hash(write); // discriminant for Uuid
182
183                uuid.as_bytes().len().stable_hash(write); // compatibility with uuid <= v1.16.0
184                write(uuid.as_bytes());
185            }
186        }
187    }
188}
189
190impl ExtendedPointId {
191    #[cfg(any(test, feature = "testing"))]
192    pub fn as_u64(&self) -> u64 {
193        match self {
194            ExtendedPointId::NumId(num) => *num,
195            ExtendedPointId::Uuid(_) => panic!("Cannot convert UUID to u64"),
196        }
197    }
198
199    pub fn is_num_id(&self) -> bool {
200        matches!(self, ExtendedPointId::NumId(..))
201    }
202
203    pub fn is_uuid(&self) -> bool {
204        matches!(self, ExtendedPointId::Uuid(..))
205    }
206}
207
208impl std::fmt::Display for ExtendedPointId {
209    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
210        match self {
211            ExtendedPointId::NumId(idx) => write!(f, "{idx}"),
212            ExtendedPointId::Uuid(uuid) => write!(f, "{uuid}"),
213        }
214    }
215}
216
217impl From<u64> for ExtendedPointId {
218    fn from(idx: u64) -> Self {
219        ExtendedPointId::NumId(idx)
220    }
221}
222
223impl FromStr for ExtendedPointId {
224    type Err = ();
225
226    fn from_str(s: &str) -> Result<Self, Self::Err> {
227        let try_num: Result<u64, _> = s.parse();
228        if let Ok(num) = try_num {
229            return Ok(Self::NumId(num));
230        }
231        let try_uuid = Uuid::from_str(s);
232        if let Ok(uuid) = try_uuid {
233            return Ok(Self::Uuid(uuid));
234        }
235        Err(())
236    }
237}
238
impl<'de> serde::Deserialize<'de> for ExtendedPointId {
    /// Accepts either an unsigned integer or a UUID.
    ///
    /// The input is first buffered into a `serde_value::Value` so that both
    /// interpretations can be attempted against the same data.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let value = serde_value::Value::deserialize(deserializer)?;

        // Cheapest interpretation first: plain unsigned integer. The clone is
        // needed because `value` is reused by the fallback and the error below.
        if let Ok(num) = value.clone().deserialize_into() {
            return Ok(ExtendedPointId::NumId(num));
        }

        // Fallback: try to interpret the same value as a UUID.
        if let Ok(uuid) = value.clone().deserialize_into() {
            return Ok(ExtendedPointId::Uuid(uuid));
        }

        // Wrap for human-readable rendering in the error message.
        let value = crate::segment::utils::fmt::SerdeValue(&value);

        Err(serde::de::Error::custom(format!(
            "value {value} is not a valid point ID, \
                 valid values are either an unsigned integer or a UUID",
        )))
    }
}
262
/// Type of point index used across all segments
pub type PointIdType = ExtendedPointId;
265
/// Compact representation of [`ExtendedPointId`].
/// Unlike [`ExtendedPointId`], this type is 17 bytes long vs 24 bytes.
/// The numeric variant stores an unaligned native-endian `U64` to avoid padding.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub enum CompactExtendedPointId {
    NumId(U64),
    Uuid(Uuid),
}
273
274impl From<ExtendedPointId> for CompactExtendedPointId {
275    fn from(id: ExtendedPointId) -> Self {
276        match id {
277            ExtendedPointId::NumId(num) => CompactExtendedPointId::NumId(U64::new(num)),
278            ExtendedPointId::Uuid(uuid) => CompactExtendedPointId::Uuid(uuid),
279        }
280    }
281}
282
283impl From<CompactExtendedPointId> for ExtendedPointId {
284    fn from(id: CompactExtendedPointId) -> Self {
285        match id {
286            CompactExtendedPointId::NumId(num) => ExtendedPointId::NumId(num.get()),
287            CompactExtendedPointId::Uuid(uuid) => ExtendedPointId::Uuid(uuid),
288        }
289    }
290}
291
/// Distance function types used to compare vectors
#[derive(
    Debug,
    Deserialize,
    Serialize,
    JsonSchema,
    Clone,
    Copy,
    FromPrimitive,
    PartialEq,
    Eq,
    Hash,
    EnumString,
    EnumIter,
)]
pub enum Distance {
    /// <https://en.wikipedia.org/wiki/Cosine_similarity>
    Cosine,
    /// <https://en.wikipedia.org/wiki/Euclidean_distance>
    Euclid,
    /// <https://en.wikipedia.org/wiki/Dot_product>
    Dot,
    /// <https://simple.wikipedia.org/wiki/Manhattan_distance>
    Manhattan,
}
319
320impl Distance {
321    pub fn postprocess_score(&self, score: ScoreType) -> ScoreType {
322        match self {
323            Distance::Cosine => CosineMetric::postprocess(score),
324            Distance::Euclid => EuclidMetric::postprocess(score),
325            Distance::Dot => DotProductMetric::postprocess(score),
326            Distance::Manhattan => ManhattanMetric::postprocess(score),
327        }
328    }
329
330    pub fn preprocess_vector<T: PrimitiveVectorElement>(&self, vector: DenseVector) -> DenseVector
331    where
332        CosineMetric: Metric<T>,
333        EuclidMetric: Metric<T>,
334        DotProductMetric: Metric<T>,
335        ManhattanMetric: Metric<T>,
336    {
337        match self {
338            Distance::Cosine => CosineMetric::preprocess(vector),
339            Distance::Euclid => EuclidMetric::preprocess(vector),
340            Distance::Dot => DotProductMetric::preprocess(vector),
341            Distance::Manhattan => ManhattanMetric::preprocess(vector),
342        }
343    }
344
345    pub fn distance_order(&self) -> Order {
346        match self {
347            Distance::Cosine | Distance::Dot => Order::LargeBetter,
348            Distance::Euclid | Distance::Manhattan => Order::SmallBetter,
349        }
350    }
351
352    pub fn is_ordered(&self, left: ScoreType, right: ScoreType) -> bool {
353        match self.distance_order() {
354            Order::LargeBetter => left >= right,
355            Order::SmallBetter => left <= right,
356        }
357    }
358
359    /// Checks if score satisfies threshold condition
360    pub fn check_threshold(&self, score: ScoreType, threshold: ScoreType) -> bool {
361        match self.distance_order() {
362            Order::LargeBetter => score > threshold,
363            Order::SmallBetter => score < threshold,
364        }
365    }
366}
367
/// Direction of score comparison for a metric.
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum Order {
    /// Higher score is better (similarity metrics)
    LargeBetter,
    /// Lower score is better (distance metrics)
    SmallBetter,
}
373
/// Search result: a single point together with its score and optionally
/// requested payload/vector data.
#[derive(Clone, Debug)]
pub struct ScoredPoint {
    /// Point id
    pub id: PointIdType,
    /// Point version
    pub version: SeqNumberType,
    /// Points vector distance to the query vector
    pub score: ScoreType,
    /// Payload - values assigned to the point
    pub payload: Option<Payload>,
    /// Vector of the point
    pub vector: Option<VectorStructInternal>,
    /// Shard Key
    pub shard_key: Option<ShardKey>,
    /// Order-by value; when present, it takes precedence over `score` in ordering
    pub order_value: Option<OrderValue>,
}
392
393impl Eq for ScoredPoint {}
394
395impl Ord for ScoredPoint {
396    /// Compare two scored points by score, unless they have `order_value`, in that case compare by `order_value`.
397    fn cmp(&self, other: &Self) -> Ordering {
398        match (&self.order_value, &other.order_value) {
399            (None, None) => OrderedFloat(self.score).cmp(&OrderedFloat(other.score)),
400            (Some(_), None) => Ordering::Greater,
401            (None, Some(_)) => Ordering::Less,
402            (Some(self_order), Some(other_order)) => self_order.cmp(other_order),
403        }
404    }
405}
406
407impl PartialOrd for ScoredPoint {
408    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
409        Some(self.cmp(other))
410    }
411}
412
413impl PartialEq for ScoredPoint {
414    fn eq(&self, other: &Self) -> bool {
415        (self.id, &self.score) == (other.id, &other.score)
416    }
417}
418
/// Type of segment
#[derive(Debug, Serialize, JsonSchema,  Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum SegmentType {
    /// There are no index built for the segment, all operations are available
    Plain,
    /// Segment with some sort of index built. Optimized for search, appending new points will require reindexing
    Indexed,
    /// Some index which you better don't touch
    Special,
}
430
/// Display payload field type & index information
#[derive(Debug, Serialize, JsonSchema,  Clone, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub struct PayloadIndexInfo {
    /// Schema type of the indexed payload field
    pub data_type: PayloadSchemaType,
    /// Index parameters, if the index was created with explicit params
    #[serde(skip_serializing_if = "Option::is_none")]
    pub params: Option<PayloadSchemaParams>,
    /// Number of points indexed with this index
    pub points: usize,
}
441
442impl PayloadIndexInfo {
443    pub fn new(field_type: PayloadFieldSchema, points_count: usize) -> Self {
444        match field_type {
445            PayloadFieldSchema::FieldType(data_type) => PayloadIndexInfo {
446                data_type,
447                params: None,
448                points: points_count,
449            },
450            PayloadFieldSchema::FieldParams(schema_params) => PayloadIndexInfo {
451                data_type: schema_params.kind(),
452                params: Some(schema_params),
453                points: points_count,
454            },
455        }
456    }
457}
458
/// Statistics for a single named vector storage within a segment.
#[derive(Debug, Serialize, JsonSchema,  Clone, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub struct VectorDataInfo {
    /// Total number of vectors
    pub num_vectors: usize,
    /// Number of vectors covered by a vector index
    pub num_indexed_vectors: usize,
    /// Number of vectors marked as deleted
    pub num_deleted_vectors: usize,
}
466
/// Aggregated information about segment
#[derive(Debug, Serialize, JsonSchema,  Clone, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub struct SegmentInfo {
    pub uuid: Uuid,
    pub segment_type: SegmentType,
    pub num_vectors: usize,
    pub num_points: usize,
    pub num_deferred_points: Option<usize>,
    pub num_deleted_deferred_points: Option<usize>,
    pub num_indexed_vectors: usize,
    pub num_deleted_vectors: usize,
    /// An ESTIMATION of effective amount of bytes used for vectors
    /// Do NOT rely on this number unless you know what you are doing
    pub vectors_size_bytes: usize,
    /// An estimation of the effective amount of bytes used for payloads
    pub payloads_size_bytes: usize,
    pub ram_usage_bytes: usize,
    pub disk_usage_bytes: usize,
    pub is_appendable: bool,
    /// Payload index info per payload key
    pub index_schema: HashMap<PayloadKeyType, PayloadIndexInfo>,
    /// Per-vector-name statistics
    pub vector_data: HashMap<String, VectorDataInfo>,
    /// Internal ID from which points are deferred (hidden from reads).
    /// Only set for appendable segments.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub deferred_internal_id: Option<PointOffsetType>,
}
495
/// Size counters of a segment, used for size estimations.
#[derive(Debug, Default)]
pub struct SizeStats {
    pub num_vectors: usize,
    /// Vector counts broken down per named vector
    pub num_vectors_by_name: TinyMap<VectorNameBuf, usize>,
    pub vectors_size_bytes: usize,
    pub payloads_size_bytes: usize,
    pub num_points: usize,
}
504
/// Additional parameters of the search, related to quantized storage
#[derive(Debug, Deserialize, Serialize, JsonSchema, Validate, Clone, Copy, PartialEq, Default)]
#[serde(rename_all = "snake_case")]
pub struct QuantizationSearchParams {
    /// If true, quantized vectors are ignored. Default is false.
    #[serde(default = "default_quantization_ignore_value")]
    pub ignore: bool,

    /// If true, use original vectors to re-score top-k results.
    /// Might require more time in case if original vectors are stored on disk.
    /// If not set, qdrant decides automatically apply rescoring or not.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rescore: Option<bool>,

    /// Oversampling factor for quantization. Default is 1.0.
    ///
    /// Defines how many extra vectors should be preselected using quantized index,
    /// and then re-scored using original vectors.
    ///
    /// For example, if `oversampling` is 2.4 and `limit` is 100, then 240 vectors will be preselected using quantized index,
    /// and then top-100 will be returned after re-scoring.
    #[serde(default = "default_quantization_oversampling_value")]
    #[validate(range(min = 1.0))]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub oversampling: Option<f64>,
}
532
533impl Hash for QuantizationSearchParams {
534    fn hash<H: Hasher>(&self, state: &mut H) {
535        let Self {
536            ignore,
537            rescore,
538            oversampling,
539        } = self;
540        ignore.hash(state);
541        rescore.hash(state);
542        oversampling.map(OrderedFloat).hash(state);
543    }
544}
545
/// Serde default for [`QuantizationSearchParams::ignore`]: quantization is used.
pub const fn default_quantization_ignore_value() -> bool {
    false
}
549
/// Serde default for [`QuantizationSearchParams::oversampling`]: unset.
pub const fn default_quantization_oversampling_value() -> Option<f64> {
    None
}
553
/// Default value for [`AcornSearchParams::max_selectivity`].
///
/// After change, update docs for GRPC and REST API.
pub const ACORN_MAX_SELECTIVITY_DEFAULT: f64 = 0.4;
558
/// ACORN-related search parameters
#[derive(
    Debug, Deserialize, Serialize, JsonSchema, Validate, Clone, Copy, PartialEq, Default, Hash,
)]
#[serde(rename_all = "snake_case")]
pub struct AcornSearchParams {
    /// If true, then ACORN may be used for the HNSW search based on filters
    /// selectivity.
    ///
    /// Improves search recall for searches with multiple low-selectivity
    /// payload filters, at cost of performance.
    #[serde(default)]
    pub enable: bool,

    /// Maximum selectivity of filters to enable ACORN.
    ///
    /// If estimated filters selectivity is higher than this value,
    /// ACORN will not be used. Selectivity is estimated as:
    /// `estimated number of points satisfying the filters / total number of points`.
    ///
    /// 0.0 for never, 1.0 for always. Default is 0.4.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 0.0, max = 1.0))]
    pub max_selectivity: Option<OrderedFloat<f64>>,
}
585
/// Additional parameters of the search
#[derive(
    Debug, Deserialize, Serialize, JsonSchema, Validate, Copy, Clone, PartialEq, Default, Hash,
)]
#[serde(rename_all = "snake_case")]
pub struct SearchParams {
    /// Params relevant to HNSW index:
    /// size of the beam in a beam-search. Larger the value - more accurate the result, more time required for search.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hnsw_ef: Option<usize>,

    /// Search without approximation. If set to true, search may run long but with exact results.
    #[serde(default)]
    pub exact: bool,

    /// Quantization params
    #[serde(default)]
    #[validate(nested)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub quantization: Option<QuantizationSearchParams>,

    /// If enabled, the engine will only perform search among indexed or small segments.
    /// Using this option prevents slow searches in case of delayed index, but does not
    /// guarantee that all uploaded vectors will be included in search results
    #[serde(default)]
    pub indexed_only: bool,

    /// ACORN search params
    #[serde(default)]
    #[validate(nested)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub acorn: Option<AcornSearchParams>,
}
619
/// Defaults applied to all vectors of a collection unless overridden per vector.
#[derive(Debug, Deserialize, Validate, Clone, PartialEq, Eq)]
pub struct VectorsConfigDefaults {
    /// Default for whether vector data should be stored on disk
    #[serde(default)]
    pub on_disk: Option<bool>,
}
626
/// Vector index configuration
#[derive(Debug, Deserialize, Serialize, JsonSchema,  Clone, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
#[serde(tag = "type", content = "options")]
pub enum Indexes {
    /// Do not use any index, scan whole vector collection during search.
    /// Guarantee 100% precision, but may be time consuming on large collections.
    Plain {},
    /// Use filterable HNSW index for approximate search. Is very fast even on a very huge collections,
    /// but require additional space to store index and additional time to build it.
    Hnsw(HnswConfig),
}
639
640impl Indexes {
641    pub fn is_indexed(&self) -> bool {
642        match self {
643            Indexes::Plain {} => false,
644            Indexes::Hnsw(_) => true,
645        }
646    }
647
648    pub fn is_on_disk(&self) -> bool {
649        match self {
650            Indexes::Plain {} => false,
651            Indexes::Hnsw(config) => config.on_disk.unwrap_or_default(),
652        }
653    }
654}
655
/// Config of HNSW index
#[derive(
    Copy, Clone, Debug, Eq, PartialEq, Deserialize, Serialize, JsonSchema, Validate,
)]
#[serde(rename_all = "snake_case")]
pub struct HnswConfig {
    /// Number of edges per node in the index graph. Larger the value - more accurate the search, more space required.
    pub m: usize,
    /// Number of neighbours to consider during the index building. Larger the value - more accurate the search, more time required to build index.
    #[validate(range(min = 4))]
    pub ef_construct: usize,
    /// Minimal size threshold (in KiloBytes) below which full-scan is preferred over HNSW search.
    /// This measures the total size of vectors being queried against.
    /// When the maximum estimated amount of points that a condition satisfies is smaller than
    /// `full_scan_threshold_kb`, the query planner will use full-scan search instead of HNSW index
    /// traversal for better performance.
    /// Note: 1Kb = 1 vector of size 256
    #[serde(alias = "full_scan_threshold_kb")]
    pub full_scan_threshold: usize,
    /// Number of parallel threads used for background index building.
    /// If 0 - automatically select from 8 to 16.
    /// Best to keep between 8 and 16 to prevent likelihood of slow building or broken/inefficient HNSW graphs.
    /// On small CPUs, less threads are used.
    #[serde(default = "default_max_indexing_threads")]
    pub max_indexing_threads: usize,
    /// Store HNSW index on disk. If set to false, index will be stored in RAM. Default: false
    #[serde(default, skip_serializing_if = "Option::is_none")] // Better backward compatibility
    pub on_disk: Option<bool>,
    /// Custom M param for hnsw graph built for payload index. If not set, default M will be used.
    #[serde(default, skip_serializing_if = "Option::is_none")] // Better backward compatibility
    pub payload_m: Option<usize>,
    /// Store copies of original and quantized vectors within the HNSW index file. Default: false.
    /// Enabling this option will trade the search speed for disk usage by reducing amount of
    /// random seeks during the search.
    /// Requires quantized vectors to be enabled. Multi-vectors are not supported.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub inline_storage: Option<bool>,
}
695
696impl HnswConfig {
697    /// Detect configuration mismatch against `other` that requires rebuilding
698    ///
699    /// Returns true only if both conditions are met:
700    /// - this configuration does not match `other`
701    /// - to effectively change the configuration, a HNSW rebuild is required
702    ///
703    /// For example, a change in `max_indexing_threads` will not require rebuilding because it
704    /// doesn't affect the final index, and thus this would return false.
705    pub fn mismatch_requires_rebuild(&self, other: &Self) -> bool {
706        let HnswConfig {
707            m,
708            ef_construct,
709            full_scan_threshold,
710            max_indexing_threads: _,
711            payload_m,
712            on_disk,
713            inline_storage,
714        } = *self;
715
716        m != other.m
717            || ef_construct != other.ef_construct
718            || full_scan_threshold != other.full_scan_threshold
719            || payload_m != other.payload_m
720            // Data on disk is the same, we have a unit test for that. We can eventually optimize
721            // this to just reload the collection rather than optimizing it again as a whole just
722            // to flip this flag
723            || on_disk != other.on_disk
724            || inline_storage != other.inline_storage
725    }
726}
727
/// Node-wide HNSW settings (not per-collection).
#[derive(Debug, Deserialize, Serialize, JsonSchema, Validate,  Clone)]
#[serde(rename_all = "snake_case", default)]
pub struct HnswGlobalConfig {
    /// Enable HNSW healing if the ratio of missing points is no more than this value.
    /// To disable healing completely, set this value to `0.0`.
    #[validate(range(min = 0.0, max = 1.0))]
    pub healing_threshold: f64,
}
737
738impl Default for HnswGlobalConfig {
739    fn default() -> Self {
740        Self {
741            healing_threshold: 0.3,
742        }
743    }
744}
745
/// Serde default for `HnswConfig::max_indexing_threads`: 0 means auto-select.
const fn default_max_indexing_threads() -> usize {
    0
}
749
/// Compression ratio for product quantization (e.g. `X4` = 4x smaller).
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, Copy, PartialEq, Eq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum CompressionRatio {
    X4,
    X8,
    X16,
    X32,
    X64,
}
759
/// Element type used for scalar quantization.
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Hash, Deserialize, Serialize, JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum ScalarType {
    /// 8-bit integer quantization
    #[default]
    Int8,
}
766
/// Configuration of scalar quantization.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema, Validate)]
#[serde(rename_all = "snake_case")]
pub struct ScalarQuantizationConfig {
    /// Type of quantization to use
    /// If `int8` - 8 bit quantization will be used
    pub r#type: ScalarType,
    /// Quantile for quantization. Expected value range in [0.5, 1.0]. If not set - use the whole range of values
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 0.5, max = 1.0))]
    pub quantile: Option<f32>,
    /// If true - quantized vectors always will be stored in RAM, ignoring the config of main storage
    #[serde(skip_serializing_if = "Option::is_none")]
    pub always_ram: Option<bool>,
}
781
782impl ScalarQuantizationConfig {
783    /// Detect configuration mismatch against `other` that requires rebuilding
784    ///
785    /// Returns true only if both conditions are met:
786    /// - this configuration does not match `other`
787    /// - to effectively change the configuration, a quantization rebuild is required
788    pub fn mismatch_requires_rebuild(&self, other: &Self) -> bool {
789        self != other
790    }
791}
792
/// Wrapper giving scalar quantization its `{"scalar": {...}}` JSON shape.
#[derive(Clone, Debug, Eq, PartialEq, Hash, Deserialize, Serialize, JsonSchema, Validate)]
pub struct ScalarQuantization {
    #[validate(nested)]
    pub scalar: ScalarQuantizationConfig,
}
798
/// Configuration of product quantization.
#[derive(Clone, Debug, Eq, PartialEq, Hash, Deserialize, Serialize, JsonSchema, Validate)]
#[serde(rename_all = "snake_case")]
pub struct ProductQuantizationConfig {
    /// Target compression ratio
    pub compression: CompressionRatio,

    /// If true - quantized vectors always will be stored in RAM, ignoring the config of main storage
    #[serde(skip_serializing_if = "Option::is_none")]
    pub always_ram: Option<bool>,
}
807
808impl ProductQuantizationConfig {
809    /// Detect configuration mismatch against `other` that requires rebuilding
810    ///
811    /// Returns true only if both conditions are met:
812    /// - this configuration does not match `other`
813    /// - to effectively change the configuration, a quantization rebuild is required
814    pub fn mismatch_requires_rebuild(&self, other: &Self) -> bool {
815        self != other
816    }
817}
818
/// Wrapper giving product quantization its `{"product": {...}}` JSON shape.
#[derive(Clone, Debug, Eq, PartialEq, Hash, Deserialize, Serialize, JsonSchema, Validate)]
pub struct ProductQuantization {
    #[validate(nested)]
    pub product: ProductQuantizationConfig,
}
824
825impl Hash for ScalarQuantizationConfig {
826    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
827        self.always_ram.hash(state);
828        self.r#type.hash(state);
829    }
830}
831
832impl Eq for ScalarQuantizationConfig {}
833
/// Number of bits used per vector dimension in binary quantization.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, Copy, PartialEq, Eq, Hash, Default)]
#[serde(rename_all = "snake_case")]
pub enum BinaryQuantizationEncoding {
    #[default]
    OneBit,
    TwoBits,
    OneAndHalfBits,
}
842
843impl BinaryQuantizationEncoding {
844    pub fn is_one_bit(&self) -> bool {
845        matches!(self, BinaryQuantizationEncoding::OneBit)
846    }
847}
848
/// Configuration for binary quantization of vectors.
#[derive(Clone, Debug, Eq, PartialEq, Hash, Deserialize, Serialize, JsonSchema, Validate)]
#[serde(rename_all = "snake_case")]
pub struct BinaryQuantizationConfig {
    /// Whether quantized data should always be kept in RAM; omitted from JSON when unset.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub always_ram: Option<bool>,
    /// Encoding used for stored vectors; `None` when absent from the input.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub encoding: Option<BinaryQuantizationEncoding>,

    /// Asymmetric quantization configuration allows a query to have different quantization than stored vectors.
    /// It can increase the accuracy of search at the cost of performance.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub query_encoding: Option<BinaryQuantizationQueryEncoding>,
}
864
/// Wrapper that tags a quantization config as binary quantization in the
/// untagged [`QuantizationConfig`] enum (serialized as `{"binary": {...}}`).
#[derive(Clone, Debug, Eq, PartialEq, Hash, Deserialize, Serialize, JsonSchema, Validate)]
pub struct BinaryQuantization {
    /// Binary quantization settings; validated recursively.
    #[validate(nested)]
    pub binary: BinaryQuantizationConfig,
}
870
871#[derive(Clone, Debug, Eq, PartialEq, Hash, Deserialize, Serialize, JsonSchema, )]
872#[serde(untagged, rename_all = "snake_case")]
873
874pub enum QuantizationConfig {
875    Scalar(ScalarQuantization),
876    Product(ProductQuantization),
877    Binary(BinaryQuantization),
878}
879
880impl QuantizationConfig {
881    /// If appendable_quantization feature is enabled and config supports appendable segments,
882    /// returns the config for use in appendable segment; otherwise `None`.
883    pub fn for_appendable_segment(opt: Option<&Self>) -> Option<Self> {
884        let appendable = crate::common::flags::feature_flags().appendable_quantization;
885        opt.filter(|q| appendable && q.supports_appendable())
886            .cloned()
887    }
888
889    /// Detect configuration mismatch against `other` that requires rebuilding
890    ///
891    /// Returns true only if both conditions are met:
892    /// - this configuration does not match `other`
893    /// - to effectively change the configuration, a quantization rebuild is required
894    pub fn mismatch_requires_rebuild(&self, other: &Self) -> bool {
895        self != other
896    }
897
898    pub fn supports_appendable(&self) -> bool {
899        matches!(self, QuantizationConfig::Binary(_))
900    }
901}
902
903impl Validate for QuantizationConfig {
904    fn validate(&self) -> Result<(), ValidationErrors> {
905        match self {
906            QuantizationConfig::Scalar(scalar) => scalar.validate(),
907            QuantizationConfig::Product(product) => product.validate(),
908            QuantizationConfig::Binary(binary) => binary.validate(),
909        }
910    }
911}
912
913#[derive(
914    Default, Debug, Deserialize, Serialize, JsonSchema,  Clone, Copy, PartialEq, Eq, Hash,
915)]
916#[serde(rename_all = "lowercase")]
917
918pub enum BinaryQuantizationQueryEncoding {
919    #[default]
920    Default,
921    Binary,
922    Scalar4Bits,
923    Scalar8Bits,
924}
925
926impl From<ScalarQuantizationConfig> for QuantizationConfig {
927    fn from(config: ScalarQuantizationConfig) -> Self {
928        QuantizationConfig::Scalar(ScalarQuantization { scalar: config })
929    }
930}
931
932impl From<ProductQuantizationConfig> for QuantizationConfig {
933    fn from(config: ProductQuantizationConfig) -> Self {
934        QuantizationConfig::Product(ProductQuantization { product: config })
935    }
936}
937
938impl From<BinaryQuantizationConfig> for QuantizationConfig {
939    fn from(config: BinaryQuantizationConfig) -> Self {
940        QuantizationConfig::Binary(BinaryQuantization { binary: config })
941    }
942}
943
/// Per-vector strict mode limits for sparse vectors (input/validated form).
#[derive(Debug, Deserialize, Serialize, JsonSchema, Validate, Clone, PartialEq, Default, Hash)]
pub struct StrictModeSparse {
    /// Max length of sparse vector
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1))]
    pub max_length: Option<usize>,
}
951
/// Sparse vector strict mode limits keyed by vector name.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Validate, Clone, PartialEq, Default, Hash)]
#[schemars(deny_unknown_fields)]
pub struct StrictModeSparseConfig {
    /// Per-vector limits; `flatten` serializes map entries at the top level.
    #[validate(nested)]
    #[serde(flatten)]
    pub config: BTreeMap<VectorNameBuf, StrictModeSparse>,
}
959
/// Output (user-facing) form of [`StrictModeSparseConfig`], without validation attributes.
#[derive(Debug, Deserialize, Serialize, JsonSchema,  Clone, PartialEq, Default)]
#[schemars(deny_unknown_fields)]
pub struct StrictModeSparseConfigOutput {
    /// Per-vector limits, flattened into the top-level JSON object.
    #[serde(flatten)]
    pub config: BTreeMap<VectorNameBuf, StrictModeSparseOutput>,
}
966
967#[derive(Debug, Deserialize, Serialize, JsonSchema,  Clone, PartialEq, Default)]
968pub struct StrictModeSparseOutput {
969    /// Max length of sparse vector
970    #[serde(skip_serializing_if = "Option::is_none")]
971    
972    pub max_length: Option<usize>,
973}
974
975impl From<StrictModeSparseConfig> for StrictModeSparseConfigOutput {
976    fn from(config: StrictModeSparseConfig) -> Self {
977        let StrictModeSparseConfig { config } = config;
978        let mut new_config = StrictModeSparseConfigOutput::default();
979        for (key, value) in config {
980            new_config
981                .config
982                .insert(key, StrictModeSparseOutput::from(value));
983        }
984        new_config
985    }
986}
987
988impl From<StrictModeSparse> for StrictModeSparseOutput {
989    fn from(config: StrictModeSparse) -> Self {
990        let StrictModeSparse { max_length } = config;
991        StrictModeSparseOutput { max_length }
992    }
993}
994
/// Per-vector strict mode limits for multivectors (input/validated form).
#[derive(Debug, Deserialize, Serialize, JsonSchema, Validate, Clone, PartialEq, Default, Hash)]
pub struct StrictModeMultivector {
    /// Max number of vectors in a multivector
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1))]
    pub max_vectors: Option<usize>,
}
1002
/// Multivector strict mode limits keyed by vector name.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Validate, Clone, PartialEq, Default, Hash)]
#[schemars(deny_unknown_fields)]
pub struct StrictModeMultivectorConfig {
    /// Per-vector limits; `flatten` serializes map entries at the top level.
    #[validate(nested)]
    #[serde(flatten)]
    pub config: BTreeMap<VectorNameBuf, StrictModeMultivector>,
}
1010
/// Output (user-facing) form of [`StrictModeMultivectorConfig`], without validation attributes.
#[derive(Debug, Deserialize, Serialize, JsonSchema,  Clone, PartialEq, Default)]
#[schemars(deny_unknown_fields)]
pub struct StrictModeMultivectorConfigOutput {
    /// Per-vector limits, flattened into the top-level JSON object.
    #[serde(flatten)]
    pub config: BTreeMap<VectorNameBuf, StrictModeMultivectorOutput>,
}
1017
1018impl From<StrictModeMultivectorConfig> for StrictModeMultivectorConfigOutput {
1019    fn from(config: StrictModeMultivectorConfig) -> Self {
1020        let StrictModeMultivectorConfig { config } = config;
1021        let mut new_config = StrictModeMultivectorConfigOutput::default();
1022        for (key, value) in config {
1023            new_config
1024                .config
1025                .insert(key, StrictModeMultivectorOutput::from(value));
1026        }
1027        new_config
1028    }
1029}
1030
1031#[derive(Debug, Deserialize, Serialize, JsonSchema,  Clone, PartialEq, Default)]
1032pub struct StrictModeMultivectorOutput {
1033    /// Max number of vectors in a multivector
1034    #[serde(skip_serializing_if = "Option::is_none")]
1035    
1036    pub max_vectors: Option<usize>,
1037}
1038
1039impl From<StrictModeMultivector> for StrictModeMultivectorOutput {
1040    fn from(config: StrictModeMultivector) -> Self {
1041        let StrictModeMultivector { max_vectors } = config;
1042        StrictModeMultivectorOutput { max_vectors }
1043    }
1044}
1045
/// Strict mode configuration for a collection.
///
/// Every limit is optional; unset limits are skipped during serialization
/// (`skip_serializing_if`). Validation attributes apply on input via the
/// `Validate` derive.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Validate, Clone, PartialEq, Default)]
pub struct StrictModeConfig {
    // Global
    /// Whether strict mode is enabled for a collection or not.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub enabled: Option<bool>,

    /// Max allowed `limit` parameter for all APIs that don't have their own max limit.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1))]
    pub max_query_limit: Option<usize>,

    /// Max allowed `timeout` parameter.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1))]
    pub max_timeout: Option<usize>,

    /// Allow usage of unindexed fields in retrieval based (e.g. search) filters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub unindexed_filtering_retrieve: Option<bool>,

    /// Allow usage of unindexed fields in filtered updates (e.g. delete by payload).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub unindexed_filtering_update: Option<bool>,

    // Search
    /// Max HNSW ef value allowed in search parameters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub search_max_hnsw_ef: Option<usize>,

    /// Whether exact search is allowed.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub search_allow_exact: Option<bool>,

    /// Max oversampling value allowed in search.
    // NOTE: f64 — this field is skipped in the manual `Hash` impl below.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub search_max_oversampling: Option<f64>,

    /// Max batchsize when upserting
    #[serde(skip_serializing_if = "Option::is_none")]
    pub upsert_max_batchsize: Option<usize>,

    /// Max batchsize when searching
    #[serde(skip_serializing_if = "Option::is_none")]
    pub search_max_batchsize: Option<usize>,

    /// Max size of a collections vector storage in bytes, ignoring replicas.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_collection_vector_size_bytes: Option<usize>,

    /// Max number of read operations per minute per replica
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1))]
    pub read_rate_limit: Option<usize>,

    /// Max number of write operations per minute per replica
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1))]
    pub write_rate_limit: Option<usize>,

    /// Max size of a collections payload storage in bytes
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_collection_payload_size_bytes: Option<usize>,

    /// Max number of points estimated in a collection
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1))]
    pub max_points_count: Option<usize>,

    /// Max conditions a filter can have.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub filter_max_conditions: Option<usize>,

    /// Max size of a condition, eg. items in `MatchAny`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub condition_max_size: Option<usize>,

    /// Multivector strict mode configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(nested)]
    pub multivector_config: Option<StrictModeMultivectorConfig>,

    /// Sparse vector strict mode configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(nested)]
    pub sparse_config: Option<StrictModeSparseConfig>,

    /// Max number of payload indexes in a collection
    #[serde(skip_serializing_if = "Option::is_none")]
    // NOTE(review): `min = 0` is vacuous for `usize` — possibly `min = 1` was intended.
    #[validate(range(min = 0))]
    pub max_payload_index_count: Option<usize>,
}
1138
// Manual `Eq`: the struct contains `Option<f64>` (`search_max_oversampling`),
// so `Eq` cannot be derived. NOTE(review): this asserts that field is never NaN.
impl Eq for StrictModeConfig {}
1140
// Manual `Hash` because `search_max_oversampling` is a float and has no `Hash`.
// The field order below determines the resulting hash value — do not reorder.
impl Hash for StrictModeConfig {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        // Exhaustive destructuring: adding a field to `StrictModeConfig`
        // without deciding how to hash it is a compile error here.
        let Self {
            enabled,
            max_query_limit,
            max_timeout,
            unindexed_filtering_retrieve,
            unindexed_filtering_update,
            search_max_hnsw_ef,
            search_allow_exact,
            // We skip hashing this field because we cannot reliably hash a float
            search_max_oversampling: _,
            upsert_max_batchsize,
            search_max_batchsize,
            max_collection_vector_size_bytes,
            read_rate_limit,
            write_rate_limit,
            max_collection_payload_size_bytes,
            max_points_count,
            filter_max_conditions,
            condition_max_size,
            multivector_config,
            sparse_config,
            max_payload_index_count,
        } = self;
        enabled.hash(state);
        max_query_limit.hash(state);
        max_timeout.hash(state);
        unindexed_filtering_retrieve.hash(state);
        unindexed_filtering_update.hash(state);
        search_max_hnsw_ef.hash(state);
        search_allow_exact.hash(state);
        upsert_max_batchsize.hash(state);
        search_max_batchsize.hash(state);
        max_collection_vector_size_bytes.hash(state);
        read_rate_limit.hash(state);
        write_rate_limit.hash(state);
        max_collection_payload_size_bytes.hash(state);
        max_points_count.hash(state);
        filter_max_conditions.hash(state);
        condition_max_size.hash(state);
        multivector_config.hash(state);
        sparse_config.hash(state);
        max_payload_index_count.hash(state);
    }
}
1187
1188// Version of the strict mode config we can present to the user
1189#[derive(Debug, Deserialize, Serialize, JsonSchema,  Clone, PartialEq, Default)]
1190pub struct StrictModeConfigOutput {
1191    // Global
1192    /// Whether strict mode is enabled for a collection or not.
1193    #[serde(skip_serializing_if = "Option::is_none")]
1194    pub enabled: Option<bool>,
1195
1196    /// Max allowed `limit` parameter for all APIs that don't have their own max limit.
1197    #[serde(skip_serializing_if = "Option::is_none")]
1198    #[validate(range(min = 1))]
1199    
1200    pub max_query_limit: Option<usize>,
1201
1202    /// Max allowed `timeout` parameter.
1203    #[serde(skip_serializing_if = "Option::is_none")]
1204    #[validate(range(min = 1))]
1205    
1206    pub max_timeout: Option<usize>,
1207
1208    /// Allow usage of unindexed fields in retrieval based (e.g. search) filters.
1209    #[serde(skip_serializing_if = "Option::is_none")]
1210    pub unindexed_filtering_retrieve: Option<bool>,
1211
1212    /// Allow usage of unindexed fields in filtered updates (e.g. delete by payload).
1213    #[serde(skip_serializing_if = "Option::is_none")]
1214    pub unindexed_filtering_update: Option<bool>,
1215
1216    // Search
1217    /// Max HNSW value allowed in search parameters.
1218    #[serde(skip_serializing_if = "Option::is_none")]
1219    
1220    pub search_max_hnsw_ef: Option<usize>,
1221
1222    /// Whether exact search is allowed or not.
1223    #[serde(skip_serializing_if = "Option::is_none")]
1224    pub search_allow_exact: Option<bool>,
1225
1226    /// Max oversampling value allowed in search.
1227    #[serde(skip_serializing_if = "Option::is_none")]
1228    
1229    pub search_max_oversampling: Option<f64>,
1230
1231    /// Max batchsize when upserting
1232    #[serde(skip_serializing_if = "Option::is_none")]
1233    
1234    pub upsert_max_batchsize: Option<usize>,
1235    /// Max batchsize when searching
1236    #[serde(skip_serializing_if = "Option::is_none")]
1237    
1238    pub search_max_batchsize: Option<usize>,
1239
1240    /// Max size of a collections vector storage in bytes, ignoring replicas.
1241    #[serde(skip_serializing_if = "Option::is_none")]
1242    
1243    pub max_collection_vector_size_bytes: Option<usize>,
1244
1245    /// Max number of read operations per minute per replica
1246    #[serde(skip_serializing_if = "Option::is_none")]
1247    
1248    pub read_rate_limit: Option<usize>,
1249
1250    /// Max number of write operations per minute per replica
1251    #[serde(skip_serializing_if = "Option::is_none")]
1252    
1253    pub write_rate_limit: Option<usize>,
1254
1255    /// Max size of a collections payload storage in bytes
1256    #[serde(skip_serializing_if = "Option::is_none")]
1257    
1258    pub max_collection_payload_size_bytes: Option<usize>,
1259
1260    /// Max number of points estimated in a collection
1261    #[serde(skip_serializing_if = "Option::is_none")]
1262    
1263    pub max_points_count: Option<usize>,
1264
1265    /// Max conditions a filter can have.
1266    #[serde(skip_serializing_if = "Option::is_none")]
1267    
1268    pub filter_max_conditions: Option<usize>,
1269
1270    /// Max size of a condition, eg. items in `MatchAny`.
1271    #[serde(skip_serializing_if = "Option::is_none")]
1272    
1273    pub condition_max_size: Option<usize>,
1274
1275    /// Multivector configuration
1276    #[serde(skip_serializing_if = "Option::is_none")]
1277    pub multivector_config: Option<StrictModeMultivectorConfigOutput>,
1278
1279    /// Sparse vector configuration
1280    #[serde(skip_serializing_if = "Option::is_none")]
1281    pub sparse_config: Option<StrictModeSparseConfigOutput>,
1282
1283    /// Max number of payload indexes in a collection
1284    #[serde(skip_serializing_if = "Option::is_none")]
1285    #[validate(range(min = 0))]
1286    pub max_payload_index_count: Option<usize>,
1287}
1288
1289impl From<StrictModeConfig> for StrictModeConfigOutput {
1290    fn from(config: StrictModeConfig) -> Self {
1291        let StrictModeConfig {
1292            enabled,
1293            max_query_limit,
1294            max_timeout,
1295            unindexed_filtering_retrieve,
1296            unindexed_filtering_update,
1297            search_max_hnsw_ef,
1298            search_allow_exact,
1299            search_max_oversampling,
1300            upsert_max_batchsize,
1301            search_max_batchsize,
1302            max_collection_vector_size_bytes,
1303            read_rate_limit,
1304            write_rate_limit,
1305            max_collection_payload_size_bytes,
1306            max_points_count,
1307            filter_max_conditions,
1308            condition_max_size,
1309            multivector_config,
1310            sparse_config,
1311            max_payload_index_count,
1312        } = config;
1313
1314        Self {
1315            enabled,
1316            max_query_limit,
1317            max_timeout,
1318            unindexed_filtering_retrieve,
1319            unindexed_filtering_update,
1320            search_max_hnsw_ef,
1321            search_allow_exact,
1322            search_max_oversampling,
1323            upsert_max_batchsize,
1324            search_max_batchsize,
1325            max_collection_vector_size_bytes,
1326            read_rate_limit,
1327            write_rate_limit,
1328            max_collection_payload_size_bytes,
1329            max_points_count,
1330            filter_max_conditions,
1331            condition_max_size,
1332            multivector_config: multivector_config.map(StrictModeMultivectorConfigOutput::from),
1333            sparse_config: sparse_config.map(StrictModeSparseConfigOutput::from),
1334            max_payload_index_count,
1335        }
1336    }
1337}
1338
/// Default `ef_construct` value used when building an HNSW index.
pub const DEFAULT_HNSW_EF_CONSTRUCT: usize = 100;
1340
1341impl Default for HnswConfig {
1342    fn default() -> Self {
1343        HnswConfig {
1344            m: 16,
1345            ef_construct: DEFAULT_HNSW_EF_CONSTRUCT,
1346            full_scan_threshold: DEFAULT_FULL_SCAN_THRESHOLD,
1347            max_indexing_threads: 0,
1348            on_disk: Some(false),
1349            payload_m: None,
1350            inline_storage: None,
1351        }
1352    }
1353}
1354
1355impl Default for Indexes {
1356    fn default() -> Self {
1357        Indexes::Plain {}
1358    }
1359}
1360
/// Type of payload storage
#[derive( Debug, Deserialize, Serialize, JsonSchema, Copy, Clone, PartialEq, Eq)]
#[serde(tag = "type", content = "options", rename_all = "snake_case")]
pub enum PayloadStorageType {
    /// Store payload in memory and use persistence storage only if vectors are changed
    #[cfg(feature = "rocksdb")]
    InMemory,
    /// Store payload on disk only, read each time it is requested
    #[cfg(feature = "rocksdb")]
    OnDisk,
    /// Store payload on disk and in memory, read from memory if possible
    Mmap,
    /// Store payload on disk and in memory, populate on load
    InRamMmap,
}
1376
#[cfg(any(test, feature = "testing"))]
#[allow(clippy::derivable_impls)]
impl Default for PayloadStorageType {
    /// Test-only default: mmap-backed payload storage.
    fn default() -> Self {
        Self::Mmap
    }
}
1384
1385impl PayloadStorageType {
1386    /// Convert user-facing `on_disk_payload` (true = store on disk) to storage type.
1387    /// Returns `Mmap` or `InRamMmap`; for RocksDB-backed variants use collection config.
1388    pub fn from_on_disk_payload(on_disk: bool) -> Self {
1389        if on_disk { Self::Mmap } else { Self::InRamMmap }
1390    }
1391
1392    pub fn is_on_disk(&self) -> bool {
1393        match self {
1394            #[cfg(feature = "rocksdb")]
1395            PayloadStorageType::InMemory => false,
1396            #[cfg(feature = "rocksdb")]
1397            PayloadStorageType::OnDisk => true,
1398            PayloadStorageType::Mmap => true,
1399            PayloadStorageType::InRamMmap => false,
1400        }
1401    }
1402}
1403
1404#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema, )]
1405#[serde(rename_all = "snake_case")]
1406pub struct SegmentConfig {
1407    #[serde(default)]
1408    pub vector_data: HashMap<VectorNameBuf, VectorDataConfig>,
1409    #[serde(default)]
1410    #[serde(skip_serializing_if = "HashMap::is_empty")]
1411    pub sparse_vector_data: HashMap<VectorNameBuf, SparseVectorDataConfig>,
1412    /// Defines payload storage type
1413    pub payload_storage_type: PayloadStorageType,
1414}
1415
1416impl SegmentConfig {
1417    /// Helper to get vector specific quantization config.
1418    ///
1419    /// This grabs the quantization config for the given vector name if it exists.
1420    ///
1421    /// If no quantization is configured, `None` is returned.
1422    pub fn quantization_config(&self, vector_name: &VectorName) -> Option<&QuantizationConfig> {
1423        self.vector_data
1424            .get(vector_name)
1425            .and_then(|v| v.quantization_config.as_ref())
1426    }
1427
1428    /// Check if any vector storages are indexed
1429    pub fn is_any_vector_indexed(&self) -> bool {
1430        self.vector_data
1431            .values()
1432            .any(|config| config.index.is_indexed())
1433            || self
1434                .sparse_vector_data
1435                .values()
1436                .any(|config| config.is_indexed())
1437    }
1438
1439    /// Check if all vector storages are indexed
1440    pub fn are_all_vectors_indexed(&self) -> bool {
1441        self.vector_data
1442            .values()
1443            .all(|config| config.index.is_indexed())
1444            && self
1445                .sparse_vector_data
1446                .values()
1447                .all(|config| config.is_indexed())
1448    }
1449
1450    /// Check if any vector storage is on-disk
1451    pub fn is_any_on_disk(&self) -> bool {
1452        self.vector_data
1453            .values()
1454            .any(|config| config.storage_type.is_on_disk())
1455            || self
1456                .sparse_vector_data
1457                .values()
1458                .any(|config| config.index.index_type.is_on_disk())
1459    }
1460
1461    pub fn is_appendable(&self) -> bool {
1462        self.vector_data
1463            .values()
1464            .map(|vector_config| vector_config.is_appendable())
1465            .chain(
1466                self.sparse_vector_data
1467                    .values()
1468                    .map(|sparse_vector_config| {
1469                        sparse_vector_config.index.index_type.is_appendable()
1470                    }),
1471            )
1472            .all(|v| v)
1473    }
1474
1475    pub fn check_compatible(&self, other: &Self) -> Result<(), String> {
1476        // Vector data have to be compatible between two segments.
1477        // Sparse vector data can be different, but a placeholder check is implemented to catch
1478        // and enforce compatibility check for future changes.
1479        // Payload storage type can be different.
1480
1481        // Assert segment config fields
1482        let Self {
1483            vector_data: _,
1484            sparse_vector_data: _,
1485            payload_storage_type: _,
1486        } = self;
1487
1488        check_vectors_map_compatible(
1489            &self.vector_data,
1490            &other.vector_data,
1491            VectorDataConfig::check_compatible,
1492        )?;
1493
1494        check_vectors_map_compatible(
1495            &self.sparse_vector_data,
1496            &other.sparse_vector_data,
1497            SparseVectorDataConfig::check_compatible,
1498        )?;
1499
1500        Ok(())
1501    }
1502}
1503
/// Verify that two vector-config maps have identical key sets and that every
/// pair of same-named configs passes `check`. Returns a human-readable error
/// describing the first mismatch otherwise.
fn check_vectors_map_compatible<C, F>(
    this: &HashMap<String, C>,
    other: &HashMap<String, C>,
    check: F,
) -> Result<(), String>
where
    F: Fn(&C, &C) -> Result<(), String>,
{
    // A differing number of storages is reported with both key sets for context.
    if this.len() != other.len() {
        let expected_keys: Vec<String> = this.keys().map(|k| format!("{k:?}")).collect();
        let actual_keys: Vec<String> = other.keys().map(|k| format!("{k:?}")).collect();
        return Err(format!(
            "Incompatible configs: expected vector storages with keys {expected_keys:?}, but got {actual_keys:?}"
        ));
    }

    // Same length, so checking every key of `this` also proves the key sets match.
    this.iter().try_for_each(|(vector_name, config)| {
        let other_config = other.get(vector_name).ok_or_else(|| {
            format!(
                "Incompatible configs: expected vector storage with key {vector_name:?} not found in other config"
            )
        })?;

        check(config, other_config)
            .map_err(|err| format!("Incompatible config for vector {vector_name:?}: {err}"))
    })
}
1533
/// Storage types for vectors
///
/// Note: `is_on_disk()` below treats only `Mmap` and `ChunkedMmap` as on-disk.
#[derive(Debug, Deserialize, Serialize, JsonSchema,  Eq, PartialEq, Copy, Clone)]
pub enum VectorStorageType {
    /// Storage in memory (RAM)
    ///
    /// Will be very fast at the cost of consuming a lot of memory.
    Memory,
    /// Storage in mmap file, not appendable
    ///
    /// Search performance is defined by disk speed and the fraction of vectors that fit in memory.
    Mmap,
    /// Storage in chunked mmap files, appendable
    ///
    /// Search performance is defined by disk speed and the fraction of vectors that fit in memory.
    ChunkedMmap,
    /// Same as `ChunkedMmap`, but vectors are forced to be locked in RAM
    /// In this way we avoid cold requests to disk, but risk to run out of memory
    ///
    /// Designed as a replacement for `Memory`, which doesn't depend on RocksDB
    InRamChunkedMmap,
    /// Storage in a single mmap file, not appendable
    /// Pre-fetched into RAM on load
    InRamMmap,
}
1558
#[cfg(any(test, feature = "testing"))]
#[allow(clippy::derivable_impls)]
impl Default for VectorStorageType {
    /// Test-only default: appendable in-RAM chunked mmap storage.
    fn default() -> Self {
        Self::InRamChunkedMmap
    }
}
1566
/// Storage datatype for individual vector elements.
// NOTE(review): the previous doc comment ("Storage types for vectors") was
// copy-pasted from `VectorStorageType` and did not describe this enum.
#[derive(
    Default, Debug, Deserialize, Serialize, JsonSchema,  Eq, PartialEq, Copy, Clone,
)]
#[serde(rename_all = "snake_case")]
pub enum VectorStorageDatatype {
    /// Single-precision floating point
    #[default]
    Float32,
    /// Half-precision floating point
    Float16,
    /// Unsigned 8-bit integer
    Uint8,
}
1581
/// Configuration for points that store multiple vectors under one vector name.
#[derive(
    Debug, Default, Deserialize, Serialize, JsonSchema,  Eq, PartialEq, Copy, Clone, Hash,
)]
#[serde(rename_all = "snake_case")]
pub struct MultiVectorConfig {
    /// How to compare multivector points
    pub comparator: MultiVectorComparator,
}
1590
1591impl MultiVectorConfig {
1592    fn check_compatible(&self, other: &Self) -> Result<(), String> {
1593        // Assert multi-vector config fields
1594        let Self { comparator } = self;
1595
1596        if *comparator != other.comparator {
1597            return Err(format!(
1598                "Incompatible configs: expected multi-vector comparator {comparator:?}, but got {other_comparator:?}",
1599                other_comparator = other.comparator
1600            ));
1601        }
1602
1603        Ok(())
1604    }
1605}
1606
/// Scoring function used to compare two multivector points.
#[derive(
    Debug, Default, Deserialize, Serialize, JsonSchema,  Eq, PartialEq, Copy, Clone, Hash,
)]
#[serde(rename_all = "snake_case")]
pub enum MultiVectorComparator {
    /// Maximum-similarity (MaxSim) comparator (default and currently the only option).
    #[default]
    MaxSim,
}
1615
1616impl VectorStorageType {
1617    /// Convert user-facing `on_disk` (true = store on disk) to appendable vector storage type.
1618    /// Returns `ChunkedMmap` or `InRamChunkedMmap`.
1619    pub fn from_on_disk(on_disk: bool) -> Self {
1620        if on_disk {
1621            Self::ChunkedMmap
1622        } else {
1623            Self::InRamChunkedMmap
1624        }
1625    }
1626
1627    /// Whether this storage type is a mmap on disk
1628    pub fn is_on_disk(&self) -> bool {
1629        match self {
1630            Self::Memory | Self::InRamChunkedMmap | Self::InRamMmap => false,
1631            Self::Mmap | Self::ChunkedMmap => true,
1632        }
1633    }
1634}
1635
1636/// Config of single vector data storage
1637#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema, )]
1638#[serde(rename_all = "snake_case")]
1639pub struct VectorDataConfig {
1640    /// Size/dimensionality of the vectors used
1641    pub size: usize,
1642    /// Type of distance function used for measuring distance between vectors
1643    pub distance: Distance,
1644    /// Type of storage this vector uses
1645    pub storage_type: VectorStorageType,
1646    /// Type of index used for search
1647    pub index: Indexes,
1648    /// Vector specific quantization config that overrides collection config
1649    pub quantization_config: Option<QuantizationConfig>,
1650    /// Vector specific configuration to enable multiple vectors per point
1651    #[serde(default, skip_serializing_if = "Option::is_none")]
1652    pub multivector_config: Option<MultiVectorConfig>,
1653    /// Vector specific configuration to set specific storage element type
1654    #[serde(default, skip_serializing_if = "Option::is_none")]
1655    pub datatype: Option<VectorStorageDatatype>,
1656}
1657
impl VectorDataConfig {
    /// Whether this vector data can be appended to
    ///
    /// This requires an index and storage type that both support appending.
    pub fn is_appendable(&self) -> bool {
        // A plain index supports appending; HNSW does not.
        let is_index_appendable = match self.index {
            Indexes::Plain {} => true,
            Indexes::Hnsw(_) => false,
        };
        // Memory and the chunked mmap variants can grow; plain mmap
        // storages (Mmap, InRamMmap) cannot.
        let is_storage_appendable = match self.storage_type {
            VectorStorageType::Memory => true,
            VectorStorageType::Mmap => false,
            VectorStorageType::ChunkedMmap => true,
            VectorStorageType::InRamChunkedMmap => true,
            VectorStorageType::InRamMmap => false,
        };
        is_index_appendable && is_storage_appendable
    }

    /// Check whether `self` and `other` describe compatible vector data.
    ///
    /// Size, distance, storage datatype and multivector config must match;
    /// storage type, index and quantization config may differ. On mismatch,
    /// returns a human-readable description of the first difference found.
    pub fn check_compatible(&self, other: &Self) -> Result<(), String> {
        // Size and distance have to be the same for both segments.
        // Storage type, index and quantization config can be different.
        //
        // Assert vector data config fields
        // (Exhaustive destructuring: adding a field to the struct becomes a
        // compile error here, so this check cannot silently go stale.)
        let Self {
            size,
            distance,
            storage_type: _,
            index: _,
            quantization_config: _,
            multivector_config,
            datatype,
        } = self;

        if *size != other.size {
            return Err(format!(
                "Incompatible configs: expected vector size {size}, but got {other_size}",
                other_size = other.size
            ));
        }

        if *distance != other.distance {
            return Err(format!(
                "Incompatible configs: expected distance {distance:?}, but got {other_distance:?}",
                other_distance = other.distance
            ));
        }

        // A missing datatype means the default Float32 on either side.
        let left_datatype = datatype.unwrap_or(VectorStorageDatatype::Float32);
        let right_datatype = other.datatype.unwrap_or(VectorStorageDatatype::Float32);
        if left_datatype != right_datatype {
            return Err(format!(
                "Incompatible configs: expected vector storage datatype {left_datatype:?}, but got {right_datatype:?}",
            ));
        }

        // Multivector config must be present on both sides or on neither.
        match (multivector_config, &other.multivector_config) {
            (None, None) => {}
            (Some(this), Some(other)) => {
                MultiVectorConfig::check_compatible(this, other)?;
            }
            _ => {
                return Err(format!(
                    "Incompatible configs: expected multivector config {this_multivector_config:?}, but got {other_multivector_config:?}",
                    this_multivector_config = multivector_config,
                    other_multivector_config = other.multivector_config
                ));
            }
        }
        Ok(())
    }
}
1730
// Storage backend for sparse vectors. Plain `//` comments here on purpose:
// `///` docs on this type/variants feed the `JsonSchema` output.
#[derive(
    Copy, Clone, Debug, Default, Eq, PartialEq, Deserialize, Serialize, JsonSchema, 
)]
#[serde(rename_all = "snake_case")]
pub enum SparseVectorStorageType {
    /// Storage on disk (rocksdb storage)
    #[cfg(feature = "rocksdb")]
    OnDisk,
    /// Storage in memory maps (gridstore storage)
    #[default]
    Mmap,
}
1743
1744impl SparseVectorStorageType {
1745    /// Whether this storage type is a mmap on disk
1746    pub fn is_on_disk(&self) -> bool {
1747        match self {
1748            // Both options are on disk, but we keep it explicit for the case if someone adds a new
1749            // storage type in the future
1750            #[cfg(feature = "rocksdb")]
1751            Self::OnDisk => true,
1752            Self::Mmap => true,
1753        }
1754    }
1755}
1756
/// Config of single sparse vector data storage
#[derive(
    Copy, Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema, Validate, 
)]
#[serde(rename_all = "snake_case")]
pub struct SparseVectorDataConfig {
    /// Sparse inverted index config
    pub index: SparseIndexConfig,

    /// Type of storage this sparse vector uses
    // Missing field deserializes via the cfg-dependent default function below.
    #[serde(default = "default_sparse_vector_storage_type_when_not_in_config")]
    pub storage_type: SparseVectorStorageType,

    /// Configures addition value modifications for sparse vectors.
    /// Default: none
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub modifier: Option<Modifier>,
}
1775
/// Default storage type for configs persisted without the `storage_type`
/// field: `OnDisk` (rocksdb) when the `rocksdb` feature is enabled,
/// otherwise the regular default (`Mmap`).
fn default_sparse_vector_storage_type_when_not_in_config() -> SparseVectorStorageType {
    #[cfg(feature = "rocksdb")]
    {
        SparseVectorStorageType::OnDisk
    }
    #[cfg(not(feature = "rocksdb"))]
    {
        SparseVectorStorageType::default()
    }
}
1787
1788impl SparseVectorDataConfig {
1789    pub fn is_indexed(&self) -> bool {
1790        true
1791    }
1792
1793    pub fn check_compatible(&self, other: &Self) -> Result<(), String> {
1794        // Both index and storage type can be different for two segments to be compatible
1795
1796        // Assert sparse vector config fields
1797        let Self {
1798            index: _,
1799            storage_type: _,
1800            modifier,
1801        } = self;
1802
1803        if modifier != &other.modifier {
1804            return Err(format!(
1805                "Incompatible configs: expected sparse vector modifier {modifier:?}, but got {other_modifier:?}",
1806                other_modifier = other.modifier
1807            ));
1808        }
1809
1810        Ok(())
1811    }
1812}
1813
/// Default value based on experiments and observations
pub const DEFAULT_FULL_SCAN_THRESHOLD: usize = 10_000;

/// Sparse-vector counterpart of [`DEFAULT_FULL_SCAN_THRESHOLD`].
pub const DEFAULT_SPARSE_FULL_SCAN_THRESHOLD: usize = 5_000;
1818
/// Persistable state of segment configuration
#[derive(Debug, Deserialize, Serialize, Clone)]
#[serde(rename_all = "snake_case")]
pub struct SegmentState {
    // Presumably the version the segment was created at; `#[serde(default)]`
    // keeps states persisted before this field existed deserializable (None).
    #[serde(default)]
    pub initial_version: Option<SeqNumberType>,
    // Current segment version, if any has been applied.
    pub version: Option<SeqNumberType>,
    // Full segment configuration.
    pub config: SegmentConfig,
}
1828
/// Raw `(longitude, latitude)` pair — see the `GeoPoint` conversions below for the field order.
pub type RawGeoPoint = (f64, f64);
1830
/// Geo point payload schema
#[derive(
    Debug,
    Deserialize,
    Serialize,
    JsonSchema,
    Clone,
    Copy,
    PartialEq,
    Eq,
    Hash,
    Default,
    PartialOrd,
    Ord,
    Pod,
    Zeroable,
)]
// Deserialization goes through `GeoPointShadow`, whose `TryFrom` range-checks
// the coordinates. `Pod`/`Zeroable` + `repr(C)` allow byte-level
// reinterpretation of the struct.
#[serde(try_from = "GeoPointShadow")]
#[repr(C)]
pub struct GeoPoint {
    // Longitude in [-180; 180] (enforced by `GeoPoint::validate`);
    // `OrderedFloat` supplies the `Eq`/`Ord`/`Hash` impls.
    pub lon: OrderedFloat<f64>,
    // Latitude in [-90; 90] (enforced by `GeoPoint::validate`).
    pub lat: OrderedFloat<f64>,
}
1854
/// Ordered sequence of GeoPoints representing the line
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
pub struct GeoLineString {
    // Vertices of the line, in order.
    pub points: Vec<GeoPoint>,
}
1860
// Private mirror of `GeoPoint` used by `#[serde(try_from = "GeoPointShadow")]`:
// it deserializes plain f64 fields, then `TryFrom` performs range validation.
#[derive(Deserialize)]
struct GeoPointShadow {
    pub lon: f64,
    pub lat: f64,
}
1866
/// Error returned when geo coordinates fall outside the valid lon/lat ranges.
#[derive(Debug)]
pub struct GeoPointValidationError {
    /// Offending longitude value.
    pub lon: f64,
    /// Offending latitude value.
    pub lat: f64,
}
1872
1873// The error type has to implement Display
1874impl std::fmt::Display for GeoPointValidationError {
1875    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1876        write!(
1877            formatter,
1878            "Wrong format of GeoPoint payload: expected `lat` = {} within [-90;90] and `lon` = {} within [-180;180]",
1879            self.lat, self.lon,
1880        )
1881    }
1882}
1883
1884impl GeoPoint {
1885    pub fn validate(lon: f64, lat: f64) -> Result<(), GeoPointValidationError> {
1886        let max_lon = 180f64;
1887        let min_lon = -180f64;
1888        let max_lat = 90f64;
1889        let min_lat = -90f64;
1890
1891        if !(min_lon..=max_lon).contains(&lon) || !(min_lat..=max_lat).contains(&lat) {
1892            return Err(GeoPointValidationError { lon, lat });
1893        }
1894        Ok(())
1895    }
1896
1897    pub fn new(lon: f64, lat: f64) -> Result<Self, GeoPointValidationError> {
1898        Self::validate(lon, lat)?;
1899        Ok(Self::new_unchecked(lon, lat))
1900    }
1901
1902    pub const fn new_unchecked(lon: f64, lat: f64) -> Self {
1903        GeoPoint {
1904            lon: OrderedFloat(lon),
1905            lat: OrderedFloat(lat),
1906        }
1907    }
1908}
1909
1910impl TryFrom<GeoPointShadow> for GeoPoint {
1911    type Error = GeoPointValidationError;
1912
1913    fn try_from(value: GeoPointShadow) -> Result<Self, Self::Error> {
1914        let GeoPointShadow { lon, lat } = value;
1915        GeoPoint::validate(lon, lat)?;
1916
1917        Ok(Self::new_unchecked(lon, lat))
1918    }
1919}
1920
1921impl From<GeoPoint> for geo::Point {
1922    fn from(
1923        GeoPoint {
1924            lon: OrderedFloat(lon),
1925            lat: OrderedFloat(lat),
1926        }: GeoPoint,
1927    ) -> Self {
1928        Self::new(lon, lat)
1929    }
1930}
1931
1932impl From<RawGeoPoint> for GeoPoint {
1933    fn from((lon, lat): RawGeoPoint) -> Self {
1934        GeoPoint::new(lon, lat).expect("invalid GeoPoint coordinates")
1935    }
1936}
1937
1938impl From<GeoPoint> for RawGeoPoint {
1939    fn from(geo_point: GeoPoint) -> Self {
1940        (geo_point.lon.0, geo_point.lat.0)
1941    }
1942}
1943
pub trait PayloadContainer {
    /// Return value from payload by path.
    /// If value is not present in the payload, returns empty vector.
    fn get_value(&self, path: &JsonPath) -> MultiValue<&Value>;

    /// Same as [`Self::get_value`], but clones every matched value.
    fn get_value_cloned(&self, path: &JsonPath) -> MultiValue<Value> {
        self.get_value(path).into_iter().cloned().collect()
    }
}
1953
/// Construct a [`Payload`] value from a JSON literal.
///
/// Similar to [`serde_json::json!`] but only allows objects (aka maps).
macro_rules! payload_json {
    ($($tt:tt)*) => {
        // The tokens are wrapped in `{ ... }`, so `json!` always yields an
        // object; the non-object arm is therefore impossible.
        match ::serde_json::json!( { $($tt)* } ) {
            ::serde_json::Value::Object(map) => $crate::segment::types::Payload(map),
            _ => unreachable!(),
        }
    };
}
1965
// Example payload referenced by `#[schemars(example = "payload_example")]`
// on the `Payload` struct below.
#[allow(clippy::unnecessary_wraps)] // Used as schemars example
fn payload_example() -> Option<Payload> {
    Some(payload_json! {
        "city": "London",
        "color": "green",
    })
}
1973
// Arbitrary JSON object stored with a point; a newtype over `serde_json::Map`.
// Plain `//` comment on purpose: a `///` doc would leak into the JsonSchema output.
#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize, JsonSchema, Hash)]
#[schemars(example = "payload_example")]
pub struct Payload(pub Map<String, Value>);
1977
impl Payload {
    /// Merge `value` into this payload (delegates to `utils::merge_map`).
    pub fn merge(&mut self, value: &Payload) {
        utils::merge_map(&mut self.0, &value.0)
    }

    /// Set `value` inside this payload at the JSON path `key`.
    pub fn merge_by_key(&mut self, value: &Payload, key: &JsonPath) {
        JsonPath::value_set(Some(key), &mut self.0, &value.0);
    }

    /// Remove and return the values matched by `path`.
    pub fn remove(&mut self, path: &JsonPath) -> Vec<Value> {
        path.value_remove(&mut self.0).to_vec()
    }

    /// Number of top-level keys.
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// True when there are no top-level keys.
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// True when the given top-level key exists.
    pub fn contains_key(&self, key: &str) -> bool {
        self.0.contains_key(key)
    }

    /// Iterate over the top-level keys.
    pub fn keys(&self) -> impl Iterator<Item = &String> {
        self.0.keys()
    }
}
2007
// A bare JSON map is itself a payload container.
impl PayloadContainer for Map<String, Value> {
    fn get_value(&self, path: &JsonPath) -> MultiValue<&Value> {
        path.value_get(self)
    }
}

impl PayloadContainer for Payload {
    fn get_value(&self, path: &JsonPath) -> MultiValue<&Value> {
        path.value_get(&self.0)
    }
}

// Works for both the borrowed and the Rc-owned variant via `AsRef`.
impl PayloadContainer for OwnedPayloadRef<'_> {
    fn get_value(&self, path: &JsonPath) -> MultiValue<&Value> {
        path.value_get(self.as_ref())
    }
}
2025
2026impl Default for Payload {
2027    fn default() -> Self {
2028        Payload(Map::new())
2029    }
2030}
2031
2032impl IntoIterator for Payload {
2033    type Item = (String, Value);
2034    type IntoIter = serde_json::map::IntoIter;
2035
2036    fn into_iter(self) -> serde_json::map::IntoIter {
2037        self.0.into_iter()
2038    }
2039}
2040
2041impl From<Map<String, Value>> for Payload {
2042    fn from(value: serde_json::Map<String, Value>) -> Self {
2043        Payload(value)
2044    }
2045}
2046
/// Payload map that is either borrowed or shared via `Rc`, so both cases can
/// be used uniformly through `Deref`/`AsRef`.
#[derive(Clone, Debug)]
pub enum OwnedPayloadRef<'a> {
    /// Borrowed payload map.
    Ref(&'a Map<String, Value>),
    /// Reference-counted payload map (cheap to clone).
    Owned(Rc<Map<String, Value>>),
}
2052
2053impl Deref for OwnedPayloadRef<'_> {
2054    type Target = Map<String, Value>;
2055
2056    fn deref(&self) -> &Self::Target {
2057        match self {
2058            OwnedPayloadRef::Ref(reference) => reference,
2059            OwnedPayloadRef::Owned(owned) => owned.deref(),
2060        }
2061    }
2062}
2063
2064impl AsRef<Map<String, Value>> for OwnedPayloadRef<'_> {
2065    fn as_ref(&self) -> &Map<String, Value> {
2066        match self {
2067            OwnedPayloadRef::Ref(reference) => reference,
2068            OwnedPayloadRef::Owned(owned) => owned.deref(),
2069        }
2070    }
2071}
2072
// Owning conversions wrap the map in an `Rc`.
impl From<Payload> for OwnedPayloadRef<'_> {
    fn from(payload: Payload) -> Self {
        OwnedPayloadRef::Owned(Rc::new(payload.0))
    }
}

impl From<Map<String, Value>> for OwnedPayloadRef<'_> {
    fn from(payload: Map<String, Value>) -> Self {
        OwnedPayloadRef::Owned(Rc::new(payload))
    }
}

// Borrowing conversions keep a plain reference.
impl<'a> From<&'a Payload> for OwnedPayloadRef<'a> {
    fn from(payload: &'a Payload) -> Self {
        OwnedPayloadRef::Ref(&payload.0)
    }
}

impl<'a> From<&'a Map<String, Value>> for OwnedPayloadRef<'a> {
    fn from(payload: &'a Map<String, Value>) -> Self {
        OwnedPayloadRef::Ref(payload)
    }
}
2096
/// Payload interface structure which ensures that user is allowed to pass payload in
/// both - array and single element forms.
///
/// Example:
///
/// Both versions should work:
/// ```json
/// {..., "payload": {"city": {"type": "keyword", "value": ["Berlin", "London"] }}},
/// {..., "payload": {"city": {"type": "keyword", "value": "Moscow" }}},
/// ```
// Untagged: serde tries `List` first, then falls back to a single `Value`.
#[derive(Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Clone)]
#[serde(untagged, rename_all = "snake_case")]
pub enum PayloadVariant<T> {
    List(Vec<T>),
    Value(T),
}
2113
/// All possible names of payload types
// Plain `//` note: each variant serializes to its snake_case name, which is
// also what `name()` below reports via `serde_variant`.
#[derive(
    Debug, Deserialize, Serialize, JsonSchema,  Clone, Copy, PartialEq, Hash, Eq, EnumIter,
)]
#[serde(rename_all = "snake_case")]
pub enum PayloadSchemaType {
    Keyword,
    Integer,
    Float,
    Geo,
    Text,
    Bool,
    Datetime,
    Uuid,
}
2129
2130impl PayloadSchemaType {
2131    /// Human-readable type name
2132    pub fn name(&self) -> &'static str {
2133        serde_variant::to_variant_name(&self).unwrap_or("unknown")
2134    }
2135
2136    pub fn expand(&self) -> PayloadSchemaParams {
2137        match self {
2138            Self::Keyword => PayloadSchemaParams::Keyword(KeywordIndexParams::default()),
2139            Self::Integer => PayloadSchemaParams::Integer(IntegerIndexParams::default()),
2140            Self::Float => PayloadSchemaParams::Float(FloatIndexParams::default()),
2141            Self::Geo => PayloadSchemaParams::Geo(GeoIndexParams::default()),
2142            Self::Text => PayloadSchemaParams::Text(TextIndexParams::default()),
2143            Self::Bool => PayloadSchemaParams::Bool(BoolIndexParams::default()),
2144            Self::Datetime => PayloadSchemaParams::Datetime(DatetimeIndexParams::default()),
2145            Self::Uuid => PayloadSchemaParams::Uuid(UuidIndexParams::default()),
2146        }
2147    }
2148}
2149
/// Payload type with parameters
// Untagged: the variant is inferred from the shape of the params object.
#[derive(Debug, Deserialize, Serialize, JsonSchema,  Clone, PartialEq, Hash, Eq)]
#[serde(untagged, rename_all = "snake_case")]

pub enum PayloadSchemaParams {
    Keyword(KeywordIndexParams),
    Integer(IntegerIndexParams),
    Float(FloatIndexParams),
    Geo(GeoIndexParams),
    Text(TextIndexParams),
    Bool(BoolIndexParams),
    Datetime(DatetimeIndexParams),
    Uuid(UuidIndexParams),
}
2164
impl PayloadSchemaParams {
    /// Human-readable type name
    pub fn name(&self) -> &'static str {
        self.kind().name()
    }

    /// The bare [`PayloadSchemaType`] these parameters belong to.
    pub fn kind(&self) -> PayloadSchemaType {
        match self {
            PayloadSchemaParams::Keyword(_) => PayloadSchemaType::Keyword,
            PayloadSchemaParams::Integer(_) => PayloadSchemaType::Integer,
            PayloadSchemaParams::Float(_) => PayloadSchemaType::Float,
            PayloadSchemaParams::Geo(_) => PayloadSchemaType::Geo,
            PayloadSchemaParams::Text(_) => PayloadSchemaType::Text,
            PayloadSchemaParams::Bool(_) => PayloadSchemaType::Bool,
            PayloadSchemaParams::Datetime(_) => PayloadSchemaType::Datetime,
            PayloadSchemaParams::Uuid(_) => PayloadSchemaType::Uuid,
        }
    }

    /// Whether this field is flagged for tenant/principal optimization:
    /// `is_tenant` for keyword/uuid fields, `is_principal` for
    /// integer/float/datetime fields; geo, text and bool never qualify.
    /// Unset flags count as `false`.
    pub fn tenant_optimization(&self) -> bool {
        match self {
            PayloadSchemaParams::Keyword(keyword) => keyword.is_tenant.unwrap_or_default(),
            PayloadSchemaParams::Integer(integer) => integer.is_principal.unwrap_or_default(),
            PayloadSchemaParams::Float(float) => float.is_principal.unwrap_or_default(),
            PayloadSchemaParams::Datetime(datetime) => datetime.is_principal.unwrap_or_default(),
            PayloadSchemaParams::Uuid(uuid) => uuid.is_tenant.unwrap_or_default(),
            PayloadSchemaParams::Geo(_)
            | PayloadSchemaParams::Text(_)
            | PayloadSchemaParams::Bool(_) => false,
        }
    }

    /// Whether the `on_disk` flag is set for any param type
    /// (unset counts as `false`).
    pub fn is_on_disk(&self) -> bool {
        match self {
            PayloadSchemaParams::Keyword(i) => i.on_disk.unwrap_or_default(),
            PayloadSchemaParams::Integer(i) => i.on_disk.unwrap_or_default(),
            PayloadSchemaParams::Float(i) => i.on_disk.unwrap_or_default(),
            PayloadSchemaParams::Datetime(i) => i.on_disk.unwrap_or_default(),
            PayloadSchemaParams::Uuid(i) => i.on_disk.unwrap_or_default(),
            PayloadSchemaParams::Text(i) => i.on_disk.unwrap_or_default(),
            PayloadSchemaParams::Geo(i) => i.on_disk.unwrap_or_default(),
            PayloadSchemaParams::Bool(i) => i.on_disk.unwrap_or_default(),
        }
    }

    /// Whether the `enable_hnsw` flag is set (unset counts as `true`).
    pub fn enable_hnsw(&self) -> bool {
        match self {
            PayloadSchemaParams::Keyword(params) => params.enable_hnsw.unwrap_or(true),
            PayloadSchemaParams::Integer(params) => params.enable_hnsw.unwrap_or(true),
            PayloadSchemaParams::Float(params) => params.enable_hnsw.unwrap_or(true),
            PayloadSchemaParams::Datetime(params) => params.enable_hnsw.unwrap_or(true),
            PayloadSchemaParams::Uuid(params) => params.enable_hnsw.unwrap_or(true),
            PayloadSchemaParams::Text(params) => params.enable_hnsw.unwrap_or(true),
            PayloadSchemaParams::Geo(params) => params.enable_hnsw.unwrap_or(true),
            PayloadSchemaParams::Bool(params) => params.enable_hnsw.unwrap_or(true),
        }
    }
}
2223
2224impl Validate for PayloadSchemaParams {
2225    fn validate(&self) -> Result<(), ValidationErrors> {
2226        match self {
2227            PayloadSchemaParams::Keyword(_) => Ok(()),
2228            PayloadSchemaParams::Integer(integer_index_params) => integer_index_params.validate(),
2229            PayloadSchemaParams::Float(_) => Ok(()),
2230            PayloadSchemaParams::Geo(_) => Ok(()),
2231            PayloadSchemaParams::Text(_) => Ok(()),
2232            PayloadSchemaParams::Bool(_) => Ok(()),
2233            PayloadSchemaParams::Datetime(_) => Ok(()),
2234            PayloadSchemaParams::Uuid(_) => Ok(()),
2235        }
2236    }
2237}
2238
// Schema of a payload field: either a bare type or a type with parameters.
// `PartialEq` and `Hash` are hand-written below so that `FieldType(t)`
// compares/hashes equal to `FieldParams(t.expand())`.
#[derive(Clone, Debug, Eq, Deserialize, Serialize, JsonSchema)]
#[serde(untagged, rename_all = "snake_case")]
pub enum PayloadFieldSchema {
    FieldType(PayloadSchemaType),
    FieldParams(PayloadSchemaParams),
}
2245
2246impl PartialEq for PayloadFieldSchema {
2247    fn eq(&self, other: &Self) -> bool {
2248        match (self, other) {
2249            (Self::FieldType(this), Self::FieldType(other)) => this == other,
2250            (Self::FieldParams(this), Self::FieldParams(other)) => this == other,
2251            (Self::FieldType(this), Self::FieldParams(other)) => &this.expand() == other,
2252            (Self::FieldParams(this), Self::FieldType(other)) => this == &other.expand(),
2253        }
2254    }
2255}
2256
2257impl hash::Hash for PayloadFieldSchema {
2258    fn hash<H: hash::Hasher>(&self, state: &mut H) {
2259        match self {
2260            PayloadFieldSchema::FieldType(default) => default.expand().hash(state),
2261            PayloadFieldSchema::FieldParams(params) => params.hash(state),
2262        }
2263    }
2264}
2265
2266impl Validate for PayloadFieldSchema {
2267    fn validate(&self) -> Result<(), ValidationErrors> {
2268        match self {
2269            PayloadFieldSchema::FieldType(_) => Ok(()), // nothing to validate
2270            PayloadFieldSchema::FieldParams(payload_schema_params) => {
2271                payload_schema_params.validate()
2272            }
2273        }
2274    }
2275}
2276
2277impl Display for PayloadFieldSchema {
2278    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
2279        match self {
2280            PayloadFieldSchema::FieldType(t) => write!(f, "{}", t.name()),
2281            PayloadFieldSchema::FieldParams(params) => match params {
2282                PayloadSchemaParams::Keyword(_)
2283                | PayloadSchemaParams::Float(_)
2284                | PayloadSchemaParams::Geo(_)
2285                | PayloadSchemaParams::Bool(_)
2286                | PayloadSchemaParams::Datetime(_)
2287                | PayloadSchemaParams::Uuid(_) => write!(f, "{}", params.name()),
2288                PayloadSchemaParams::Integer(integer_params) => {
2289                    let range = integer_params.range.unwrap_or(true);
2290                    let lookup = integer_params.lookup.unwrap_or(true);
2291                    if range && lookup {
2292                        write!(f, "integer")
2293                    } else {
2294                        write!(f, "integer (with range: {range}, lookup: {lookup})")
2295                    }
2296                }
2297                PayloadSchemaParams::Text(text_params) => {
2298                    if text_params.phrase_matching.unwrap_or_default() {
2299                        write!(f, "text (with phrase_matching: true)")
2300                    } else {
2301                        write!(f, "text")
2302                    }
2303                }
2304            },
2305        }
2306    }
2307}
2308
impl PayloadFieldSchema {
    /// Expand into the parameter form: borrowed when params are stored,
    /// freshly built defaults for a bare field type.
    pub fn expand(&self) -> Cow<'_, PayloadSchemaParams> {
        match self {
            PayloadFieldSchema::FieldType(t) => Cow::Owned(t.expand()),
            PayloadFieldSchema::FieldParams(p) => Cow::Borrowed(p),
        }
    }

    /// Human-readable type name
    pub fn name(&self) -> &'static str {
        match self {
            PayloadFieldSchema::FieldType(field_type) => field_type.name(),
            PayloadFieldSchema::FieldParams(field_params) => field_params.name(),
        }
    }

    /// Whether the field is flagged for tenant/principal optimization.
    /// Always `false` for a bare field type (the flags live in the params).
    pub fn is_tenant(&self) -> bool {
        match self {
            PayloadFieldSchema::FieldType(_) => false,
            PayloadFieldSchema::FieldParams(params) => params.tenant_optimization(),
        }
    }

    /// Whether the index is configured on disk. Always `false` for a bare type.
    pub fn is_on_disk(&self) -> bool {
        match self {
            PayloadFieldSchema::FieldType(_) => false,
            PayloadFieldSchema::FieldParams(params) => params.is_on_disk(),
        }
    }

    /// The bare schema type, dropping any parameters.
    pub fn kind(&self) -> PayloadSchemaType {
        match self {
            PayloadFieldSchema::FieldType(t) => *t,
            PayloadFieldSchema::FieldParams(p) => p.kind(),
        }
    }

    /// Check if this type supports a `match` condition
    pub fn supports_match(&self) -> bool {
        match self {
            PayloadFieldSchema::FieldType(payload_schema_type) => match payload_schema_type {
                PayloadSchemaType::Keyword => true,
                PayloadSchemaType::Integer => true,
                PayloadSchemaType::Uuid => true,
                PayloadSchemaType::Bool => true,
                PayloadSchemaType::Float => false,
                PayloadSchemaType::Geo => false,
                PayloadSchemaType::Text => false,
                PayloadSchemaType::Datetime => false,
            },
            PayloadFieldSchema::FieldParams(payload_schema_params) => match payload_schema_params {
                PayloadSchemaParams::Keyword(_) => true,
                // NOTE(review): a bare `FieldType(Integer)` above is always
                // `true`, but explicit integer params require
                // `lookup == Some(true)` — a `lookup: None` counts as
                // unsupported here. Confirm this asymmetry is intended.
                PayloadSchemaParams::Integer(integer_index_params) => {
                    integer_index_params.lookup == Some(true)
                }
                PayloadSchemaParams::Uuid(_) => true,
                PayloadSchemaParams::Bool(_) => true,
                PayloadSchemaParams::Float(_) => false,
                PayloadSchemaParams::Geo(_) => false,
                PayloadSchemaParams::Text(_) => false,
                PayloadSchemaParams::Datetime(_) => false,
            },
        }
    }

    /// Whether the `enable_hnsw` flag is set (unset / bare type counts as `true`).
    pub fn enable_hnsw(&self) -> bool {
        match self {
            PayloadFieldSchema::FieldType(_) => true,
            PayloadFieldSchema::FieldParams(p) => p.enable_hnsw(),
        }
    }
}
2381
2382impl From<PayloadSchemaType> for PayloadFieldSchema {
2383    fn from(payload_schema_type: PayloadSchemaType) -> Self {
2384        PayloadFieldSchema::FieldType(payload_schema_type)
2385    }
2386}
2387
2388impl TryFrom<PayloadIndexInfo> for PayloadFieldSchema {
2389    type Error = String;
2390
2391    fn try_from(index_info: PayloadIndexInfo) -> Result<Self, Self::Error> {
2392        let PayloadIndexInfo {
2393            data_type,
2394            params,
2395            points: _,
2396        } = index_info;
2397
2398        match params {
2399            None => Ok(PayloadFieldSchema::FieldType(data_type)),
2400
2401            Some(params) if data_type == params.kind() => {
2402                Ok(PayloadFieldSchema::FieldParams(params))
2403            }
2404
2405            Some(params) => Err(format!(
2406                "payload field with type {data_type:?} has parameters of type {:?}",
2407                params.kind(),
2408            )),
2409        }
2410    }
2411}
2412
2413pub fn value_type(value: &Value) -> Option<PayloadSchemaType> {
2414    match value {
2415        Value::Null => None,
2416        Value::Bool(_) => None,
2417        Value::Number(num) => {
2418            if num.is_i64() {
2419                Some(PayloadSchemaType::Integer)
2420            } else if num.is_f64() {
2421                Some(PayloadSchemaType::Float)
2422            } else {
2423                None
2424            }
2425        }
2426        Value::String(_) => Some(PayloadSchemaType::Keyword),
2427        Value::Array(_) => None,
2428        Value::Object(obj) => {
2429            let lon_op = obj.get("lon").and_then(|x| x.as_f64());
2430            let lat_op = obj.get("lat").and_then(|x| x.as_f64());
2431
2432            if let (Some(_), Some(_)) = (lon_op, lat_op) {
2433                return Some(PayloadSchemaType::Geo);
2434            }
2435            None
2436        }
2437    }
2438}
2439
// Scalar values accepted in match conditions. Untagged: the JSON type decides
// the variant.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(untagged)]
pub enum ValueVariants {
    String(String),
    Integer(IntPayloadType),
    Bool(bool),
}
2447
2448impl ValueVariants {
2449    pub fn to_value(&self) -> Value {
2450        match self {
2451            ValueVariants::String(keyword) => Value::String(keyword.clone()),
2452            &ValueVariants::Integer(integer) => Value::Number(integer.into()),
2453            &ValueVariants::Bool(flag) => Value::Bool(flag),
2454        }
2455    }
2456}
2457
// Homogeneous sets of values for `MatchAny`/`MatchExcept`. `Hash` is written
// by hand below because `IndexSet` does not derive it.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq)]
#[serde(untagged)]
pub enum AnyVariants {
    Strings(IndexSet<String, FnvBuildHasher>),
    Integers(IndexSet<IntPayloadType, FnvBuildHasher>),
}
2464
2465impl Hash for AnyVariants {
2466    fn hash<H: Hasher>(&self, state: &mut H) {
2467        mem::discriminant(self).hash(state);
2468        match self {
2469            AnyVariants::Strings(index_set) => {
2470                for item in index_set.iter() {
2471                    item.hash(state);
2472                }
2473            }
2474            AnyVariants::Integers(index_set) => {
2475                for item in index_set.iter() {
2476                    item.hash(state);
2477                }
2478            }
2479        }
2480    }
2481}
2482
2483impl AnyVariants {
2484    pub fn len(&self) -> usize {
2485        match self {
2486            AnyVariants::Strings(index_set) => index_set.len(),
2487            AnyVariants::Integers(index_set) => index_set.len(),
2488        }
2489    }
2490
2491    pub fn is_empty(&self) -> bool {
2492        match self {
2493            AnyVariants::Strings(index_set) => index_set.is_empty(),
2494            AnyVariants::Integers(index_set) => index_set.is_empty(),
2495        }
2496    }
2497}
2498
/// Exact match of the given value
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub struct MatchValue {
    pub value: ValueVariants,
}

/// Full-text match of the strings.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub struct MatchText {
    pub text: String,
}

/// Full-text match of at least one token of the string.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub struct MatchTextAny {
    pub text_any: String,
}

// Convenience conversion from anything string-like.
impl<S: Into<String>> From<S> for MatchText {
    fn from(text: S) -> Self {
        MatchText { text: text.into() }
    }
}

/// Full-text phrase match of the string.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub struct MatchPhrase {
    pub phrase: String,
}

// Convenience conversion from anything string-like.
impl<S: Into<String>> From<S> for MatchPhrase {
    fn from(text: S) -> Self {
        MatchPhrase {
            phrase: text.into(),
        }
    }
}

/// Exact match on any of the given values
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub struct MatchAny {
    pub any: AnyVariants,
}

/// Should have at least one value not matching the any given values
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub struct MatchExcept {
    pub except: AnyVariants,
}
2554
/// Match filter request
// Deserialization-facing mirror of `Match`; untagged, so serde picks the
// variant by the JSON field name (`value`, `text`, `text_any`, ...).
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq)]
#[serde(untagged, rename_all = "snake_case")]
pub enum MatchInterface {
    Value(MatchValue),
    Text(MatchText),
    TextAny(MatchTextAny),
    Phrase(MatchPhrase),
    Any(MatchAny),
    Except(MatchExcept),
}

/// Match filter request
// Deserializes via `MatchInterface` (`from = "MatchInterface"`), keeping the
// two enums structurally in sync.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(untagged, from = "MatchInterface")]
pub enum Match {
    Value(MatchValue),
    Text(MatchText),
    TextAny(MatchTextAny),
    Phrase(MatchPhrase),
    Any(MatchAny),
    Except(MatchExcept),
}
2578
2579impl Match {
2580    pub fn new_value(value: ValueVariants) -> Self {
2581        Self::Value(MatchValue { value })
2582    }
2583
2584    pub fn new_text(text: &str) -> Self {
2585        Self::Text(MatchText { text: text.into() })
2586    }
2587
2588    pub fn new_phrase(phrase: &str) -> Self {
2589        Self::Phrase(MatchPhrase {
2590            phrase: phrase.into(),
2591        })
2592    }
2593
2594    pub fn new_any(any: AnyVariants) -> Self {
2595        Self::Any(MatchAny { any })
2596    }
2597
2598    pub fn new_except(except: AnyVariants) -> Self {
2599        Self::Except(MatchExcept { except })
2600    }
2601}
2602
2603impl From<AnyVariants> for Match {
2604    fn from(any: AnyVariants) -> Self {
2605        Self::Any(MatchAny { any })
2606    }
2607}
2608
2609impl From<MatchInterface> for Match {
2610    fn from(value: MatchInterface) -> Self {
2611        match value {
2612            MatchInterface::Value(value) => Self::Value(MatchValue { value: value.value }),
2613            MatchInterface::Text(text) => Self::Text(MatchText { text: text.text }),
2614            MatchInterface::TextAny(text_any) => Self::TextAny(MatchTextAny {
2615                text_any: text_any.text_any,
2616            }),
2617            MatchInterface::Any(any) => Self::Any(MatchAny { any: any.any }),
2618            MatchInterface::Except(except) => Self::Except(MatchExcept {
2619                except: except.except,
2620            }),
2621            MatchInterface::Phrase(MatchPhrase { phrase }) => Self::Phrase(MatchPhrase { phrase }),
2622        }
2623    }
2624}
2625
2626impl From<bool> for Match {
2627    fn from(flag: bool) -> Self {
2628        Self::Value(MatchValue {
2629            value: ValueVariants::Bool(flag),
2630        })
2631    }
2632}
2633
2634impl From<String> for Match {
2635    fn from(keyword: String) -> Self {
2636        Self::Value(MatchValue {
2637            value: ValueVariants::String(keyword),
2638        })
2639    }
2640}
2641
2642impl From<EcoString> for Match {
2643    fn from(keyword: EcoString) -> Self {
2644        Self::Value(MatchValue {
2645            value: ValueVariants::String(keyword.into()),
2646        })
2647    }
2648}
2649
2650impl From<IntPayloadType> for Match {
2651    fn from(integer: IntPayloadType) -> Self {
2652        Self::Value(MatchValue {
2653            value: ValueVariants::Integer(integer),
2654        })
2655    }
2656}
2657
2658impl From<Vec<String>> for Match {
2659    fn from(keywords: Vec<String>) -> Self {
2660        let keywords: IndexSet<String, FnvBuildHasher> = keywords.into_iter().collect();
2661        Self::Any(MatchAny {
2662            any: AnyVariants::Strings(keywords),
2663        })
2664    }
2665}
2666
2667impl From<ValueVariants> for Match {
2668    fn from(value: ValueVariants) -> Self {
2669        Self::Value(MatchValue { value })
2670    }
2671}
2672
2673impl From<Vec<String>> for MatchExcept {
2674    fn from(keywords: Vec<String>) -> Self {
2675        let keywords: IndexSet<String, FnvBuildHasher> = keywords.into_iter().collect();
2676        MatchExcept {
2677            except: AnyVariants::Strings(keywords),
2678        }
2679    }
2680}
2681
2682impl From<Vec<IntPayloadType>> for Match {
2683    fn from(integers: Vec<IntPayloadType>) -> Self {
2684        let integers: IndexSet<_, FnvBuildHasher> = integers.into_iter().collect();
2685        Self::Any(MatchAny {
2686            any: AnyVariants::Integers(integers),
2687        })
2688    }
2689}
2690
2691impl From<Vec<IntPayloadType>> for MatchExcept {
2692    fn from(integers: Vec<IntPayloadType>) -> Self {
2693        let integers: IndexSet<_, FnvBuildHasher> = integers.into_iter().collect();
2694        MatchExcept {
2695            except: AnyVariants::Integers(integers),
2696        }
2697    }
2698}
2699
// Range over either plain floats or datetimes; the custom `Deserialize`
// implementation below decides which variant to parse from JSON input.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Serialize, JsonSchema)]
#[serde(untagged)]
pub enum RangeInterface {
    Float(Range<OrderedFloat<FloatPayloadType>>),
    DateTime(Range<DateTimePayloadType>),
}
2706
2707impl Hash for RangeInterface {
2708    fn hash<H: hash::Hasher>(&self, state: &mut H) {
2709        match self {
2710            RangeInterface::Float(range) => {
2711                let Range { lt, gt, gte, lte } = range;
2712                lt.hash(state);
2713                gt.hash(state);
2714                gte.hash(state);
2715                lte.hash(state);
2716            }
2717            RangeInterface::DateTime(range) => {
2718                let Range { lt, gt, gte, lte } = range;
2719                lt.hash(state);
2720                gt.hash(state);
2721                gte.hash(state);
2722                lte.hash(state);
2723            }
2724        }
2725    }
2726}
2727
// Plain untagged mirror of `RangeInterface`, used by its custom `Deserialize`
// implementation as the fallback parser.
#[derive(serde::Deserialize)]
#[serde(untagged)]
enum RangeInterfaceUntagged {
    Float(Range<OrderedFloatPayloadType>),
    DateTime(Range<DateTimePayloadType>),
}
2734
2735impl<'de> serde::Deserialize<'de> for RangeInterface {
2736    /// Parses range bounds, treating string bounds as RFC3339 datetimes for REST/JSON `datetime_range` filters.
2737    /// Preserves clear user-facing errors when datetime formats are invalid.
2738    /// Example accepted datetime bound: `2014-01-01T00:00:00Z`.
2739    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
2740    where
2741        D: serde::Deserializer<'de>,
2742    {
2743        if !deserializer.is_human_readable() {
2744            return RangeInterfaceUntagged::deserialize(deserializer).map(|parsed| match parsed {
2745                RangeInterfaceUntagged::Float(r) => RangeInterface::Float(r),
2746                RangeInterfaceUntagged::DateTime(r) => RangeInterface::DateTime(r),
2747            });
2748        }
2749
2750        let value = serde_json::Value::deserialize(deserializer)?;
2751
2752        // If any range bound is a string -> treat as datetime range
2753        if let Some(obj) = value.as_object() {
2754            let keys = ["lt", "gt", "lte", "gte"];
2755            let has_string_bound = keys
2756                .iter()
2757                .any(|k| obj.get(*k).map(|v| v.is_string()).unwrap_or(false));
2758
2759            if has_string_bound {
2760                return serde_json::from_value::<Range<DateTimePayloadType>>(value)
2761                    .map(RangeInterface::DateTime)
2762                    .map_err(serde::de::Error::custom);
2763            }
2764        }
2765
2766        // Fallback to existing untagged behavior
2767        let parsed = serde_json::from_value::<RangeInterfaceUntagged>(value)
2768            .map_err(serde::de::Error::custom)?;
2769
2770        Ok(match parsed {
2771            RangeInterfaceUntagged::Float(r) => RangeInterface::Float(r),
2772            RangeInterfaceUntagged::DateTime(r) => RangeInterface::DateTime(r),
2773        })
2774    }
2775}
2776
// Shorthand for the float payload type wrapped in `OrderedFloat` (adds `Eq`/`Hash`).
type OrderedFloatPayloadType = OrderedFloat<FloatPayloadType>;
2778
/// Range filter request
// NOTE(review): the macro attribute appears to generate schemars impls named
// "Range" / "DatetimeRange" for the two instantiations listed in `derive_args`
// — confirm against `schemars_rename_generics` before relying on this.
#[macro_rules_attribute::macro_rules_derive(crate::segment::common::macros::schemars_rename_generics)]
#[derive_args(< OrderedFloatPayloadType > => "Range", < DateTimePayloadType > => "DatetimeRange")]
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub struct Range<T> {
    /// point.key < range.lt
    pub lt: Option<T>,
    /// point.key > range.gt
    pub gt: Option<T>,
    /// point.key >= range.gte
    pub gte: Option<T>,
    /// point.key <= range.lte
    pub lte: Option<T>,
}
2794
2795impl<T: Copy> Range<T> {
2796    /// Convert range to a range of another type
2797    pub fn map<U, F: Fn(T) -> U>(&self, f: F) -> Range<U> {
2798        let Self { lt, gt, gte, lte } = self;
2799        Range {
2800            lt: lt.map(&f),
2801            gt: gt.map(&f),
2802            gte: gte.map(&f),
2803            lte: lte.map(&f),
2804        }
2805    }
2806}
2807
2808impl<T: Copy + PartialOrd> Range<T> {
2809    pub fn check_range(&self, number: T) -> bool {
2810        let Self { lt, gt, gte, lte } = self;
2811        lt.is_none_or(|x| number < x)
2812            && gt.is_none_or(|x| number > x)
2813            && lte.is_none_or(|x| number <= x)
2814            && gte.is_none_or(|x| number >= x)
2815    }
2816}
2817
/// Values count filter request
// Same bound semantics as `Range`, but applied to the number of values stored
// under the payload key (see `check_count_from`).
#[derive(Debug, Deserialize, Serialize, JsonSchema, Copy, Clone, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub struct ValuesCount {
    /// point.key.length() < values_count.lt
    pub lt: Option<usize>,
    /// point.key.length() > values_count.gt
    pub gt: Option<usize>,
    /// point.key.length() >= values_count.gte
    pub gte: Option<usize>,
    /// point.key.length() <= values_count.lte
    pub lte: Option<usize>,
}
2831
2832impl ValuesCount {
2833    pub fn check_count(&self, count: usize) -> bool {
2834        let Self { lt, gt, gte, lte } = self;
2835        lt.is_none_or(|x| count < x)
2836            && gt.is_none_or(|x| count > x)
2837            && lte.is_none_or(|x| count <= x)
2838            && gte.is_none_or(|x| count >= x)
2839    }
2840
2841    pub fn check_count_from(&self, value: &Value) -> bool {
2842        let count = match value {
2843            Value::Null => 0,
2844            Value::Array(array) => array.len(),
2845            _ => 1,
2846        };
2847
2848        self.check_count(count)
2849    }
2850}
2851
#[cfg(test)]
impl From<std::ops::Range<usize>> for ValuesCount {
    /// Test helper: a half-open `start..end` maps onto `gte: start, lt: end`.
    fn from(range: std::ops::Range<usize>) -> Self {
        let std::ops::Range { start, end } = range;
        Self {
            lt: Some(end),
            gt: None,
            gte: Some(start),
            lte: None,
        }
    }
}
2863
/// Geo filter request
///
/// Matches coordinates inside the rectangle, described by coordinates of top-left and bottom-right edges
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Deserialize, Serialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub struct GeoBoundingBox {
    /// Coordinates of the top left point of the area rectangle
    pub top_left: GeoPoint,
    /// Coordinates of the bottom right point of the area rectangle
    pub bottom_right: GeoPoint,
}
2875
2876impl GeoBoundingBox {
2877    pub fn check_point(&self, point: &GeoPoint) -> bool {
2878        let longitude_check = if self.top_left.lon > self.bottom_right.lon {
2879            // Handle antimeridian crossing
2880            point.lon > self.top_left.lon || point.lon < self.bottom_right.lon
2881        } else {
2882            self.top_left.lon < point.lon && point.lon < self.bottom_right.lon
2883        };
2884
2885        let latitude_check = self.bottom_right.lat < point.lat && point.lat < self.top_left.lat;
2886
2887        longitude_check && latitude_check
2888    }
2889}
2890
/// Geo filter request
///
/// Matches coordinates inside the circle of `radius` and center with coordinates `center`
#[derive(Copy, Clone, Debug, Eq, PartialEq, Deserialize, Serialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub struct GeoRadius {
    /// Coordinates of the center of the circle
    pub center: GeoPoint,
    /// Radius of the area in meters
    pub radius: OrderedFloat<f64>,
}
2902
2903impl Hash for GeoRadius {
2904    fn hash<H: hash::Hasher>(&self, state: &mut H) {
2905        let GeoRadius { center, radius } = self;
2906        center.hash(state);
2907        // Hash f64 by converting to bits
2908        OrderedFloat(*radius).hash(state);
2909    }
2910}
2911
2912impl GeoRadius {
2913    pub fn check_point(&self, point: &GeoPoint) -> bool {
2914        let query_center = Point::from(self.center);
2915        Haversine.distance(query_center, Point::from(*point)) < self.radius.0
2916    }
2917}
2918
// Deserialization shadow for `GeoPolygon`: holds the raw rings before they are
// validated in `TryFrom<GeoPolygonShadow> for GeoPolygon`.
#[derive(Deserialize)]
pub struct GeoPolygonShadow {
    pub exterior: GeoLineString,
    pub interiors: Option<Vec<GeoLineString>>,
}
2924
// Wrapper around the geo-crate `Polygon`, pre-converted from `GeoPolygon`
// (see `GeoPolygon::convert`) for point-in-polygon checks.
pub struct PolygonWrapper {
    pub polygon: Polygon,
}
2928
2929impl PolygonWrapper {
2930    pub fn check_point(&self, point: &GeoPoint) -> bool {
2931        let point_new = Point::new(point.lon.0, point.lat.0);
2932        self.polygon.contains(&point_new)
2933    }
2934}
2935
/// Geo filter request
///
/// Matches coordinates inside the polygon, defined by `exterior` and `interiors`
// Constructed via `TryFrom<GeoPolygonShadow>` (see `try_from` attribute), which
// enforces the ring invariants documented on the fields below.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(try_from = "GeoPolygonShadow", rename_all = "snake_case")]
pub struct GeoPolygon {
    /// The exterior line bounds the surface
    /// must consist of a minimum of 4 points, and the first and last points
    /// must be the same.
    pub exterior: GeoLineString,
    /// Interior lines (if present) bound holes within the surface
    /// each GeoLineString must consist of a minimum of 4 points, and the first
    /// and last points must be the same.
    pub interiors: Option<Vec<GeoLineString>>,
}
2951
2952impl GeoPolygon {
2953    pub fn validate_line_string(line: &GeoLineString) -> OperationResult<()> {
2954        if line.points.len() <= 3 {
2955            return Err(OperationError::ValidationError {
2956                description: format!(
2957                    "polygon invalid, the size must be at least 4, got {}",
2958                    line.points.len()
2959                ),
2960            });
2961        }
2962
2963        if let (Some(first), Some(last)) = (line.points.first(), line.points.last())
2964            && ((first.lat - last.lat).abs() > f64::EPSILON
2965                || (first.lon - last.lon).abs() > f64::EPSILON)
2966        {
2967            return Err(OperationError::ValidationError {
2968                description: String::from(
2969                    "polygon invalid, the first and the last points should be the same to form a closed line",
2970                ),
2971            });
2972        }
2973
2974        Ok(())
2975    }
2976
2977    // convert GeoPolygon to Geo crate Polygon class for checking point intersection
2978    pub fn convert(&self) -> PolygonWrapper {
2979        let exterior_line: LineString = LineString(
2980            self.exterior
2981                .points
2982                .iter()
2983                .map(|p| Coord {
2984                    x: p.lon.0,
2985                    y: p.lat.0,
2986                })
2987                .collect(),
2988        );
2989
2990        // Convert the interior points to coordinates (if any)
2991        let interior_lines: Vec<LineString> = match &self.interiors {
2992            None => vec![],
2993            Some(interiors) => interiors
2994                .iter()
2995                .map(|interior_points| {
2996                    interior_points
2997                        .points
2998                        .iter()
2999                        .map(|p| Coord {
3000                            x: p.lon.0,
3001                            y: p.lat.0,
3002                        })
3003                        .collect()
3004                })
3005                .map(LineString)
3006                .collect(),
3007        };
3008        PolygonWrapper {
3009            polygon: Polygon::new(exterior_line, interior_lines),
3010        }
3011    }
3012}
3013
3014impl TryFrom<GeoPolygonShadow> for GeoPolygon {
3015    type Error = OperationError;
3016
3017    fn try_from(value: GeoPolygonShadow) -> OperationResult<Self> {
3018        let GeoPolygonShadow {
3019            exterior,
3020            interiors,
3021        } = value;
3022        Self::validate_line_string(&exterior)?;
3023
3024        if let Some(interiors) = &interiors {
3025            for interior in interiors {
3026                Self::validate_line_string(interior)?;
3027            }
3028        }
3029
3030        Ok(GeoPolygon {
3031            exterior,
3032            interiors,
3033        })
3034    }
3035}
3036
/// All possible payload filtering conditions
// At least one of the optional constraints must be set; an all-`None`
// condition is rejected by `validate_field_condition`.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Validate, Clone, PartialEq, Eq, Hash)]
#[validate(schema(function = "validate_field_condition"))]
#[serde(rename_all = "snake_case")]
pub struct FieldCondition {
    /// Payload key
    pub key: PayloadKeyType,
    /// Check if point has field with a given value
    #[serde(skip_serializing_if = "Option::is_none")]
    pub r#match: Option<Match>,
    /// Check if points value lies in a given range
    #[serde(skip_serializing_if = "Option::is_none")]
    pub range: Option<RangeInterface>,
    /// Check if points geolocation lies in a given area
    #[serde(skip_serializing_if = "Option::is_none")]
    pub geo_bounding_box: Option<GeoBoundingBox>,
    /// Check if geo point is within a given radius
    #[serde(skip_serializing_if = "Option::is_none")]
    pub geo_radius: Option<GeoRadius>,
    /// Check if geo point is within a given polygon
    #[serde(skip_serializing_if = "Option::is_none")]
    pub geo_polygon: Option<GeoPolygon>,
    /// Check number of values of the field
    #[serde(skip_serializing_if = "Option::is_none")]
    pub values_count: Option<ValuesCount>,
    /// Check that the field is empty, alternative syntax for `is_empty: "field_name"`
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_empty: Option<bool>,
    /// Check that the field is null, alternative syntax for `is_null: "field_name"`
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_null: Option<bool>,
}
3069
3070impl FieldCondition {
3071    pub fn new_match(key: PayloadKeyType, r#match: Match) -> Self {
3072        Self {
3073            key,
3074            r#match: Some(r#match),
3075            range: None,
3076            geo_bounding_box: None,
3077            geo_radius: None,
3078            geo_polygon: None,
3079            values_count: None,
3080            is_empty: None,
3081            is_null: None,
3082        }
3083    }
3084
3085    pub fn new_range(key: PayloadKeyType, range: Range<OrderedFloat<FloatPayloadType>>) -> Self {
3086        Self {
3087            key,
3088            r#match: None,
3089            range: Some(RangeInterface::Float(range)),
3090            geo_bounding_box: None,
3091            geo_radius: None,
3092            geo_polygon: None,
3093            values_count: None,
3094            is_empty: None,
3095            is_null: None,
3096        }
3097    }
3098
3099    pub fn new_datetime_range(
3100        key: PayloadKeyType,
3101        datetime_range: Range<DateTimePayloadType>,
3102    ) -> Self {
3103        Self {
3104            key,
3105            r#match: None,
3106            range: Some(RangeInterface::DateTime(datetime_range)),
3107            geo_bounding_box: None,
3108            geo_radius: None,
3109            geo_polygon: None,
3110            values_count: None,
3111            is_empty: None,
3112            is_null: None,
3113        }
3114    }
3115
3116    pub fn new_geo_bounding_box(key: PayloadKeyType, geo_bounding_box: GeoBoundingBox) -> Self {
3117        Self {
3118            key,
3119            r#match: None,
3120            range: None,
3121            geo_bounding_box: Some(geo_bounding_box),
3122            geo_radius: None,
3123            geo_polygon: None,
3124            values_count: None,
3125            is_empty: None,
3126            is_null: None,
3127        }
3128    }
3129
3130    pub fn new_geo_radius(key: PayloadKeyType, geo_radius: GeoRadius) -> Self {
3131        Self {
3132            key,
3133            r#match: None,
3134            range: None,
3135            geo_bounding_box: None,
3136            geo_radius: Some(geo_radius),
3137            geo_polygon: None,
3138            values_count: None,
3139            is_empty: None,
3140            is_null: None,
3141        }
3142    }
3143
3144    pub fn new_geo_polygon(key: PayloadKeyType, geo_polygon: GeoPolygon) -> Self {
3145        Self {
3146            key,
3147            r#match: None,
3148            range: None,
3149            geo_bounding_box: None,
3150            geo_radius: None,
3151            geo_polygon: Some(geo_polygon),
3152            values_count: None,
3153            is_empty: None,
3154            is_null: None,
3155        }
3156    }
3157
3158    pub fn new_values_count(key: PayloadKeyType, values_count: ValuesCount) -> Self {
3159        Self {
3160            key,
3161            r#match: None,
3162            range: None,
3163            geo_bounding_box: None,
3164            geo_radius: None,
3165            geo_polygon: None,
3166            values_count: Some(values_count),
3167            is_empty: None,
3168            is_null: None,
3169        }
3170    }
3171
3172    pub fn new_is_empty(key: PayloadKeyType, is_empty: bool) -> Self {
3173        Self {
3174            key,
3175            r#match: None,
3176            range: None,
3177            geo_bounding_box: None,
3178            geo_radius: None,
3179            geo_polygon: None,
3180            values_count: None,
3181            is_empty: Some(is_empty),
3182            is_null: None,
3183        }
3184    }
3185
3186    pub fn new_is_null(key: PayloadKeyType, is_null: bool) -> Self {
3187        Self {
3188            key,
3189            r#match: None,
3190            range: None,
3191            geo_bounding_box: None,
3192            geo_radius: None,
3193            geo_polygon: None,
3194            values_count: None,
3195            is_empty: None,
3196            is_null: Some(is_null),
3197        }
3198    }
3199
3200    pub fn all_fields_none(&self) -> bool {
3201        matches!(
3202            self,
3203            FieldCondition {
3204                r#match: None,
3205                range: None,
3206                geo_bounding_box: None,
3207                geo_radius: None,
3208                geo_polygon: None,
3209                values_count: None,
3210                key: _,
3211                is_empty: None,
3212                is_null: None,
3213            }
3214        )
3215    }
3216
3217    fn input_size(&self) -> usize {
3218        if self.r#match.is_none() {
3219            return 0;
3220        }
3221
3222        match self.r#match.as_ref().unwrap() {
3223            Match::Any(match_any) => match_any.any.len(),
3224            Match::Except(match_except) => match_except.except.len(),
3225            Match::Value(_) => 0,
3226            Match::Text(_) => 0,
3227            Match::Phrase(_) => 0,
3228            Match::TextAny(_) => 0,
3229        }
3230    }
3231}
3232
3233pub fn validate_field_condition(field_condition: &FieldCondition) -> Result<(), ValidationError> {
3234    if field_condition.all_fields_none() {
3235        Err(ValidationError::new(
3236            "At least one field condition must be specified",
3237        ))
3238    } else {
3239        Ok(())
3240    }
3241}
3242
/// Payload field
// Wrapper used by `IsEmptyCondition` / `IsNullCondition` below.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
pub struct PayloadField {
    /// Payload field name
    pub key: PayloadKeyType,
}
3249
/// Select points with empty payload for a specified field
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
pub struct IsEmptyCondition {
    // The payload field that must be empty.
    pub is_empty: PayloadField,
}
3255
/// Select points with null payload for a specified field
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
pub struct IsNullCondition {
    // The payload field that must be null.
    pub is_null: PayloadField,
}
3261
3262impl From<JsonPath> for IsNullCondition {
3263    fn from(key: PayloadKeyType) -> Self {
3264        IsNullCondition {
3265            is_null: PayloadField { key },
3266        }
3267    }
3268}
3269
3270impl From<JsonPath> for IsEmptyCondition {
3271    fn from(key: PayloadKeyType) -> Self {
3272        IsEmptyCondition {
3273            is_empty: PayloadField { key },
3274        }
3275    }
3276}
3277
/// ID-based filtering condition
// `MaybeArc` keeps cloning cheap for large id sets; see the
// `From<AHashSet<PointIdType>>` impl and `HAS_ID_CONDITION_ARC_THRESHOLD` below.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq)]
pub struct HasIdCondition {
    #[schemars(schema_with = "HashSet::<PointIdType>::json_schema")]
    pub has_id: MaybeArc<AHashSet<PointIdType>>,
}
3284
impl Hash for HasIdCondition {
    // Set iteration order is unstable, so delegate to `unordered_hash_unique`,
    // which (per its name) combines element hashes order-independently.
    fn hash<H: hash::Hasher>(&self, state: &mut H) {
        unordered_hash_unique(state, self.has_id.iter());
    }
}
3290
/// Filter points which have specific vector assigned
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
pub struct HasVectorCondition {
    // Name of the (named) vector that must be present on the point.
    pub has_vector: VectorNameBuf,
}
3296
3297impl From<VectorNameBuf> for HasVectorCondition {
3298    fn from(vector: VectorNameBuf) -> Self {
3299        HasVectorCondition { has_vector: vector }
3300    }
3301}
3302
/// Threshold determining when to use an `Arc` in `HasIdCondition` if the condition includes many points.
/// Since we're cloning filters quite a lot, using an Arc for larger conditions reduces risk of memory leaks
/// and potentially improves performance in some places.
// Applied in `From<AHashSet<PointIdType>> for HasIdCondition` (and, through it,
// in `FromIterator<PointIdType>`).
const HAS_ID_CONDITION_ARC_THRESHOLD: usize = 1_000;
3307
3308impl From<AHashSet<PointIdType>> for HasIdCondition {
3309    fn from(has_id: AHashSet<PointIdType>) -> Self {
3310        if has_id.len() > HAS_ID_CONDITION_ARC_THRESHOLD {
3311            HasIdCondition {
3312                has_id: MaybeArc::arc(has_id),
3313            }
3314        } else {
3315            HasIdCondition {
3316                has_id: MaybeArc::no_arc(has_id),
3317            }
3318        }
3319    }
3320}
3321
3322impl FromIterator<PointIdType> for HasIdCondition {
3323    fn from_iter<T: IntoIterator<Item = PointIdType>>(iter: T) -> Self {
3324        let items: AHashSet<_> = iter.into_iter().collect();
3325        // Arc-Threshold applies here, since we're reusing the From implementation from AHashSet.
3326        Self::from(items)
3327    }
3328}
3329
/// Select points with payload for a specified nested field
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Validate, Hash)]
pub struct Nested {
    // Path to the nested (array) payload field the filter applies to.
    pub key: PayloadKeyType,
    #[validate(nested)]
    pub filter: Filter,
}
3337
// Wrapper condition around `Nested`; the extra level matches the JSON shape
// `{ "nested": { "key": ..., "filter": ... } }`.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Validate, Hash)]
pub struct NestedCondition {
    #[validate(nested)]
    pub nested: Nested,
}
3343
3344/// Container to work around the untagged enum limitation for condition
3345impl NestedCondition {
3346    pub fn new(nested: Nested) -> Self {
3347        Self { nested }
3348    }
3349
3350    /// Get the raw key without any modifications
3351    pub fn raw_key(&self) -> &PayloadKeyType {
3352        &self.nested.key
3353    }
3354
3355    /// Nested is made to be used with arrays, so we add `[]` to the key if it is not present for convenience
3356    pub fn array_key(&self) -> PayloadKeyType {
3357        self.raw_key().array_key()
3358    }
3359
3360    pub fn filter(&self) -> &Filter {
3361        &self.nested.filter
3362    }
3363}
3364
// Deserialization goes through a custom `Deserialize` impl (below) that routes
// through `ConditionUntagged`; keep the two variant sets in sync.
#[derive(Clone, Debug, Serialize, JsonSchema, PartialEq, Eq, Hash)]
#[serde(untagged)]
#[serde(
    expecting = "Expected some form of condition, which can be a field condition (like {\"key\": ..., \"match\": ... }), or some other mentioned in the documentation: https://qdrant.tech/documentation/concepts/filtering/#filtering-conditions"
)]
#[allow(clippy::large_enum_variant)]
pub enum Condition {
    /// Check if field satisfies provided condition
    Field(FieldCondition),
    /// Check if payload field is empty: equals to empty array, or does not exists
    IsEmpty(IsEmptyCondition),
    /// Check if payload field equals `NULL`
    IsNull(IsNullCondition),
    /// Check if points id is in a given set
    HasId(HasIdCondition),
    /// Check if point has vector assigned
    HasVector(HasVectorCondition),
    /// Nested filters
    Nested(NestedCondition),
    /// Nested filter
    Filter(Filter),

    // Not (de)serializable: constructed only in code via `Condition::new_custom`.
    #[serde(skip)]
    CustomIdChecker(CustomIdChecker),
}
3390
// Mirror of `Condition` used by its custom `Deserialize` implementation;
// `From<ConditionUntagged> for Condition` forces the variant sets to stay in sync.
#[derive(Deserialize)]
#[serde(untagged)]
#[serde(
    expecting = "Expected some form of condition, which can be a field condition (like {\"key\": ..., \"match\": ... }), or some other mentioned in the documentation: https://qdrant.tech/documentation/concepts/filtering/#filtering-conditions"
)]
#[allow(clippy::large_enum_variant)]
#[allow(dead_code)]
enum ConditionUntagged {
    Field(FieldCondition),
    IsEmpty(IsEmptyCondition),
    IsNull(IsNullCondition),
    HasId(HasIdCondition),
    HasVector(HasVectorCondition),
    Nested(NestedCondition),
    Filter(Filter),

    #[serde(skip)]
    CustomIdChecker(CustomIdChecker),
}
3410
3411impl From<ConditionUntagged> for Condition {
3412    fn from(condition: ConditionUntagged) -> Self {
3413        match condition {
3414            ConditionUntagged::Field(condition) => Condition::Field(condition),
3415            ConditionUntagged::IsEmpty(condition) => Condition::IsEmpty(condition),
3416            ConditionUntagged::IsNull(condition) => Condition::IsNull(condition),
3417            ConditionUntagged::HasId(condition) => Condition::HasId(condition),
3418            ConditionUntagged::HasVector(condition) => Condition::HasVector(condition),
3419            ConditionUntagged::Nested(condition) => Condition::Nested(condition),
3420            ConditionUntagged::Filter(condition) => Condition::Filter(condition),
3421            ConditionUntagged::CustomIdChecker(condition) => Condition::CustomIdChecker(condition),
3422        }
3423    }
3424}
3425
impl<'de> serde::Deserialize<'de> for Condition {
    /// Deserializes Condition with special handling for FieldCondition to preserve
    /// readable RFC3339 datetime parse errors. Other variants use ConditionUntagged
    /// for compiler-level safety when new variants are added.
    /// Example accepted datetime value: `2014-01-01T00:00:00Z`.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        // Buffer into serde_value::Value which, unlike serde_json::Value,
        // can represent byte arrays from non-human-readable formats (e.g. CBOR).
        // Note: we cannot rely on `deserializer.is_human_readable()` here because
        // serde's internal ContentDeserializer (used by flatten + untagged) always
        // reports `true` regardless of the original format.
        let value = serde_value::Value::deserialize(deserializer)?;

        // Special case: FieldCondition first to surface datetime parse errors.
        // Untagged enum would swallow these errors with generic message.
        // Presence of "key" identifies a FieldCondition — it is that variant's
        // only required field, and no other variant uses it at the top level.
        if let serde_value::Value::Map(obj) = &value
            && obj.contains_key(&serde_value::Value::String("key".into()))
        {
            return value
                .deserialize_into()
                .map(Condition::Field)
                .map_err(serde::de::Error::custom);
        }

        // All other variants handled by ConditionUntagged (compiler-safe)
        value
            .deserialize_into::<ConditionUntagged>()
            .map(Condition::from)
            .map_err(serde::de::Error::custom)
    }
}
3460
3461impl Condition {
3462    pub fn new_custom(checker: Arc<dyn CustomIdCheckerCondition + Send + Sync + 'static>) -> Self {
3463        Condition::CustomIdChecker(CustomIdChecker(checker))
3464    }
3465}
3466
// Type-erased, non-serializable id-checker condition, constructed via
// `Condition::new_custom`. `Hash`/`PartialEq` (below) use `Arc` pointer identity.
#[derive(Debug, Clone)]
pub struct CustomIdChecker(pub Arc<dyn CustomIdCheckerCondition + Send + Sync + 'static>);
3469
impl Hash for CustomIdChecker {
    fn hash<H: hash::Hasher>(&self, state: &mut H) {
        // We cannot hash the inner function, so we hash the `Arc`'s pointer
        // identity instead: clones of the same checker hash equally (consistent
        // with the pointer-based `PartialEq`), while separately created but
        // behaviorally identical checkers hash differently. That's acceptable,
        // since this is only expected to be used for logging and profiling
        // purposes.
        std::ptr::hash(Arc::as_ptr(&self.0), state);
    }
}
3479
impl PartialEq for CustomIdChecker {
    fn eq(&self, other: &Self) -> bool {
        // We cannot compare the inner function, so we compare `Arc` pointer
        // identity: only clones of the same checker compare equal. Two
        // separately created checkers are never equal even if behaviorally
        // identical, but that's acceptable since this is only expected to be
        // used for logging and profiling purposes.
        Arc::ptr_eq(&self.0, &other.0)
    }
}

// Pointer equality is reflexive, so `Eq` holds.
impl Eq for CustomIdChecker {}
3491
impl Condition {
    /// Build a `Nested` condition that applies `filter` to the objects under `key`.
    pub fn new_nested(key: JsonPath, filter: Filter) -> Self {
        Self::Nested(NestedCondition {
            nested: Nested { key, filter },
        })
    }

    /// Rough size of the user-provided input of this condition (e.g. number of
    /// ids to match against), used as a cost estimate.
    /// Composite conditions report their largest member; structural conditions
    /// without input data report 0.
    pub fn size_estimation(&self) -> usize {
        match self {
            Condition::Field(field_condition) => field_condition.input_size(),
            Condition::HasId(has_id_condition) => has_id_condition.has_id.len(),
            Condition::Filter(filter) => filter.max_condition_input_size(),
            Condition::Nested(nested) => nested.filter().max_condition_input_size(),
            Condition::IsEmpty(_)
            | Condition::IsNull(_)
            | Condition::HasVector(_)
            | Condition::CustomIdChecker(_) => 0,
        }
    }

    /// Total number of leaf conditions represented by this condition:
    /// inner/nested filters are counted recursively, every other variant is 1.
    pub fn sub_conditions_count(&self) -> usize {
        match self {
            Condition::Nested(nested_condition) => {
                nested_condition.filter().total_conditions_count()
            }
            Condition::Filter(filter) => filter.total_conditions_count(),
            Condition::Field(_)
            | Condition::IsEmpty(_)
            | Condition::IsNull(_)
            | Condition::CustomIdChecker(_)
            | Condition::HasId(_)
            | Condition::HasVector(_) => 1,
        }
    }

    /// Payload key this condition targets, if any.
    /// For an inner filter, returns the first targeted key found;
    /// id/vector/custom conditions do not target payload keys.
    pub fn targeted_key(&self) -> Option<PayloadKeyType> {
        match self {
            Condition::Field(field_condition) => Some(field_condition.key.clone()),
            Condition::IsEmpty(is_empty_condition) => Some(is_empty_condition.is_empty.key.clone()),
            Condition::IsNull(is_null_condition) => Some(is_null_condition.is_null.key.clone()),
            Condition::Nested(nested_condition) => Some(nested_condition.array_key()),
            Condition::Filter(filter) => filter.iter_conditions().find_map(|c| c.targeted_key()),
            Condition::HasId(_) | Condition::HasVector(_) | Condition::CustomIdChecker(_) => None,
        }
    }
}
3538
3539// The validator crate does not support deriving for enums.
3540impl Validate for Condition {
3541    fn validate(&self) -> Result<(), ValidationErrors> {
3542        match self {
3543            Condition::HasId(_)
3544            | Condition::IsEmpty(_)
3545            | Condition::IsNull(_)
3546            | Condition::HasVector(_) => Ok(()),
3547            Condition::Field(field_condition) => field_condition.validate(),
3548            Condition::Nested(nested_condition) => nested_condition.validate(),
3549            Condition::Filter(filter) => filter.validate(),
3550            Condition::CustomIdChecker(_) => Ok(()),
3551        }
3552    }
3553}
3554
/// User-pluggable condition that decides matching from the point id alone.
pub trait CustomIdCheckerCondition: fmt::Debug {
    /// Estimate how many of `points` points are expected to match.
    fn estimate_cardinality(&self, points: usize) -> CardinalityEstimation;
    /// Check whether the point with the given id matches this condition.
    fn check(&self, point_id: ExtendedPointId) -> bool;
}
3559
/// Options for specifying which payload to include or not
//
// NOTE: `untagged` means serde tries the variants in declaration order during
// deserialization; the `expecting` message is reported when none of them match.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Hash)]
#[serde(untagged, rename_all = "snake_case")]
#[serde(
    expecting = "Expected a boolean, an array of strings, or an object with an include/exclude field"
)]
pub enum WithPayloadInterface {
    /// If `true` - return all payload,
    /// If `false` - do not return payload
    Bool(bool),
    /// Specify which fields to return
    Fields(Vec<JsonPath>),
    /// Specify included or excluded fields
    Selector(PayloadSelector),
}
3575
3576impl From<bool> for WithPayloadInterface {
3577    fn from(b: bool) -> Self {
3578        WithPayloadInterface::Bool(b)
3579    }
3580}
3581
3582impl Default for WithPayloadInterface {
3583    fn default() -> Self {
3584        WithPayloadInterface::Bool(false)
3585    }
3586}
3587
/// Options for specifying which vector to include
//
// NOTE: `untagged`, so on the wire a boolean maps to `Bool` and an array of
// vector names maps to `Selector`.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(untagged, rename_all = "snake_case")]
#[serde(expecting = "Expected a boolean, or an array of strings")]
pub enum WithVector {
    /// If `true` - return all vector,
    /// If `false` - do not return vector
    Bool(bool),
    /// Specify which vector to return
    Selector(Vec<VectorNameBuf>),
}
3599
3600impl WithVector {
3601    pub fn is_enabled(&self) -> bool {
3602        match self {
3603            WithVector::Bool(b) => *b,
3604            WithVector::Selector(_) => true,
3605        }
3606    }
3607
3608    /// Merges two `WithVector` options, additively.
3609    pub fn merge(&self, other: &WithVector) -> WithVector {
3610        match (self, other) {
3611            // if any is true, then true
3612            (WithVector::Bool(true), _) => WithVector::Bool(true),
3613            (_, WithVector::Bool(true)) => WithVector::Bool(true),
3614
3615            // if both are false, then false
3616            (WithVector::Bool(false), WithVector::Bool(false)) => WithVector::Bool(false),
3617
3618            // merge selectors
3619            (WithVector::Selector(s1), WithVector::Selector(s2)) => {
3620                WithVector::Selector(s1.iter().chain(s2).unique().cloned().collect())
3621            }
3622
3623            // use selector from the other option
3624            (WithVector::Bool(false), WithVector::Selector(s)) => WithVector::Selector(s.clone()),
3625            (WithVector::Selector(s), WithVector::Bool(false)) => WithVector::Selector(s.clone()),
3626        }
3627    }
3628}
3629
3630impl From<bool> for WithVector {
3631    fn from(b: bool) -> Self {
3632        WithVector::Bool(b)
3633    }
3634}
3635
3636impl From<VectorNameBuf> for WithVector {
3637    fn from(name: VectorNameBuf) -> Self {
3638        WithVector::Selector(vec![name])
3639    }
3640}
3641
3642impl Default for WithVector {
3643    fn default() -> Self {
3644        WithVector::Bool(false)
3645    }
3646}
3647
3648impl WithPayloadInterface {
3649    pub fn is_required(&self) -> bool {
3650        match self {
3651            WithPayloadInterface::Bool(b) => *b,
3652            _ => true,
3653        }
3654    }
3655}
3656
3657impl From<bool> for WithPayload {
3658    fn from(x: bool) -> Self {
3659        WithPayload {
3660            enable: x,
3661            payload_selector: None,
3662        }
3663    }
3664}
3665
3666impl From<WithPayloadInterface> for WithPayload {
3667    fn from(interface: WithPayloadInterface) -> Self {
3668        match interface {
3669            WithPayloadInterface::Bool(enable) => WithPayload {
3670                enable,
3671                payload_selector: None,
3672            },
3673            WithPayloadInterface::Fields(fields) => WithPayload {
3674                enable: true,
3675                payload_selector: Some(PayloadSelector::new_include(fields)),
3676            },
3677            WithPayloadInterface::Selector(selector) => WithPayload {
3678                enable: true,
3679                payload_selector: Some(selector),
3680            },
3681        }
3682    }
3683}
3684
3685impl From<&WithPayloadInterface> for WithPayload {
3686    fn from(interface: &WithPayloadInterface) -> Self {
3687        WithPayload::from(interface.clone())
3688    }
3689}
3690
// Whitelist-style payload selector; applied via `PayloadSelector::process`.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(deny_unknown_fields, rename_all = "snake_case")]
pub struct PayloadSelectorInclude {
    /// Only include this payload keys
    pub include: Vec<PayloadKeyType>,
}

impl PayloadSelectorInclude {
    /// Construct a selector from the list of keys to include.
    pub fn new(include: Vec<PayloadKeyType>) -> Self {
        Self { include }
    }
}
3703
// Blacklist-style payload selector; applied via `PayloadSelector::process`.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(deny_unknown_fields, rename_all = "snake_case")]
pub struct PayloadSelectorExclude {
    /// Exclude this fields from returning payload
    pub exclude: Vec<PayloadKeyType>,
}

impl PayloadSelectorExclude {
    /// Construct a selector from the list of keys to exclude.
    pub fn new(exclude: Vec<PayloadKeyType>) -> Self {
        Self { exclude }
    }
}
3716
/// Specifies how to treat payload selector
//
// Untagged: the variants are distinguished on the wire by their single field
// name (`include` vs `exclude`); the inner structs use `deny_unknown_fields`,
// so extra keys reject the variant.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq, Hash)]
#[serde(untagged, rename_all = "snake_case")]
pub enum PayloadSelector {
    /// Include only this fields into response payload
    Include(PayloadSelectorInclude),
    /// Exclude this fields from result payload. Keep all other fields.
    Exclude(PayloadSelectorExclude),
}
3726
3727impl From<PayloadSelectorExclude> for WithPayloadInterface {
3728    fn from(selector: PayloadSelectorExclude) -> Self {
3729        WithPayloadInterface::Selector(PayloadSelector::Exclude(selector))
3730    }
3731}
3732
3733impl From<PayloadSelectorInclude> for WithPayloadInterface {
3734    fn from(selector: PayloadSelectorInclude) -> Self {
3735        WithPayloadInterface::Selector(PayloadSelector::Include(selector))
3736    }
3737}
3738
3739impl PayloadSelector {
3740    pub fn new_include(vecs_payload_key_type: Vec<PayloadKeyType>) -> Self {
3741        PayloadSelector::Include(PayloadSelectorInclude {
3742            include: vecs_payload_key_type,
3743        })
3744    }
3745
3746    pub fn new_exclude(vecs_payload_key_type: Vec<PayloadKeyType>) -> Self {
3747        PayloadSelector::Exclude(PayloadSelectorExclude {
3748            exclude: vecs_payload_key_type,
3749        })
3750    }
3751
3752    /// Process payload selector
3753    pub fn process(&self, x: Payload) -> Payload {
3754        match self {
3755            PayloadSelector::Include(selector) => JsonPath::value_filter(&x.0, |key, _| {
3756                selector
3757                    .include
3758                    .iter()
3759                    .any(|pattern| pattern.check_include_pattern(key))
3760            })
3761            .into(),
3762            PayloadSelector::Exclude(selector) => JsonPath::value_filter(&x.0, |key, _| {
3763                selector
3764                    .exclude
3765                    .iter()
3766                    .all(|pattern| !pattern.check_exclude_pattern(key))
3767            })
3768            .into(),
3769        }
3770    }
3771}
3772
// Resolved, internal form of `WithPayloadInterface` (see its `From` impls):
// a plain enable flag plus an optional include/exclude selector.
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, Default, PartialEq, Eq)]
#[serde(deny_unknown_fields, rename_all = "snake_case")]
pub struct WithPayload {
    /// Enable return payloads or not
    pub enable: bool,
    /// Filter include and exclude payloads
    pub payload_selector: Option<PayloadSelector>,
}
3781
// "At least N of these conditions match" clause, used by `Filter::min_should`.
#[derive(
    Debug, Deserialize, Serialize, JsonSchema, Validate, Clone, PartialEq, Eq, Default, Hash,
)]
#[serde(rename_all = "snake_case")]
pub struct MinShould {
    // Candidate conditions; at least `min_count` of them should match.
    #[validate(nested)]
    pub conditions: Vec<Condition>,
    // Minimum number of `conditions` that should match.
    pub min_count: usize,
}
3791
// Boolean clauses over `Condition`s. On the wire, `should`/`must`/`must_not`
// accept either a single condition or a list of them (`MaybeOneOrMany`);
// absent clauses are skipped during serialization.
#[derive(
    Debug, Deserialize, Serialize, JsonSchema, Validate, Clone, PartialEq, Eq, Default, Hash,
)]
#[serde(deny_unknown_fields, rename_all = "snake_case")]
pub struct Filter {
    /// At least one of those conditions should match
    #[validate(nested)]
    #[serde(
        default,
        with = "MaybeOneOrMany",
        skip_serializing_if = "Option::is_none"
    )]
    #[schemars(with = "MaybeOneOrMany<Condition>")]
    pub should: Option<Vec<Condition>>,
    /// At least minimum amount of given conditions should match
    #[validate(nested)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub min_should: Option<MinShould>,
    /// All conditions must match
    #[validate(nested)]
    #[serde(
        default,
        with = "MaybeOneOrMany",
        skip_serializing_if = "Option::is_none"
    )]
    #[schemars(with = "MaybeOneOrMany<Condition>")]
    pub must: Option<Vec<Condition>>,
    /// All conditions must NOT match
    #[validate(nested)]
    #[serde(
        default,
        with = "MaybeOneOrMany",
        skip_serializing_if = "Option::is_none"
    )]
    #[schemars(with = "MaybeOneOrMany<Condition>")]
    pub must_not: Option<Vec<Condition>>,
}
3829
3830impl Filter {
3831    pub fn new() -> Self {
3832        Filter {
3833            should: None,
3834            min_should: None,
3835            must: None,
3836            must_not: None,
3837        }
3838    }
3839
3840    pub fn new_should(condition: Condition) -> Self {
3841        Filter {
3842            should: Some(vec![condition]),
3843            min_should: None,
3844            must: None,
3845            must_not: None,
3846        }
3847    }
3848
3849    pub fn new_min_should(min_should: MinShould) -> Self {
3850        Filter {
3851            should: None,
3852            min_should: Some(min_should),
3853            must: None,
3854            must_not: None,
3855        }
3856    }
3857
3858    pub fn new_must(condition: Condition) -> Self {
3859        Filter {
3860            should: None,
3861            min_should: None,
3862            must: Some(vec![condition]),
3863            must_not: None,
3864        }
3865    }
3866
3867    pub fn new_must_not(condition: Condition) -> Self {
3868        Filter {
3869            should: None,
3870            min_should: None,
3871            must: None,
3872            must_not: Some(vec![condition]),
3873        }
3874    }
3875
3876    /// Create an extended filtering condition, which would also include filter by given list of IDs.
3877    pub fn with_point_ids(self, ids: impl IntoIterator<Item = PointIdType>) -> Filter {
3878        let has_id_condition: HasIdCondition = ids.into_iter().collect();
3879
3880        let Filter {
3881            should,
3882            min_should,
3883            must,
3884            must_not,
3885        } = self;
3886
3887        let new_must = match must {
3888            Some(mut must) => {
3889                must.push(Condition::HasId(has_id_condition));
3890                Some(must)
3891            }
3892            None => Some(vec![Condition::HasId(has_id_condition)]),
3893        };
3894
3895        Filter {
3896            should,
3897            min_should,
3898            must: new_must,
3899            must_not,
3900        }
3901    }
3902
3903    pub fn merge(&self, other: &Filter) -> Filter {
3904        self.clone().merge_owned(other.clone())
3905    }
3906
3907    pub fn merge_owned(self, other: Filter) -> Filter {
3908        let merge_component = |this, other| -> Option<Vec<Condition>> {
3909            match (this, other) {
3910                (None, None) => None,
3911                (Some(this), None) => Some(this),
3912                (None, Some(other)) => Some(other),
3913                (Some(mut this), Some(mut other)) => {
3914                    this.append(&mut other);
3915                    Some(this)
3916                }
3917            }
3918        };
3919        Filter {
3920            should: merge_component(self.should, other.should),
3921            min_should: {
3922                match (self.min_should, other.min_should) {
3923                    (None, None) => None,
3924                    (Some(this), None) => Some(this),
3925                    (None, Some(other)) => Some(other),
3926                    (Some(mut this), Some(mut other)) => {
3927                        this.conditions.append(&mut other.conditions);
3928
3929                        // The union of conditions should be able to have at least the bigger of the two min_counts
3930                        this.min_count = this.min_count.max(other.min_count);
3931
3932                        Some(this)
3933                    }
3934                }
3935            },
3936            must: merge_component(self.must, other.must),
3937            must_not: merge_component(self.must_not, other.must_not),
3938        }
3939    }
3940
3941    pub fn merge_opts(this: Option<Self>, other: Option<Self>) -> Option<Self> {
3942        match (this, other) {
3943            (None, None) => None,
3944            (Some(this), None) => Some(this),
3945            (None, Some(other)) => Some(other),
3946            (Some(this), Some(other)) => Some(this.merge_owned(other)),
3947        }
3948    }
3949
3950    pub fn iter_conditions(&self) -> impl Iterator<Item = &Condition> {
3951        self.must
3952            .iter()
3953            .flatten()
3954            .chain(self.must_not.iter().flatten())
3955            .chain(self.should.iter().flatten())
3956            .chain(self.min_should.iter().flat_map(|i| &i.conditions))
3957    }
3958
3959    /// Returns the total amount of conditions of the filter, including all nested filter.
3960    pub fn total_conditions_count(&self) -> usize {
3961        fn count_all_conditions(field: Option<&Vec<Condition>>) -> usize {
3962            field
3963                .map(|i| i.iter().map(|j| j.sub_conditions_count()).sum::<usize>())
3964                .unwrap_or(0)
3965        }
3966
3967        count_all_conditions(self.should.as_ref())
3968            + count_all_conditions(self.min_should.as_ref().map(|i| &i.conditions))
3969            + count_all_conditions(self.must.as_ref())
3970            + count_all_conditions(self.must_not.as_ref())
3971    }
3972
3973    /// Returns the size of the largest condition.
3974    pub fn max_condition_input_size(&self) -> usize {
3975        self.iter_conditions()
3976            .map(|i| i.size_estimation())
3977            .max()
3978            .unwrap_or(0)
3979    }
3980}
3981
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum SnapshotFormat {
    /// Created by Qdrant `<0.11.0`.
    ///
    /// The collection snapshot contains nested tar archives for segments.
    /// Segment tar archives contain a plain copy of the segment directory.
    ///
    /// ```plaintext
    /// ./0/segments/
    /// ├── 0b31e274-dc65-40e4-8493-67ebed4bcf10.tar
    /// │   ├── segment.json
    /// │   ├── CURRENT
    /// │   ├── 000009.sst
    /// │   ├── 000010.sst
    /// │   └── …
    /// ├── 1d6c96ec-7965-491a-9c45-362d55361e9b.tar
    /// └── …
    /// ```
    Ancient,
    /// Qdrant `>=0.11.0` `<=1.13` (and maybe even later).
    ///
    /// The collection snapshot contains nested tar archives for segments.
    /// Distinguished by a single top-level directory `snapshot` in each segment
    /// tar archive. RocksDB data stored as backups and requires unpacking
    /// procedure.
    ///
    /// ```plaintext
    /// ./0/segments/
    /// ├── 0b31e274-dc65-40e4-8493-67ebed4bcf10.tar
    /// │   └── snapshot/                               # single top-level dir
    /// │       ├── db_backup/                          # rocksdb backup
    /// │       │   ├── meta/
    /// │       │   ├── private/
    /// │       │   └── shared_checksum/
    /// │       ├── payload_index_db_backup             # rocksdb backup
    /// │       │   ├── meta/
    /// │       │   ├── private/
    /// │       │   └── shared_checksum/
    /// │       └── files/                              # regular files
    /// │           ├── segment.json
    /// │           └── …
    /// ├── 1d6c96ec-7965-491a-9c45-362d55361e9b.tar
    /// └── …
    /// ```
    Regular,
    /// New experimental format.
    ///
    /// ```plaintext
    /// ./0/segments/
    /// ├── 0b31e274-dc65-40e4-8493-67ebed4bcf10/
    /// │   ├── db_backup/                              # rocksdb backup
    /// │   │   ├── meta/
    /// │   │   ├── private/
    /// │   │   └── shared_checksum/
    /// │   ├── payload_index_db_backup                 # rocksdb backup
    /// │   │   ├── meta/
    /// │   │   ├── private/
    /// │   │   └── shared_checksum/
    /// │   └── files/                                  # regular files
    /// │       ├── segment.json
    /// │       └── …
    /// ├── 1d6c96ec-7965-491a-9c45-362d55361e9b/
    /// └── …
    /// ```
    Streamable,
}
4048
#[cfg(test)]
pub(crate) mod test_utils {
    use super::{GeoLineString, GeoPoint, GeoPolygon};

    /// Turn `(lon, lat)` pairs into a `GeoLineString` without coordinate validation.
    fn line_from_points(points: Vec<(f64, f64)>) -> GeoLineString {
        GeoLineString {
            points: points
                .into_iter()
                .map(|(lon, lat)| GeoPoint::new_unchecked(lon, lat))
                .collect(),
        }
    }

    /// Polygon from an exterior ring only (no holes).
    pub fn build_polygon(exterior_points: Vec<(f64, f64)>) -> GeoPolygon {
        GeoPolygon {
            exterior: line_from_points(exterior_points),
            interiors: None,
        }
    }

    /// Polygon from an exterior ring plus interior rings (holes).
    pub fn build_polygon_with_interiors(
        exterior_points: Vec<(f64, f64)>,
        interiors_points: Vec<Vec<(f64, f64)>>,
    ) -> GeoPolygon {
        GeoPolygon {
            exterior: line_from_points(exterior_points),
            interiors: Some(interiors_points.into_iter().map(line_from_points).collect()),
        }
    }
}
4096
4097#[cfg(test)]
4098mod tests {
4099    use itertools::Itertools;
4100    use rstest::rstest;
4101    use serde::de::DeserializeOwned;
4102    use serde_json;
4103
4104    use super::test_utils::build_polygon_with_interiors;
4105    use super::*;
4106
4107    #[allow(dead_code)]
4108    fn check_rms_serialization<T: Serialize + DeserializeOwned + PartialEq + std::fmt::Debug>(
4109        record: T,
4110    ) {
4111        let binary_entity = rmp_serde::to_vec(&record).expect("serialization ok");
4112        let de_record: T = rmp_serde::from_slice(&binary_entity).expect("deserialization ok");
4113
4114        assert_eq!(record, de_record);
4115    }
4116
4117    #[test]
4118    #[ignore]
4119    fn test_rmp_vs_cbor_deserialize() {
4120        let payload = payload_json! {"payload_key": "payload_value"};
4121        let raw = rmp_serde::to_vec(&payload).unwrap();
4122        let de_record: Payload = serde_cbor::from_slice(&raw).unwrap();
4123        eprintln!("payload = {payload:#?}");
4124        eprintln!("de_record = {de_record:#?}");
4125    }
4126
4127    #[rstest]
4128    #[case::rfc_3339("2020-03-01T00:00:00Z")]
4129    #[case::rfc_3339_custom_tz("2020-03-01T00:00:00-09:00")]
4130    #[case::rfc_3339_custom_tz_no_colon("2020-03-01 00:00:00-0900")]
4131    #[case::rfc_3339_custom_tz_no_colon_and_t("2020-03-01T00:00:00-0900")]
4132    #[case::rfc_3339_custom_tz_no_minutes("2020-03-01 00:00:00-09")]
4133    #[case::rfc_3339_and_decimals("2020-03-01T00:00:00.123456Z")]
4134    #[case::without_z("2020-03-01T00:00:00")]
4135    #[case::without_z_and_decimals("2020-03-01T00:00:00.12")]
4136    #[case::space_sep_without_z("2020-03-01 00:00:00")]
4137    #[case::space_sep_without_z_and_decimals("2020-03-01 00:00:00.123456")]
4138    fn test_datetime_deserialization(#[case] datetime: &str) {
4139        let datetime = DateTimePayloadType::from_str(datetime).unwrap();
4140        let serialized = serde_json::to_string(&datetime).unwrap();
4141        let deserialized: DateTimePayloadType = serde_json::from_str(&serialized).unwrap();
4142        assert_eq!(datetime, deserialized);
4143    }
4144
4145    #[test]
4146    fn test_datetime_deserialization_equivalency() {
4147        let datetime_str = "2020-03-01T01:02:03.123456Z";
4148        let datetime_str_no_z = "2020-03-01T01:02:03.123456";
4149        let datetime = DateTimePayloadType::from_str(datetime_str).unwrap();
4150        let datetime_no_z = DateTimePayloadType::from_str(datetime_str_no_z).unwrap();
4151
4152        // Having or not the Z at the end of the string both mean UTC time
4153        assert_eq!(datetime.timestamp(), datetime_no_z.timestamp());
4154    }
4155
4156    #[test]
4157    fn test_invalid_datetime_range_returns_clear_rfc3339_error() {
4158        let json = r#"{
4159            "key": "created_at",
4160            "range": {
4161                "gte": "2014-01-01T00:00:00BAD"
4162            }
4163        }"#;
4164
4165        let err = serde_json::from_str::<Condition>(json)
4166            .unwrap_err()
4167            .to_string();
4168
4169        assert!(err.contains("RFC3339"), "err was: {err}");
4170        assert!(err.contains("2014-01-01T00:00:00BAD"), "err was: {err}");
4171        assert!(err.contains("Example"), "err was: {err}");
4172    }
4173
4174    /// Regression test: DateTimePayloadType binary serialization roundtrip.
4175    /// Ensures DateTimePayloadType parses binary-encoded RFC3339 strings.
4176    #[test]
4177    fn test_datetime_payload_type_binary_roundtrip() {
4178        let original = DateTimePayloadType::from_str("2024-06-15T12:30:45Z").unwrap();
4179
4180        // rmp-serde uses non-human-readable format
4181        let binary = rmp_serde::to_vec(&original).expect("serialize");
4182        let restored: DateTimePayloadType = rmp_serde::from_slice(&binary).expect("deserialize");
4183
4184        assert_eq!(original, restored);
4185    }
4186
4187    /// Regression test: RangeInterface with datetime binary roundtrip.
4188    /// Ensures the RangeInterface datetime deserialization works in binary.
4189    #[test]
4190    fn test_range_interface_datetime_binary_roundtrip() {
4191        let dt_gte = DateTimePayloadType::from_str("2024-01-01T00:00:00Z").unwrap();
4192        let dt_lte = DateTimePayloadType::from_str("2024-12-31T23:59:59Z").unwrap();
4193
4194        let range = RangeInterface::DateTime(Range {
4195            lt: None,
4196            gt: None,
4197            gte: Some(dt_gte),
4198            lte: Some(dt_lte),
4199        });
4200
4201        // rmp-serde uses non-human-readable format
4202        let binary = rmp_serde::to_vec(&range).expect("serialize");
4203        let restored: RangeInterface = rmp_serde::from_slice(&binary).expect("deserialize");
4204
4205        assert_eq!(range, restored);
4206    }
4207
4208    /// Regression test: Non-FieldCondition JSON deserialization uses ConditionUntagged fallback.
4209    /// Ensures compiler-safe handling of other Condition variants.
4210    #[test]
4211    fn test_condition_json_fallback_to_untagged() {
4212        // IsEmptyCondition (no "key" field at top level, uses "is_empty" instead)
4213        let is_empty_json = r#"{"is_empty": {"key": "optional_field"}}"#;
4214        let condition: Condition = serde_json::from_str(is_empty_json).unwrap();
4215        assert!(matches!(condition, Condition::IsEmpty(_)));
4216
4217        // HasIdCondition
4218        let has_id_json = r#"{"has_id": [1, 2, 3]}"#;
4219        let condition: Condition = serde_json::from_str(has_id_json).unwrap();
4220        assert!(matches!(condition, Condition::HasId(_)));
4221
4222        // Nested Filter
4223        let nested_json = r#"{"nested": {"key": "items", "filter": {"must": []}}}"#;
4224        let condition: Condition = serde_json::from_str(nested_json).unwrap();
4225        assert!(matches!(condition, Condition::Nested(_)));
4226    }
4227
4228    #[test]
4229    fn test_datetime_wrapper_transcoding() {
4230        let expected = DateTimeWrapper(chrono::Utc::now());
4231        let transcoded = DateTimeWrapper::from_str(&expected.to_string()).unwrap();
4232        assert_eq!(expected, transcoded);
4233    }
4234
4235    #[test]
4236    fn test_timezone_ordering() {
4237        let datetimes = [
4238            "2000-06-08 00:18:53+0900",
4239            "2000-06-07 07:25:34-1100",
4240            "2000-07-10T00:18:53+0100",
4241            "2000-07-11 00:25:34-01:00",
4242            "2000-07-11 00:25:35-01",
4243        ];
4244
4245        let sorted_datetimes: Vec<_> = datetimes
4246            .iter()
4247            .enumerate()
4248            .map(|(i, s)| (i, DateTimePayloadType::from_str(s).unwrap()))
4249            .sorted_by_key(|(_, dt)| dt.timestamp())
4250            .collect();
4251
4252        sorted_datetimes
4253            .array_windows()
4254            .for_each(|[(i1, dt1), (i2, dt2)]| {
4255                assert!(
4256                    i1 < i2,
4257                    "i1: {}, dt1: {}, ts1: {}\ni2: {}, dt2: {}, ts2: {}",
4258                    i1,
4259                    dt1.0,
4260                    dt1.timestamp(),
4261                    i2,
4262                    dt2.0,
4263                    dt2.timestamp()
4264                );
4265            });
4266    }
4267
4268    #[test]
4269    fn test_geo_radius_check_point() {
4270        let radius = GeoRadius {
4271            center: GeoPoint::new_unchecked(0.0, 0.0),
4272            radius: OrderedFloat(80000.0),
4273        };
4274
4275        let inside_result = radius.check_point(&GeoPoint::new_unchecked(0.5, 0.5));
4276        assert!(inside_result);
4277
4278        let outside_result = radius.check_point(&GeoPoint::new_unchecked(1.5, 1.5));
4279        assert!(!outside_result);
4280    }
4281
4282    #[test]
4283    fn test_geo_boundingbox_check_point() {
4284        let bounding_box = GeoBoundingBox {
4285            top_left: GeoPoint::new_unchecked(-1.0, 1.0),
4286            bottom_right: GeoPoint::new_unchecked(1.0, -1.0),
4287        };
4288
4289        // haversine distance between (0, 0) and (0.5, 0.5) is 78626.29627999048
4290        let inside_result = bounding_box.check_point(&GeoPoint::new_unchecked(-0.5, 0.5));
4291        assert!(inside_result);
4292
4293        // haversine distance between (0, 0) and (0.5, 0.5) is 235866.91169814655
4294        let outside_result = bounding_box.check_point(&GeoPoint::new_unchecked(1.5, 1.5));
4295        assert!(!outside_result);
4296    }
4297
4298    #[test]
4299    fn test_geo_boundingbox_antimeridian_check_point() {
4300        // Use the bounding box for USA: (74.071028, 167), (18.7763, -66.885417)
4301        let bounding_box = GeoBoundingBox {
4302            top_left: GeoPoint::new_unchecked(167.0, 74.071028),
4303            bottom_right: GeoPoint::new_unchecked(-66.885417, 18.7763),
4304        };
4305
4306        // Test NYC, which is inside the bounding box
4307        let inside_result =
4308            bounding_box.check_point(&GeoPoint::new_unchecked(-73.991516, 40.75798));
4309        assert!(inside_result);
4310
4311        // Test Berlin, which is outside the bounding box
4312        let outside_result = bounding_box.check_point(&GeoPoint::new_unchecked(13.41053, 52.52437));
4313        assert!(!outside_result);
4314    }
4315
4316    #[test]
4317    fn test_geo_polygon_check_point() {
4318        let test_cases = [
4319            // Create a GeoPolygon with a square shape
4320            (
4321                // Exterior
4322                vec![
4323                    (-1.0, -1.0),
4324                    (1.0, -1.0),
4325                    (1.0, 1.0),
4326                    (-1.0, 1.0),
4327                    (-1.0, -1.0),
4328                ],
4329                // Interiors
4330                vec![vec![]],
4331                // Expected results
4332                vec![((0.5, 0.5), true), ((1.5, 1.5), false), ((1.0, 0.0), false)],
4333            ),
4334            // Create a GeoPolygon as a `twisted square`
4335            (
4336                // Exterior
4337                vec![
4338                    (-1.0, -1.0),
4339                    (1.0, 1.0),
4340                    (1.0, -1.0),
4341                    (-1.0, 1.0),
4342                    (-1.0, -1.0),
4343                ],
4344                // Interiors
4345                vec![vec![]],
4346                // Expected results
4347                vec![((0.5, 0.0), true), ((0.0, 0.5), false), ((0.0, 0.0), false)],
4348            ),
4349            // Create a GeoPolygon with an interior (a 'hole' inside the polygon)
4350            (
4351                // Exterior
4352                vec![
4353                    (-1.0, -1.0),
4354                    (1.5, -1.0),
4355                    (1.5, 1.5),
4356                    (-1.0, 1.5),
4357                    (-1.0, -1.0),
4358                ],
4359                // Interiors
4360                vec![vec![
4361                    (-0.5, -0.5),
4362                    (-0.5, 0.5),
4363                    (0.5, 0.5),
4364                    (0.5, -0.5),
4365                    (-0.5, -0.5),
4366                ]],
4367                // Expected results
4368                vec![((0.6, 0.6), true), ((0.0, 0.0), false), ((0.5, 0.5), false)],
4369            ),
4370        ];
4371
4372        for (exterior, interiors, points) in test_cases {
4373            let polygon = build_polygon_with_interiors(exterior, interiors);
4374
4375            for ((lon, lat), expected_result) in points {
4376                let inside_result = polygon
4377                    .convert()
4378                    .check_point(&GeoPoint::new_unchecked(lon, lat));
4379                assert_eq!(inside_result, expected_result);
4380            }
4381        }
4382    }
4383
4384    #[test]
4385    fn test_serialize_query() {
4386        let filter = Filter {
4387            must: Some(vec![Condition::Field(FieldCondition::new_match(
4388                JsonPath::new("hello"),
4389                "world".to_owned().into(),
4390            ))]),
4391            must_not: None,
4392            should: None,
4393            min_should: None,
4394        };
4395        let json = serde_json::to_string_pretty(&filter).unwrap();
4396        eprintln!("{json}")
4397    }
4398
4399    #[test]
4400    fn test_deny_unknown_fields() {
4401        let query1 = r#"
4402         {
4403            "wrong": "query"
4404         }
4405         "#;
4406        let filter: Result<Filter, _> = serde_json::from_str(query1);
4407
4408        assert!(filter.is_err())
4409    }
4410
4411    #[test]
4412    fn test_parse_match_query() {
4413        let query = r#"
4414        {
4415            "key": "hello",
4416            "match": { "value": 42 }
4417        }
4418        "#;
4419        let condition: FieldCondition = serde_json::from_str(query).unwrap();
4420        assert_eq!(
4421            condition.r#match.unwrap(),
4422            Match::Value(MatchValue {
4423                value: ValueVariants::Integer(42)
4424            })
4425        );
4426
4427        let query = r#"
4428        {
4429            "key": "hello",
4430            "match": { "value": true }
4431        }
4432        "#;
4433        let condition: FieldCondition = serde_json::from_str(query).unwrap();
4434        assert_eq!(
4435            condition.r#match.unwrap(),
4436            Match::Value(MatchValue {
4437                value: ValueVariants::Bool(true)
4438            })
4439        );
4440
4441        let query = r#"
4442        {
4443            "key": "hello",
4444            "match": { "value": "world" }
4445        }
4446        "#;
4447
4448        let condition: FieldCondition = serde_json::from_str(query).unwrap();
4449        assert_eq!(
4450            condition.r#match.unwrap(),
4451            Match::Value(MatchValue {
4452                value: ValueVariants::String("world".to_owned())
4453            })
4454        );
4455    }
4456
4457    #[test]
4458    fn test_parse_match_any() {
4459        let query = r#"
4460        {
4461            "should": [
4462                {
4463                    "key": "Jason",
4464                    "match": {
4465                        "any": [
4466                            "Bourne",
4467                            "Momoa",
4468                            "Statham"
4469                        ]
4470                    }
4471                }
4472            ]
4473        }
4474        "#;
4475
4476        let filter: Filter = serde_json::from_str(query).unwrap();
4477        let should = filter.should.unwrap();
4478
4479        assert_eq!(should.len(), 1);
4480        let Some(Condition::Field(c)) = should.first() else {
4481            panic!("Condition::Field expected")
4482        };
4483
4484        assert_eq!(c.key.to_string(), "Jason");
4485
4486        let Match::Any(m) = c.r#match.as_ref().unwrap() else {
4487            panic!("Match::Any expected")
4488        };
4489        if let AnyVariants::Strings(kws) = &m.any {
4490            assert_eq!(kws.len(), 3);
4491            let expect: IndexSet<_, FnvBuildHasher> = ["Bourne", "Momoa", "Statham"]
4492                .into_iter()
4493                .map(|i| i.to_string())
4494                .collect();
4495            assert_eq!(kws, &expect);
4496        } else {
4497            panic!("AnyVariants::Keywords expected");
4498        }
4499    }
4500
4501    #[test]
4502    fn test_parse_match_any_mixed_types() {
4503        let query = r#"
4504        {
4505            "should": [
4506                {
4507                    "key": "Jason",
4508                    "match": {
4509                        "any": [
4510                            "Bourne",
4511                            42
4512                        ]
4513                    }
4514                }
4515            ]
4516        }
4517        "#;
4518
4519        let result: Result<Filter, _> = serde_json::from_str(query);
4520        assert!(result.is_err());
4521    }
4522
4523    #[test]
4524    fn test_parse_nested_match_query() {
4525        let query = r#"
4526        {
4527            "key": "hello.nested",
4528            "match": { "value": 42 }
4529        }
4530        "#;
4531        let condition: FieldCondition = serde_json::from_str(query).unwrap();
4532        assert_eq!(
4533            condition.r#match.unwrap(),
4534            Match::Value(MatchValue {
4535                value: ValueVariants::Integer(42)
4536            })
4537        );
4538
4539        let query = r#"
4540        {
4541            "key": "hello.nested",
4542            "match": { "value": true }
4543        }
4544        "#;
4545        let condition: FieldCondition = serde_json::from_str(query).unwrap();
4546        assert_eq!(
4547            condition.r#match.unwrap(),
4548            Match::Value(MatchValue {
4549                value: ValueVariants::Bool(true)
4550            })
4551        );
4552
4553        let query = r#"
4554        {
4555            "key": "hello.nested",
4556            "match": { "value": "world" }
4557        }
4558        "#;
4559
4560        let condition: FieldCondition = serde_json::from_str(query).unwrap();
4561        assert_eq!(
4562            condition.r#match.unwrap(),
4563            Match::Value(MatchValue {
4564                value: ValueVariants::String("world".to_owned())
4565            })
4566        );
4567    }
4568
4569    #[test]
4570    fn test_parse_empty_query() {
4571        let query = r#"
4572        {
4573            "should": [
4574                {
4575                    "is_empty" : {
4576                        "key" : "Jason"
4577                    }
4578                }
4579            ]
4580        }
4581        "#;
4582
4583        let filter: Filter = serde_json::from_str(query).unwrap();
4584        let should = filter.should.unwrap();
4585
4586        assert_eq!(should.len(), 1);
4587        let Some(Condition::IsEmpty(c)) = should.first() else {
4588            panic!("Condition::IsEmpty expected")
4589        };
4590
4591        assert_eq!(c.is_empty.key.to_string(), "Jason");
4592    }
4593
4594    #[test]
4595    fn test_parse_null_query() {
4596        let query = r#"
4597        {
4598            "should": [
4599                {
4600                    "is_null" : {
4601                        "key" : "Jason"
4602                    }
4603                }
4604            ]
4605        }
4606        "#;
4607
4608        let filter: Filter = serde_json::from_str(query).unwrap();
4609        let should = filter.should.unwrap();
4610
4611        assert_eq!(should.len(), 1);
4612        let Some(Condition::IsNull(c)) = should.first() else {
4613            panic!("Condition::IsNull expected")
4614        };
4615
4616        assert_eq!(c.is_null.key.to_string(), "Jason");
4617    }
4618
4619    #[test]
4620    fn test_parse_nested_filter_query() {
4621        let query = r#"
4622        {
4623          "must": [
4624            {
4625              "nested": {
4626                "key": "country.cities",
4627                "filter": {
4628                  "must": [
4629                    {
4630                      "key": "population",
4631                      "range": {
4632                        "gte": 8
4633                      }
4634                    },
4635                    {
4636                      "key": "sightseeing",
4637                      "values_count": {
4638                        "lt": 3
4639                      }
4640                    }
4641                  ]
4642                }
4643              }
4644            }
4645          ]
4646        }
4647        "#;
4648        let filter: Filter = serde_json::from_str(query).unwrap();
4649        let musts = filter.must.unwrap();
4650        assert_eq!(musts.len(), 1);
4651        match musts.first() {
4652            Some(Condition::Nested(nested_condition)) => {
4653                assert_eq!(nested_condition.raw_key().to_string(), "country.cities");
4654                assert_eq!(nested_condition.array_key().to_string(), "country.cities[]");
4655                let nested_musts = nested_condition.filter().must.as_ref().unwrap();
4656                assert_eq!(nested_musts.len(), 2);
4657                let first_must = nested_musts.first().unwrap();
4658                match first_must {
4659                    Condition::Field(c) => {
4660                        assert_eq!(c.key.to_string(), "population");
4661                        assert!(c.range.is_some());
4662                    }
4663                    _ => panic!("Condition::Field expected"),
4664                }
4665
4666                let second_must = nested_musts.get(1).unwrap();
4667                match second_must {
4668                    Condition::Field(c) => {
4669                        assert_eq!(c.key.to_string(), "sightseeing");
4670                        assert!(c.values_count.is_some());
4671                    }
4672                    _ => panic!("Condition::Field expected"),
4673                }
4674            }
4675            o => panic!("Condition::Nested expected but got {o:?}"),
4676        };
4677    }
4678
4679    #[test]
4680    fn test_parse_single_nested_filter_query() {
4681        let query = r#"
4682        {
4683          "must": {
4684              "nested": {
4685                "key": "country.cities",
4686                "filter": {
4687                  "must": {
4688                      "key": "population",
4689                      "range": {
4690                        "gte": 8
4691                      }
4692                    }
4693                }
4694              }
4695            }
4696        }
4697        "#;
4698        let filter: Filter = serde_json::from_str(query).unwrap();
4699        let musts = filter.must.unwrap();
4700        assert_eq!(musts.len(), 1);
4701
4702        let first_must = musts.first().unwrap();
4703        let Condition::Nested(nested_condition) = first_must else {
4704            panic!("Condition::Nested expected but got {first_must:?}")
4705        };
4706
4707        assert_eq!(nested_condition.raw_key().to_string(), "country.cities");
4708        assert_eq!(nested_condition.array_key().to_string(), "country.cities[]");
4709
4710        let nested_must = nested_condition.filter().must.as_ref().unwrap();
4711        assert_eq!(nested_must.len(), 1);
4712
4713        let must = nested_must.first().unwrap();
4714        let Condition::Field(c) = must else {
4715            panic!("Condition::Field expected, got {must:?}")
4716        };
4717
4718        assert_eq!(c.key.to_string(), "population");
4719        assert!(c.range.is_some());
4720    }
4721
4722    #[test]
4723    fn test_payload_query_parse() {
4724        let query1 = r#"
4725        {
4726            "must": [
4727                {
4728                    "key": "hello",
4729                    "match": {
4730                        "value": 42
4731                    }
4732                },
4733                {
4734                    "must_not": [
4735                        {
4736                            "has_id": [1, 2, 3, 4]
4737                        },
4738                        {
4739                            "key": "geo_field",
4740                            "geo_bounding_box": {
4741                                "top_left": {
4742                                    "lon": 13.410146,
4743                                    "lat": 52.519289
4744                                },
4745                                "bottom_right": {
4746                                    "lon": 13.432683,
4747                                    "lat": 52.505582
4748                                }
4749                            }
4750                        }
4751                    ]
4752                }
4753            ]
4754        }
4755        "#;
4756
4757        let filter: Filter = serde_json::from_str(query1).unwrap();
4758        eprintln!("{filter:?}");
4759        let must = filter.must.unwrap();
4760        let _must_not = filter.must_not;
4761        assert_eq!(must.len(), 2);
4762        match must.get(1) {
4763            Some(Condition::Filter(f)) => {
4764                let must_not = &f.must_not;
4765                match must_not {
4766                    Some(v) => assert_eq!(v.len(), 2),
4767                    None => panic!("Filter expected"),
4768                }
4769            }
4770            _ => panic!("Condition expected"),
4771        }
4772    }
4773
4774    #[test]
4775    fn test_nested_payload_query_parse() {
4776        let query1 = r#"
4777        {
4778            "must": [
4779                {
4780                    "key": "hello.nested.world",
4781                    "match": {
4782                        "value": 42
4783                    }
4784                },
4785                {
4786                    "key": "foo.nested.bar",
4787                    "match": {
4788                        "value": 1
4789                    }
4790                }
4791            ]
4792        }
4793        "#;
4794
4795        let filter: Filter = serde_json::from_str(query1).unwrap();
4796        let must = filter.must.unwrap();
4797        assert_eq!(must.len(), 2);
4798    }
4799
4800    #[test]
4801    fn test_min_should_query_parse() {
4802        let query1 = r#"
4803        {
4804            "min_should": {
4805                "conditions": [
4806                    {
4807                        "key": "hello.nested.world",
4808                        "match": {
4809                            "value": 42
4810                        }
4811                    },
4812                    {
4813                        "key": "foo.nested.bar",
4814                        "match": {
4815                            "value": 1
4816                        }
4817                    }
4818                ],
4819                "min_count": 2
4820            }
4821        }
4822        "#;
4823
4824        let filter: Filter = serde_json::from_str(query1).unwrap();
4825        let min_should = filter.min_should.unwrap();
4826        assert_eq!(min_should.conditions.len(), 2);
4827    }
4828
4829    #[test]
4830    fn test_min_should_nested_parse() {
4831        let query1 = r#"
4832        {
4833            "must": [
4834                {
4835                    "min_should": {
4836                        "conditions": [
4837                            {
4838                                "key": "hello.nested.world",
4839                                "match": {
4840                                    "value": 42
4841                                }
4842                            },
4843                            {
4844                                "key": "foo.nested.bar",
4845                                "match": {
4846                                    "value": 1
4847                                }
4848                            }
4849                        ],
4850                        "min_count": 2
4851                    }
4852                }
4853            ]
4854        }
4855        "#;
4856
4857        let filter: Filter = serde_json::from_str(query1).unwrap();
4858        let must = filter.must.unwrap();
4859        assert_eq!(must.len(), 1);
4860
4861        match must.first() {
4862            Some(Condition::Filter(f)) => {
4863                let min_should = &f.min_should;
4864                match min_should {
4865                    Some(v) => assert_eq!(v.conditions.len(), 2),
4866                    None => panic!("Filter expected"),
4867                }
4868            }
4869            _ => panic!("Condition expected"),
4870        }
4871    }
4872
    #[test]
    fn test_geo_validation() {
        // Longitude 1113.410146 is far outside the valid [-180, 180] range,
        // so the bounding box must be rejected at deserialization.
        let query1 = r#"
        {
            "must": [
                {
                    "key": "geo_field",
                    "geo_bounding_box": {
                        "top_left": {
                            "lon": 1113.410146,
                            "lat": 52.519289
                        },
                        "bottom_right": {
                            "lon": 13.432683,
                            "lat": 52.505582
                        }
                    }
                }
            ]
        }
        "#;
        let filter: Result<Filter, _> = serde_json::from_str(query1);
        assert!(filter.is_err());

        // An exterior ring with no `points` field at all is rejected.
        let query2 = r#"
        {
            "must": [
                {
                    "key": "geo_field",
                    "geo_polygon": {
                        "exterior": {},
                        "interiors": []
                    }
                }
            ]
        }
        "#;
        let filter: Result<Filter, _> = serde_json::from_str(query2);
        assert!(filter.is_err());

        // A 3-point ring whose last point does not repeat the first is not
        // a closed ring, so the polygon is rejected.
        let query3 = r#"
        {
            "must": [
                {
                    "key": "geo_field",
                    "geo_polygon": {
                        "exterior":{
                            "points": [
                                {"lon": -12.0, "lat": -34.0},
                                {"lon": 11.0, "lat": -22.0},
                                {"lon": -32.0, "lat": -14.0}
                            ]
                        },
                        "interiors": []
                    }
                }
            ]
        }
        "#;
        let filter: Result<Filter, _> = serde_json::from_str(query3);
        assert!(filter.is_err());

        // Same ring, properly closed by repeating the first point: accepted.
        let query4 = r#"
        {
            "must": [
                {
                    "key": "geo_field",
                    "geo_polygon": {
                        "exterior": {
                            "points": [
                                {"lon": -12.0, "lat": -34.0},
                                {"lon": 11.0, "lat": -22.0},
                                {"lon": -32.0, "lat": -14.0},
                                {"lon": -12.0, "lat": -34.0}
                            ]
                        },
                        "interiors": []
                    }
                }
            ]
        }
        "#;
        let filter: Result<Filter, _> = serde_json::from_str(query4);
        assert!(filter.is_ok());

        // Valid (closed) exterior, but the interior ring is unclosed: the
        // same closure rule applies to interiors, so this is rejected.
        let query5 = r#"
            {
                "must": [
                    {
                        "key": "geo_field",
                        "geo_polygon": {
                            "exterior": {
                                    "points": [
                                        {"lon": -12.0, "lat": -34.0},
                                        {"lon": 11.0, "lat": -22.0},
                                        {"lon": -32.0, "lat": -14.0},
                                        {"lon": -12.0, "lat": -34.0}
                                    ]
                                },
                            "interiors": [
                                {
                                    "points": [
                                        {"lon": -12.0, "lat": -34.0},
                                        {"lon": 11.0, "lat": -22.0},
                                        {"lon": -32.0, "lat": -14.0}
                                    ]
                                }
                            ]
                        }
                    }
                ]
            }
            "#;
        let filter: Result<Filter, _> = serde_json::from_str(query5);
        assert!(filter.is_err());

        // Both exterior and interior rings are closed: accepted.
        let query6 = r#"
            {
                "must": [
                    {
                        "key": "geo_field",
                        "geo_polygon": {
                            "exterior": {
                                    "points": [
                                        {"lon": -12.0, "lat": -34.0},
                                        {"lon": 11.0, "lat": -22.0},
                                        {"lon": -32.0, "lat": -14.0},
                                        {"lon": -12.0, "lat": -34.0}
                                    ]
                                },
                            "interiors": [
                                {
                                    "points": [
                                        {"lon": -12.0, "lat": -34.0},
                                        {"lon": 11.0, "lat": -22.0},
                                        {"lon": -32.0, "lat": -14.0},
                                        {"lon": -12.0, "lat": -34.0}
                                    ]
                                }
                            ]
                        }
                    }
                ]
            }
            "#;
        let filter: Result<Filter, _> = serde_json::from_str(query6);
        assert!(filter.is_ok());
    }
5021
5022    #[test]
5023    fn test_payload_parsing() {
5024        let ft = PayloadFieldSchema::FieldType(PayloadSchemaType::Keyword);
5025        let ft_json = serde_json::to_string(&ft).unwrap();
5026        eprintln!("ft_json = {ft_json:?}");
5027
5028        let ft = PayloadFieldSchema::FieldParams(PayloadSchemaParams::Text(Default::default()));
5029        let ft_json = serde_json::to_string(&ft).unwrap();
5030        eprintln!("ft_json = {ft_json:?}");
5031
5032        let query = r#""keyword""#;
5033        let field_type: PayloadSchemaType = serde_json::from_str(query).unwrap();
5034        eprintln!("field_type = {field_type:?}");
5035    }
5036
5037    #[test]
5038    fn merge_filters() {
5039        let condition1 = Condition::Field(FieldCondition::new_match(
5040            JsonPath::new("summary"),
5041            Match::new_text("Berlin"),
5042        ));
5043        let mut this = Filter::new_must(condition1.clone());
5044        this.should = Some(vec![condition1.clone()]);
5045
5046        let condition2 = Condition::Field(FieldCondition::new_match(
5047            JsonPath::new("city"),
5048            Match::new_value(ValueVariants::String("Osaka".into())),
5049        ));
5050        let other = Filter::new_must(condition2.clone());
5051
5052        let merged = this.merge(&other);
5053
5054        assert!(merged.must.is_some());
5055        assert_eq!(merged.must.as_ref().unwrap().len(), 2);
5056        assert!(merged.must_not.is_none());
5057        assert!(merged.should.is_some());
5058        assert_eq!(merged.should.as_ref().unwrap().len(), 1);
5059
5060        assert!(merged.must.as_ref().unwrap().contains(&condition1));
5061        assert!(merged.must.as_ref().unwrap().contains(&condition2));
5062        assert!(merged.should.as_ref().unwrap().contains(&condition1));
5063    }
5064
    #[test]
    fn test_payload_selector_include() {
        // Deeply nested payload fixture for include-selection.
        let payload = payload_json! {
            "a": 1,
            "b": {
                "c": 123,
                "e": {
                    "f": [1,2,3],
                    "g": 7,
                    "h": "text",
                    "i": [
                        {
                            "j": 1,
                            "k": 2

                        },
                        {
                            "j": 3,
                            "k": 4
                        }
                    ]
                }
            }
        };

        // include root & nested
        let selector =
            PayloadSelector::new_include(vec![JsonPath::new("a"), JsonPath::new("b.e.f")]);
        let payload = selector.process(payload);

        // Only the selected paths survive; unselected siblings such as
        // "b.c", "b.e.g", "b.e.h" and "b.e.i" are dropped.
        let expected = payload_json! {
            "a": 1,
            "b": {
                "e": {
                    "f": [1,2,3],
                }
            }
        };
        assert_eq!(payload, expected);
    }
5105
    #[test]
    fn test_payload_selector_array_include() {
        let payload = payload_json! {
            "a": 1,
            "b": {
                "c": 123,
                "f": [1,2,3,4,5],
            }
        };

        // handles duplicates: listing the same path twice includes it once
        let selector = PayloadSelector::new_include(vec![JsonPath::new("a"), JsonPath::new("a")]);
        let payload = selector.process(payload);

        let expected = payload_json! {
            "a": 1
        };
        assert_eq!(payload, expected);

        // ignore path that points to array: indexed paths like "b.f[0]"
        // are not valid include targets
        let selector = PayloadSelector::new_include(vec![JsonPath::new("b.f[0]")]);
        let payload = selector.process(payload);

        // nothing included
        let expected = payload_json! {};
        assert_eq!(payload, expected);
    }
5133
    #[test]
    fn test_payload_selector_no_implicit_array_include() {
        // Fixture: "b.c" is an array of objects, to exercise array traversal.
        let payload = payload_json! {
            "a": 1,
            "b": {
                "c": [
                    {
                        "d": 1,
                        "e": 2
                    },
                    {
                        "d": 3,
                        "e": 4
                    }
                ],
            }
        };

        // Including "b.c" (no [] suffix) keeps the whole array untouched.
        let selector = PayloadSelector::new_include(vec![JsonPath::new("b.c")]);
        let selected_payload = selector.process(payload.clone());

        let expected = payload_json! {
            "b": {
                "c": [
                    {
                        "d": 1,
                        "e": 2
                    },
                    {
                        "d": 3,
                        "e": 4
                    }
                ]
            }
        };
        assert_eq!(selected_payload, expected);

        // with explicit array traversal ([] notation): "b.c[].d" selects
        // the "d" field from every array element
        let selector = PayloadSelector::new_include(vec![JsonPath::new("b.c[].d")]);
        let selected_payload = selector.process(payload.clone());

        let expected = payload_json! {
            "b": {
                "c": [
                    {"d": 1},
                    {"d": 3}
                ]
            }
        };
        assert_eq!(selected_payload, expected);

        // shortcuts implicit array traversal: "b.c.d" without [] does not
        // descend into array elements, leaving an empty array
        let selector = PayloadSelector::new_include(vec![JsonPath::new("b.c.d")]);
        let selected_payload = selector.process(payload);

        let expected = payload_json! {
            "b": {
                "c": []
            }
        };
        assert_eq!(selected_payload, expected);
    }
5196
    #[test]
    fn test_payload_selector_exclude() {
        // Same nested fixture as the include test, now used for exclusion.
        let payload = payload_json! {
            "a": 1,
            "b": {
                "c": 123,
                "e": {
                    "f": [1,2,3],
                    "g": 7,
                    "h": "text",
                    "i": [
                        {
                            "j": 1,
                            "k": 2

                        },
                        {
                            "j": 3,
                            "k": 4
                        }
                    ]
                }
            }
        };

        // exclude a root key and a nested key
        let selector =
            PayloadSelector::new_exclude(vec![JsonPath::new("a"), JsonPath::new("b.e.f")]);
        let payload = selector.process(payload);

        // root removal & nested removal: everything else is preserved
        let expected = payload_json! {
            "b": {
                "c": 123,
                "e": {
                    "g": 7,
                    "h": "text",
                    "i": [
                        {
                            "j": 1,
                            "k": 2

                        },
                        {
                            "j": 3,
                            "k": 4
                        }
                    ]
                }
            }
        };
        assert_eq!(payload, expected);
    }
5250
    #[test]
    fn test_payload_selector_array_exclude() {
        let payload = payload_json! {
            "a": 1,
            "b": {
                "c": 123,
                "f": [1,2,3,4,5],
            }
        };

        // handles duplicates: excluding the same path twice removes it once
        let selector = PayloadSelector::new_exclude(vec![JsonPath::new("a"), JsonPath::new("a")]);
        let payload = selector.process(payload);

        // single removal
        let expected = payload_json! {
            "b": {
                "c": 123,
                "f": [1,2,3,4,5],
            }
        };
        assert_eq!(payload, expected);

        // ignore path that points to array: indexed paths like "b.f[0]"
        // are not valid exclude targets
        let selector = PayloadSelector::new_exclude(vec![JsonPath::new("b.f[0]")]);

        let payload = selector.process(payload);

        // no removal
        let expected = payload_json! {
            "b": {
                "c": 123,
                "f": [1,2,3,4,5],
            }
        };
        assert_eq!(payload, expected);
    }
5288
5289    #[test]
5290    fn test_extended_point_id_cbor_roundtrip() {
5291        let uuid = Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000").unwrap();
5292
5293        for point_id in [ExtendedPointId::Uuid(uuid), ExtendedPointId::NumId(42)] {
5294            let cbor_bytes = serde_cbor::to_vec(&point_id).unwrap();
5295            let deserialized: ExtendedPointId = serde_cbor::from_slice(&cbor_bytes).unwrap();
5296            assert_eq!(point_id, deserialized);
5297        }
5298    }
5299
5300    #[test]
5301    fn test_filter_with_match_and_has_id_uuid_cbor_roundtrip() {
5302        let uuid = Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000").unwrap();
5303        let filter = Filter {
5304            should: None,
5305            min_should: None,
5306            must: Some(vec![Condition::Field(FieldCondition::new_match(
5307                crate::segment::json_path::JsonPath::new("org_id"),
5308                Match::new_value(ValueVariants::String("test_org".to_string())),
5309            ))]),
5310            must_not: Some(vec![Condition::HasId(HasIdCondition {
5311                has_id: [ExtendedPointId::Uuid(uuid)].into_iter().collect(),
5312            })]),
5313        };
5314
5315        let cbor_bytes = serde_cbor::to_vec(&filter).unwrap();
5316        let deserialized: Filter = serde_cbor::from_slice(&cbor_bytes).unwrap();
5317        assert_eq!(filter, deserialized);
5318    }
5319}
5320
/// Example value for the keyword form of `ShardKey`, referenced by its
/// `#[schemars(example = ...)]` attribute.
fn shard_key_string_example() -> String {
    String::from("region_1")
}
5324
/// Example value for the numeric form of `ShardKey`, referenced by its
/// `#[schemars(example = ...)]` attribute.
fn shard_key_number_example() -> u64 {
    12u64
}
5328
5329#[derive(Deserialize, Serialize, JsonSchema,  Debug, Clone, PartialEq, Eq, Hash)]
5330#[serde(untagged)]
5331pub enum ShardKey {
5332    #[schemars(
5333        schema_with = "String::json_schema",
5334        example = "shard_key_string_example"
5335    )]
5336    Keyword(EcoString),
5337    #[schemars(example = "shard_key_number_example")]
5338    
5339    Number(u64),
5340}
5341
5342impl From<String> for ShardKey {
5343    fn from(s: String) -> Self {
5344        ShardKey::Keyword(EcoString::from(s))
5345    }
5346}
5347
5348impl From<EcoString> for ShardKey {
5349    fn from(s: EcoString) -> Self {
5350        ShardKey::Keyword(s)
5351    }
5352}
5353
5354impl From<&str> for ShardKey {
5355    fn from(s: &str) -> Self {
5356        ShardKey::Keyword(EcoString::from(s))
5357    }
5358}
5359
5360impl From<u64> for ShardKey {
5361    fn from(n: u64) -> Self {
5362        ShardKey::Number(n)
5363    }
5364}
5365
5366impl Display for ShardKey {
5367    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
5368        match self {
5369            ShardKey::Keyword(keyword) => write!(f, "\"{keyword}\""),
5370            ShardKey::Number(number) => write!(f, "{number}"),
5371        }
5372    }
5373}