Skip to main content

qdrant_edge/segment/vector_storage/query/
reco_query.rs

1use std::hash::Hash;
2
3use crate::common::math::scaled_fast_sigmoid;
4use crate::common::types::ScoreType;
5use itertools::Itertools;
6use serde::Serialize;
7
8use super::{Query, TransformInto};
9use crate::segment::common::operation_error::OperationResult;
10use crate::segment::data_types::vectors::{QueryVector, VectorInternal};
11
12#[derive(Debug, Clone, PartialEq, Serialize, Hash)]
13pub struct RecoQuery<T> {
14    pub positives: Vec<T>,
15    pub negatives: Vec<T>,
16}
17
18impl<T> RecoQuery<T> {
19    pub fn new(positives: Vec<T>, negatives: Vec<T>) -> Self {
20        Self {
21            positives,
22            negatives,
23        }
24    }
25
26    pub fn flat_iter(&self) -> impl Iterator<Item = &T> {
27        self.positives.iter().chain(self.negatives.iter())
28    }
29}
30
31impl<T, U> TransformInto<RecoQuery<U>, T, U> for RecoQuery<T> {
32    fn transform<F>(self, mut f: F) -> OperationResult<RecoQuery<U>>
33    where
34        F: FnMut(T) -> OperationResult<U>,
35    {
36        Ok(RecoQuery::new(
37            self.positives.into_iter().map(&mut f).try_collect()?,
38            self.negatives.into_iter().map(&mut f).try_collect()?,
39        ))
40    }
41}
42
43#[derive(Debug, Clone, PartialEq)]
44pub struct RecoBestScoreQuery<T>(RecoQuery<T>);
45
46impl<T> From<RecoQuery<T>> for RecoBestScoreQuery<T> {
47    fn from(query: RecoQuery<T>) -> Self {
48        Self(query)
49    }
50}
51
52impl<T, U> TransformInto<RecoBestScoreQuery<U>, T, U> for RecoBestScoreQuery<T> {
53    fn transform<F>(self, f: F) -> OperationResult<RecoBestScoreQuery<U>>
54    where
55        F: FnMut(T) -> OperationResult<U>,
56    {
57        Ok(RecoBestScoreQuery(self.0.transform(f)?))
58    }
59}
60
61impl From<RecoBestScoreQuery<VectorInternal>> for QueryVector {
62    fn from(query: RecoBestScoreQuery<VectorInternal>) -> Self {
63        QueryVector::RecommendBestScore(query.0)
64    }
65}
66
67impl<T> Query<T> for RecoBestScoreQuery<T> {
68    fn score_by(&self, similarity: impl Fn(&T) -> ScoreType) -> ScoreType {
69        // get similarities to all positives
70        let positive_similarities = self.0.positives.iter().map(&similarity);
71
72        // and all negatives
73        let negative_similarities = self.0.negatives.iter().map(&similarity);
74
75        // get max similarity to positives and max to negatives
76        let max_positive = positive_similarities
77            .max_by(|a, b| a.total_cmp(b))
78            .unwrap_or(ScoreType::NEG_INFINITY);
79
80        let max_negative = negative_similarities
81            .max_by(|a, b| a.total_cmp(b))
82            .unwrap_or(ScoreType::NEG_INFINITY);
83
84        if max_positive > max_negative {
85            scaled_fast_sigmoid(max_positive)
86        } else {
87            -scaled_fast_sigmoid(max_negative)
88        }
89    }
90}
91
92#[derive(Debug, Clone, PartialEq)]
93pub struct RecoSumScoresQuery<T>(RecoQuery<T>);
94
95impl<T> From<RecoQuery<T>> for RecoSumScoresQuery<T> {
96    fn from(query: RecoQuery<T>) -> Self {
97        Self(query)
98    }
99}
100
101impl<T, U> TransformInto<RecoSumScoresQuery<U>, T, U> for RecoSumScoresQuery<T> {
102    fn transform<F>(self, f: F) -> OperationResult<RecoSumScoresQuery<U>>
103    where
104        F: FnMut(T) -> OperationResult<U>,
105    {
106        Ok(RecoSumScoresQuery(self.0.transform(f)?))
107    }
108}
109
110impl From<RecoSumScoresQuery<VectorInternal>> for QueryVector {
111    fn from(query: RecoSumScoresQuery<VectorInternal>) -> Self {
112        QueryVector::RecommendSumScores(query.0)
113    }
114}
115
116impl<T> Query<T> for RecoSumScoresQuery<T> {
117    fn score_by(&self, similarity: impl Fn(&T) -> ScoreType) -> ScoreType {
118        // Sum all positive vectors scores
119        let positive_score: ScoreType = self.0.positives.iter().map(&similarity).sum();
120
121        // Sum all negative vectors scores
122        let negative_score: ScoreType = self.0.negatives.iter().map(&similarity).sum();
123
124        // Subtract
125        positive_score - negative_score
126    }
127}
128
129#[cfg(test)]
130mod test {
131    use std::cmp::Ordering;
132
133    use crate::common::math::scaled_fast_sigmoid;
134    use crate::common::types::ScoreType;
135    use proptest::prelude::*;
136    use rstest::rstest;
137
138    use crate::segment::vector_storage::query::{Query, RecoBestScoreQuery, RecoQuery};
139
140    enum Chosen {
141        Positive,
142        Negative,
143    }
144
145    #[rstest]
146    #[case::higher_positive(vec![42], vec![4], Chosen::Positive, 42.0)]
147    #[case::higher_negative(vec![4], vec![42], Chosen::Negative, 42.0)]
148    #[case::negative_zero(vec![-1], vec![0], Chosen::Negative, 0.0)]
149    #[case::positive_zero(vec![0], vec![-1], Chosen::Positive, 0.0)]
150    #[case::both_under_zero(vec![-42], vec![-84], Chosen::Positive, -42.0)]
151    #[case::both_under_zero_but_negative_is_higher(vec![-84], vec![-42], Chosen::Negative, -42.0)]
152    #[case::multiple_with_negative_best(vec![1, 2, 3], vec![4, 5, 6], Chosen::Negative, 6.0)]
153    #[case::multiple_with_positive_best(vec![10, 2, 3], vec![4, 5, 6], Chosen::Positive, 10.0)]
154    fn score_query(
155        #[case] positives: Vec<isize>,
156        #[case] negatives: Vec<isize>,
157        #[case] chosen: Chosen,
158        #[case] expected: ScoreType,
159    ) {
160        use super::{RecoBestScoreQuery, RecoQuery};
161
162        let query = RecoBestScoreQuery::from(RecoQuery::new(positives, negatives));
163
164        let dummy_similarity = |x: &isize| *x as ScoreType;
165
166        let positive_transformation = scaled_fast_sigmoid;
167        let negative_transformation = |x| -scaled_fast_sigmoid(x);
168
169        let score = query.score_by(dummy_similarity);
170
171        match chosen {
172            Chosen::Positive => {
173                assert_eq!(score, positive_transformation(expected));
174            }
175            Chosen::Negative => {
176                assert_eq!(score, negative_transformation(expected));
177            }
178        }
179    }
180
181    fn ulps_eq(a: f32, b: f32, ulps: u32) -> bool {
182        if a.signum() != b.signum() {
183            return false;
184        }
185
186        let a = a.to_bits();
187        let b = b.to_bits();
188
189        a.abs_diff(b) <= ulps
190    }
191
192    /// Relaxes the comparison of floats to allow for a some difference in units of least precision
193    fn float_cmp(a: f32, b: f32) -> Ordering {
194        if ulps_eq(a, b, 80) {
195            Ordering::Equal
196        } else {
197            a.total_cmp(&b)
198        }
199    }
200
201    proptest! {
202        /// Checks that the negative-chosen scores invert the order of the candidates
203        #[test]
204        fn correct_negative_order(a in -100f32..=100f32, b in -100f32..=100f32) {
205            let dummy_similarity = |x: &f32| *x as ScoreType;
206
207            let ordering_before = float_cmp(dummy_similarity(&a), dummy_similarity(&b));
208
209            let query_a = RecoBestScoreQuery::from(RecoQuery::new(vec![], vec![a]));
210            let query_b = RecoBestScoreQuery::from(RecoQuery::new(vec![], vec![b]));
211
212            let score_a = query_a.score_by(dummy_similarity);
213            let score_b = query_b.score_by(dummy_similarity);
214
215            let ordering_after = float_cmp(score_a, score_b);
216
217            if ordering_before == std::cmp::Ordering::Equal {
218                assert_eq!(ordering_before, ordering_after);
219            } else {
220                assert_ne!(ordering_before, ordering_after)
221            }
222        }
223
224        /// Checks that the positive-chosen scores preserve the order of the candidates
225        #[test]
226        fn correct_positive_order(a in -100f32..=100f32, b in -100f32..=100f32) {
227            let dummy_similarity = |x: &f32| *x as ScoreType;
228
229            let ordering_before = float_cmp(dummy_similarity(&a), dummy_similarity(&b));
230
231            // Too similar scores can get compressed to the same value by the sigmoid function.
232            // This would make the test useless, so we skip those cases.
233            prop_assume!(ordering_before != Ordering::Equal);
234
235            let query_a = RecoBestScoreQuery::from(RecoQuery::new(vec![a], vec![]));
236            let query_b = RecoBestScoreQuery::from(RecoQuery::new(vec![b], vec![]));
237
238            let score_a = query_a.score_by(dummy_similarity);
239            let score_b = query_b.score_by(dummy_similarity);
240
241            let ordering_after = score_a.total_cmp(&score_b);
242
243            assert_eq!(ordering_before, ordering_after);
244        }
245
246        /// Guarantees that the point that was chosen from positive is always preferred on
247        /// the candidate list over a point that was chosen from negatives
248        #[test]
249        fn correct_positive_and_negative_order(p in -100f32..=100f32, n in -100f32..=100f32) {
250            let dummy_similarity = |x: &f32| *x as ScoreType;
251
252            let query_p = RecoBestScoreQuery::from(RecoQuery::new(vec![p], vec![]));
253            let query_n = RecoBestScoreQuery::from(RecoQuery::new(vec![], vec![n]));
254
255            let ordering = query_p.score_by(dummy_similarity).total_cmp(&query_n.score_by(dummy_similarity));
256
257            assert_ne!(ordering, std::cmp::Ordering::Less);
258        }
259    }
260}