milli_core/
score_details.rs

1use std::cmp::Ordering;
2
3use itertools::Itertools;
4use serde::{Deserialize, Serialize};
5
6use crate::distance_between_two_points;
7
8#[derive(Debug, Clone, PartialEq)]
9pub enum ScoreDetails {
10    Words(Words),
11    Typo(Typo),
12    Proximity(Rank),
13    Fid(Rank),
14    Position(Rank),
15    ExactAttribute(ExactAttribute),
16    ExactWords(ExactWords),
17    Sort(Sort),
18    Vector(Vector),
19    GeoSort(GeoSort),
20
21    /// Returned when we don't have the time to finish applying all the subsequent ranking-rules
22    Skipped,
23}
24
25#[derive(Clone, Copy)]
26pub enum ScoreValue<'a> {
27    Score(f64),
28    Sort(&'a Sort),
29    GeoSort(&'a GeoSort),
30}
31
32enum RankOrValue<'a> {
33    Rank(Rank),
34    Sort(&'a Sort),
35    GeoSort(&'a GeoSort),
36    Score(f64),
37}
38
39#[derive(Clone, Serialize, Deserialize)]
40#[serde(rename_all = "camelCase")]
41pub enum WeightedScoreValue {
42    WeightedScore(f64),
43    Sort { asc: bool, value: serde_json::Value },
44    GeoSort { asc: bool, distance: Option<f64> },
45    VectorSort(f64),
46}
47
48impl ScoreDetails {
49    pub fn local_score(&self) -> Option<f64> {
50        self.rank().map(Rank::local_score)
51    }
52
53    pub fn rank(&self) -> Option<Rank> {
54        match self {
55            ScoreDetails::Words(details) => Some(details.rank()),
56            ScoreDetails::Typo(details) => Some(details.rank()),
57            ScoreDetails::Proximity(details) => Some(*details),
58            ScoreDetails::Fid(details) => Some(*details),
59            ScoreDetails::Position(details) => Some(*details),
60            ScoreDetails::ExactAttribute(details) => Some(details.rank()),
61            ScoreDetails::ExactWords(details) => Some(details.rank()),
62            ScoreDetails::Sort(_) => None,
63            ScoreDetails::GeoSort(_) => None,
64            ScoreDetails::Vector(_) => None,
65            ScoreDetails::Skipped => Some(Rank { rank: 0, max_rank: 1 }),
66        }
67    }
68
69    pub fn global_score<'a>(details: impl Iterator<Item = &'a Self> + 'a) -> f64 {
70        Self::score_values(details)
71            .find_map(|x| {
72                let ScoreValue::Score(score) = x else {
73                    return None;
74                };
75                Some(score)
76            })
77            .unwrap_or(1.0f64)
78    }
79
80    pub fn score_values<'a>(
81        details: impl Iterator<Item = &'a Self> + 'a,
82    ) -> impl Iterator<Item = ScoreValue<'a>> + 'a {
83        details
84            .map(ScoreDetails::rank_or_value)
85            .coalesce(|left, right| match (left, right) {
86                (RankOrValue::Rank(left), RankOrValue::Rank(right)) => {
87                    Ok(RankOrValue::Rank(Rank::merge(left, right)))
88                }
89                (left, right) => Err((left, right)),
90            })
91            .map(|rank_or_value| match rank_or_value {
92                RankOrValue::Rank(r) => ScoreValue::Score(r.local_score()),
93                RankOrValue::Sort(s) => ScoreValue::Sort(s),
94                RankOrValue::GeoSort(g) => ScoreValue::GeoSort(g),
95                RankOrValue::Score(s) => ScoreValue::Score(s),
96            })
97    }
98
99    pub fn weighted_score_values<'a>(
100        details: impl Iterator<Item = &'a Self> + 'a,
101        weight: f64,
102    ) -> impl Iterator<Item = WeightedScoreValue> + 'a {
103        details
104            .map(ScoreDetails::rank_or_value)
105            .coalesce(|left, right| match (left, right) {
106                (RankOrValue::Rank(left), RankOrValue::Rank(right)) => {
107                    Ok(RankOrValue::Rank(Rank::merge(left, right)))
108                }
109                (left, right) => Err((left, right)),
110            })
111            .map(move |rank_or_value| match rank_or_value {
112                RankOrValue::Rank(r) => WeightedScoreValue::WeightedScore(r.local_score() * weight),
113                RankOrValue::Sort(s) => {
114                    WeightedScoreValue::Sort { asc: s.ascending, value: s.value.clone() }
115                }
116                RankOrValue::GeoSort(g) => {
117                    WeightedScoreValue::GeoSort { asc: g.ascending, distance: g.distance() }
118                }
119                RankOrValue::Score(s) => WeightedScoreValue::VectorSort(s * weight),
120            })
121    }
122
123    fn rank_or_value(&self) -> RankOrValue<'_> {
124        match self {
125            ScoreDetails::Words(w) => RankOrValue::Rank(w.rank()),
126            ScoreDetails::Typo(t) => RankOrValue::Rank(t.rank()),
127            ScoreDetails::Proximity(p) => RankOrValue::Rank(*p),
128            ScoreDetails::Fid(f) => RankOrValue::Rank(*f),
129            ScoreDetails::Position(p) => RankOrValue::Rank(*p),
130            ScoreDetails::ExactAttribute(e) => RankOrValue::Rank(e.rank()),
131            ScoreDetails::ExactWords(e) => RankOrValue::Rank(e.rank()),
132            ScoreDetails::Sort(sort) => RankOrValue::Sort(sort),
133            ScoreDetails::GeoSort(geosort) => RankOrValue::GeoSort(geosort),
134            ScoreDetails::Vector(vector) => {
135                RankOrValue::Score(vector.similarity.as_ref().map(|s| *s as f64).unwrap_or(0.0f64))
136            }
137            ScoreDetails::Skipped => RankOrValue::Rank(Rank { rank: 0, max_rank: 1 }),
138        }
139    }
140
141    /// Panics
142    ///
143    /// - If Position is not preceded by Fid
144    /// - If Exactness is not preceded by ExactAttribute
145    pub fn to_json_map<'a>(
146        details: impl Iterator<Item = &'a Self>,
147    ) -> serde_json::Map<String, serde_json::Value> {
148        let mut order = 0;
149        let mut fid_details = None;
150        let mut details_map = serde_json::Map::default();
151        for details in details {
152            match details {
153                ScoreDetails::Words(words) => {
154                    let words_details = serde_json::json!({
155                            "order": order,
156                            "matchingWords": words.matching_words,
157                            "maxMatchingWords": words.max_matching_words,
158                            "score": words.rank().local_score(),
159                    });
160                    details_map.insert("words".into(), words_details);
161                    order += 1;
162                }
163                ScoreDetails::Typo(typo) => {
164                    let typo_details = serde_json::json!({
165                        "order": order,
166                        "typoCount": typo.typo_count,
167                        "maxTypoCount": typo.max_typo_count,
168                        "score": typo.rank().local_score(),
169                    });
170                    details_map.insert("typo".into(), typo_details);
171                    order += 1;
172                }
173                ScoreDetails::Proximity(proximity) => {
174                    let proximity_details = serde_json::json!({
175                        "order": order,
176                        "score": proximity.local_score(),
177                    });
178                    details_map.insert("proximity".into(), proximity_details);
179                    order += 1;
180                }
181                ScoreDetails::Fid(fid) => {
182                    // copy the rank for future use in Position.
183                    fid_details = Some(*fid);
184                    // For now, fid is a virtual rule always followed by the "position" rule
185                    let fid_details = serde_json::json!({
186                        "order": order,
187                        "attributeRankingOrderScore": fid.local_score(),
188                    });
189                    details_map.insert("attribute".into(), fid_details);
190                    order += 1;
191                }
192                ScoreDetails::Position(position) => {
193                    // For now, position is a virtual rule always preceded by the "fid" rule
194                    let attribute_details = details_map
195                        .get_mut("attribute")
196                        .expect("position not preceded by attribute");
197                    let attribute_details = attribute_details
198                        .as_object_mut()
199                        .expect("attribute details was not an object");
200                    let Some(fid_details) = fid_details else {
201                        unimplemented!("position not preceded by attribute");
202                    };
203
204                    attribute_details
205                        .insert("queryWordDistanceScore".into(), position.local_score().into());
206                    let score = Rank::global_score([fid_details, *position].iter().copied());
207                    attribute_details.insert("score".into(), score.into());
208
209                    // do not update the order since this was already done by fid
210                }
211                ScoreDetails::ExactAttribute(exact_attribute) => {
212                    let exactness_details = serde_json::json!({
213                        "order": order,
214                        "matchType": exact_attribute,
215                        "score": exact_attribute.rank().local_score(),
216                    });
217                    details_map.insert("exactness".into(), exactness_details);
218                    order += 1;
219                }
220                ScoreDetails::ExactWords(details) => {
221                    // For now, exactness is a virtual rule always preceded by the "ExactAttribute" rule
222                    let exactness_details = details_map
223                        .get_mut("exactness")
224                        .expect("Exactness not preceded by exactAttribute");
225                    let exactness_details = exactness_details
226                        .as_object_mut()
227                        .expect("exactness details was not an object");
228                    if exactness_details.get("matchType").expect("missing 'matchType'")
229                        == &serde_json::json!(ExactAttribute::NoExactMatch)
230                    {
231                        let score = Rank::global_score(
232                            [ExactAttribute::NoExactMatch.rank(), details.rank()].iter().copied(),
233                        );
234                        // tiny detail, but we want the score to be the last displayed field,
235                        // so we're removing it here, adding the other fields, then adding the new score
236                        exactness_details.remove("score");
237                        exactness_details
238                            .insert("matchingWords".into(), details.matching_words.into());
239                        exactness_details
240                            .insert("maxMatchingWords".into(), details.max_matching_words.into());
241                        exactness_details.insert("score".into(), score.into());
242                    }
243                    // do not update the order since this was already done by exactAttribute
244                }
245                ScoreDetails::Sort(details) => {
246                    let sort = if details.redacted {
247                        format!("<hidden-rule-{order}>")
248                    } else {
249                        format!(
250                            "{}:{}",
251                            details.field_name,
252                            if details.ascending { "asc" } else { "desc" }
253                        )
254                    };
255                    let value =
256                        if details.redacted { "<hidden>".into() } else { details.value.clone() };
257                    let sort_details = serde_json::json!({
258                        "order": order,
259                        "value": value,
260                    });
261                    details_map.insert(sort, sort_details);
262                    order += 1;
263                }
264                ScoreDetails::GeoSort(details) => {
265                    let sort = format!(
266                        "_geoPoint({}, {}):{}",
267                        details.target_point[0],
268                        details.target_point[1],
269                        if details.ascending { "asc" } else { "desc" }
270                    );
271                    let point = if let Some(value) = details.value {
272                        serde_json::json!({ "lat": value[0], "lng": value[1]})
273                    } else {
274                        serde_json::Value::Null
275                    };
276                    let sort_details = serde_json::json!({
277                        "order": order,
278                        "value": point,
279                        "distance": details.distance(),
280                    });
281                    details_map.insert(sort, sort_details);
282                    order += 1;
283                }
284                ScoreDetails::Vector(s) => {
285                    let similarity = s.similarity.as_ref();
286
287                    let details = serde_json::json!({
288                        "order": order,
289                        "similarity": similarity,
290                    });
291                    details_map.insert("vectorSort".into(), details);
292                    order += 1;
293                }
294                ScoreDetails::Skipped => {
295                    details_map
296                        .insert("skipped".to_string(), serde_json::json!({ "order": order }));
297                    order += 1;
298                }
299            }
300        }
301        details_map
302    }
303}
304
305/// The strategy to compute scores.
306///
307/// It makes sense to pass down this strategy to the internals of the search, because
308/// some optimizations (today, mainly skipping ranking rules for universes of a single document)
309/// are not correct to do when computing the scores.
310///
311/// This strategy could feasibly be extended to differentiate between the normalized score and the
312/// detailed scores, but it is not useful today as the normalized score is *derived from* the
313/// detailed scores.
314#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
315pub enum ScoringStrategy {
316    /// Don't compute scores
317    #[default]
318    Skip,
319    /// Compute detailed scores
320    Detailed,
321}
322
323#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
324pub struct Words {
325    pub matching_words: u32,
326    pub max_matching_words: u32,
327}
328
329impl Words {
330    pub fn rank(&self) -> Rank {
331        Rank { rank: self.matching_words, max_rank: self.max_matching_words }
332    }
333
334    pub(crate) fn from_rank(rank: Rank) -> Self {
335        Self { matching_words: rank.rank, max_matching_words: rank.max_rank }
336    }
337}
338
339/// Structure that is super similar to [`Words`], but whose semantics is a bit distinct.
340///
341/// In exactness, the number of matching words can actually be 0 with a non-zero score,
342/// if no words from the query appear exactly in the document.
343#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
344pub struct ExactWords {
345    pub matching_words: u32,
346    pub max_matching_words: u32,
347}
348
349impl ExactWords {
350    pub fn rank(&self) -> Rank {
351        // 0 matching words means last rank (1)
352        Rank { rank: self.matching_words + 1, max_rank: self.max_matching_words + 1 }
353    }
354
355    pub(crate) fn from_rank(rank: Rank) -> Self {
356        // last rank (1) means that 0 words from the query appear exactly in the document.
357        // first rank (max_rank) means that (max_rank - 1) words from the query appear exactly in the document.
358        Self {
359            matching_words: rank.rank.saturating_sub(1),
360            max_matching_words: rank.max_rank.saturating_sub(1),
361        }
362    }
363}
364
365#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
366pub struct Typo {
367    pub typo_count: u32,
368    pub max_typo_count: u32,
369}
370
371impl Typo {
372    pub fn rank(&self) -> Rank {
373        Rank {
374            rank: (self.max_typo_count + 1).saturating_sub(self.typo_count),
375            max_rank: (self.max_typo_count + 1),
376        }
377    }
378
379    // max_rank = max_typo + 1
380    // max_typo = max_rank - 1
381    //
382    // rank = max_typo - typo + 1
383    // rank = max_rank - 1 - typo + 1
384    // rank + typo = max_rank
385    // typo = max_rank - rank
386    pub fn from_rank(rank: Rank) -> Typo {
387        Typo {
388            typo_count: rank.max_rank.saturating_sub(rank.rank),
389            max_typo_count: rank.max_rank.saturating_sub(1),
390        }
391    }
392}
393
394#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
395pub struct Rank {
396    /// The ordinal rank, such that `max_rank` is the first rank, and 0 is the last rank.
397    ///
398    /// The higher the better. Documents with a rank of 0 have a score of 0 and are typically never returned
399    /// (they don't match the query).
400    pub rank: u32,
401    /// The maximum possible rank. Documents with this rank have a score of 1.
402    ///
403    /// The max rank should not be 0.
404    pub max_rank: u32,
405}
406
407impl Rank {
408    pub fn local_score(self) -> f64 {
409        self.rank as f64 / self.max_rank as f64
410    }
411
412    pub fn global_score(details: impl Iterator<Item = Self>) -> f64 {
413        let mut rank = Rank { rank: 1, max_rank: 1 };
414        for inner_rank in details {
415            rank = Rank::merge(rank, inner_rank);
416        }
417        rank.local_score()
418    }
419
420    pub fn merge(mut outer: Rank, inner: Rank) -> Rank {
421        outer.rank = outer.rank.saturating_sub(1);
422
423        outer.rank *= inner.max_rank;
424        outer.max_rank *= inner.max_rank;
425
426        outer.rank += inner.rank;
427
428        outer
429    }
430}
431
432#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]
433#[serde(rename_all = "camelCase")]
434pub enum ExactAttribute {
435    ExactMatch,
436    MatchesStart,
437    NoExactMatch,
438}
439
440impl ExactAttribute {
441    pub fn rank(&self) -> Rank {
442        let rank = match self {
443            ExactAttribute::ExactMatch => 3,
444            ExactAttribute::MatchesStart => 2,
445            ExactAttribute::NoExactMatch => 1,
446        };
447        Rank { rank, max_rank: 3 }
448    }
449}
450
451#[derive(Debug, Clone, PartialEq)]
452pub struct Sort {
453    pub field_name: String,
454    pub ascending: bool,
455    pub redacted: bool,
456    pub value: serde_json::Value,
457}
458
459pub fn compare_sort_values(
460    ascending: bool,
461    left: &serde_json::Value,
462    right: &serde_json::Value,
463) -> Ordering {
464    use serde_json::Value::*;
465    match (left, right) {
466        (Null, Null) => Ordering::Equal,
467        (Null, _) => Ordering::Less,
468        (_, Null) => Ordering::Greater,
469        // numbers are always before strings
470        (Number(_), String(_)) => Ordering::Greater,
471        (String(_), Number(_)) => Ordering::Less,
472        (Number(left), Number(right)) => {
473            // FIXME: unwrap permitted here?
474            let order = left
475                .as_f64()
476                .unwrap()
477                .partial_cmp(&right.as_f64().unwrap())
478                .unwrap_or(Ordering::Equal);
479            // 12 < 42, and when ascending, we want to see 12 first, so the smallest.
480            // Hence, when ascending, smaller is better
481            if ascending {
482                order.reverse()
483            } else {
484                order
485            }
486        }
487        (String(left), String(right)) => {
488            let order = left.cmp(right);
489            // Taking e.g. "a" and "z"
490            // "a" < "z", and when ascending, we want to see "a" first, so the smallest.
491            // Hence, when ascending, smaller is better
492            if ascending {
493                order.reverse()
494            } else {
495                order
496            }
497        }
498        (left, right) => {
499            tracing::warn!(%left, %right, "sort values that are neither numbers, strings or null, handling as equal");
500            Ordering::Equal
501        }
502    }
503}
504
505impl PartialOrd for Sort {
506    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
507        if self.ascending != other.ascending {
508            return None;
509        }
510        Some(compare_sort_values(self.ascending, &self.value, &other.value))
511    }
512}
513
514#[derive(Debug, Clone, Copy, PartialEq)]
515pub struct GeoSort {
516    pub target_point: [f64; 2],
517    pub ascending: bool,
518    pub value: Option<[f64; 2]>,
519}
520
521impl PartialOrd for GeoSort {
522    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
523        if self.ascending != other.ascending {
524            return None;
525        }
526        Some(match (self.distance(), other.distance()) {
527            (None, None) => Ordering::Equal,
528            (None, Some(_)) => Ordering::Less,
529            (Some(_), None) => Ordering::Greater,
530            (Some(left), Some(right)) => {
531                let order = left.partial_cmp(&right)?;
532                if self.ascending {
533                    // when ascending, the one with the smallest distance has the best score
534                    order.reverse()
535                } else {
536                    order
537                }
538            }
539        })
540    }
541}
542
543#[derive(Debug, Clone, PartialEq, PartialOrd)]
544pub struct Vector {
545    pub similarity: Option<f32>,
546}
547
548impl GeoSort {
549    pub fn distance(&self) -> Option<f64> {
550        self.value.map(|value| distance_between_two_points(&self.target_point, &value))
551    }
552}