imdb_index/
scored.rs

1use std::cmp;
2use std::collections::BinaryHeap;
3use std::num::FpCategory;
4use std::vec;
5
6/// A collection of scored values, sorted in descending order by score.
7#[derive(Clone, Debug, Default)]
8pub struct SearchResults<T>(Vec<Scored<T>>);
9
10impl<T> SearchResults<T> {
11    /// Create an empty collection of scored values.
12    pub fn new() -> SearchResults<T> {
13        SearchResults(vec![])
14    }
15
16    /// Create a collection of search results from a min-heap of scored values.
17    pub fn from_min_heap(
18        queue: &mut BinaryHeap<cmp::Reverse<Scored<T>>>,
19    ) -> SearchResults<T> {
20        let mut results = vec![];
21        while let Some(x) = queue.pop() {
22            results.push(x.0);
23        }
24        results.reverse();
25        SearchResults(results)
26    }
27
28    /// Add a new scored value to this collection.
29    ///
30    /// The score provided must be less than or equal to every other score in
31    /// this collection, otherwise this method will panic.
32    pub fn push(&mut self, scored: Scored<T>) {
33        assert!(self.0.last().map_or(true, |smallest| &scored <= smallest));
34        self.0.push(scored);
35    }
36
37    /// Normalizes the scores in this collection such that all scores are in
38    /// the range `[0, 1]` where the top result always has score `1.0`.
39    ///
40    /// This operation is idempotent and does not change the ordering of
41    /// results.
42    pub fn normalize(&mut self) {
43        if let Some(top_score) = self.0.get(0).map(|s| s.score()) {
44            // The minimal score is 0, so if the top score is 0, then all
45            // scores must be 0. No normalization needed. (And we avoid a
46            // divide-by-zero below.)
47            if top_score.classify() == FpCategory::Zero {
48                return;
49            }
50            for result in &mut self.0 {
51                let score = result.score();
52                result.set_score(score / top_score);
53            }
54        }
55    }
56
57    /// Recomputes the scores in this collection using the given function.
58    ///
59    /// The results are then re-sorted according to the new scores.
60    pub fn rescore<F: FnMut(&T) -> f64>(&mut self, mut rescore: F) {
61        for result in &mut self.0 {
62            let score = rescore(result.value());
63            result.set_score(score);
64        }
65        self.0.sort_by(|s1, s2| s1.cmp(&s2).reverse());
66    }
67
68    /// Trim this collection so that it contains at most the first `size`
69    /// results.
70    pub fn trim(&mut self, size: usize) {
71        if self.0.len() > size {
72            self.0.drain(size..);
73        }
74    }
75
76    /// Returns the number of results in this collection.
77    pub fn len(&self) -> usize {
78        self.0.len()
79    }
80
81    /// Returns true if and only if this collection is empty.
82    pub fn is_empty(&self) -> bool {
83        self.0.is_empty()
84    }
85
86    /// Return a slice of search results in order.
87    pub fn as_slice(&self) -> &[Scored<T>] {
88        &self.0
89    }
90
91    /// Consume this collection and return the underlying sorted sequence of
92    /// scored values.
93    pub fn into_vec(self) -> Vec<Scored<T>> {
94        self.0
95    }
96}
97
98impl<T> IntoIterator for SearchResults<T> {
99    type IntoIter = vec::IntoIter<Scored<T>>;
100    type Item = Scored<T>;
101
102    fn into_iter(self) -> vec::IntoIter<Scored<T>> {
103        self.into_vec().into_iter()
104    }
105}
106
107/// Any value associated with a score.
108///
109/// We define Eq and Ord on this type in a way that ignores `value` and only
110/// uses the `score` to determine ordering. The public API of `Scored`
111/// guarantees that scores are never `NaN`.
112#[derive(Clone, Copy, Debug)]
113pub struct Scored<T> {
114    score: f64,
115    value: T,
116}
117
118impl<T> Scored<T> {
119    /// Create a new value `T` with a score of `1.0`.
120    pub fn new(value: T) -> Scored<T> {
121        Scored { score: 1.0, value }
122    }
123
124    /// Return the score for this item.
125    ///
126    /// In general, no restrictions are placed on the range of scores, however
127    /// most search APIs that use it will return scores in the range `[0, 1]`.
128    ///
129    /// The score returned is guaranteed to never be `NaN`.
130    pub fn score(&self) -> f64 {
131        self.score
132    }
133
134    /// Set the score, replacing the existing value with the given value.
135    ///
136    /// This panics if the given score is `NaN`.
137    pub fn set_score(&mut self, score: f64) {
138        assert!(score.is_finite());
139        self.score = score;
140    }
141
142    /// Consume this scored value and return a new scored value that drops the
143    /// existing score and replaces it with the given score.
144    ///
145    /// This panics if the given score is `NaN`.
146    pub fn with_score(mut self, score: f64) -> Scored<T> {
147        self.set_score(score);
148        self
149    }
150
151    /// Consume this scored value and map its value using the function given,
152    /// returning a new scored value with the result of the map and an
153    /// unchanged score.
154    pub fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Scored<U> {
155        Scored { score: self.score, value: f(self.value) }
156    }
157
158    /// Consume this scored value and map its score using the function given,
159    /// return a new `Scored` with an unchanged value.
160    ///
161    /// This panics if score returned by `f` is `NaN`.
162    pub fn map_score<F: FnOnce(f64) -> f64>(self, f: F) -> Scored<T> {
163        let score = f(self.score);
164        self.with_score(score)
165    }
166
167    /// Return a reference to the underlying value.
168    pub fn value(&self) -> &T {
169        &self.value
170    }
171
172    /// Consume this scored value, drop the score and return the underlying
173    /// `T`.
174    pub fn into_value(self) -> T {
175        self.value
176    }
177
178    /// Consume this scored value and return the underlying pair of score and
179    /// `T`.
180    pub fn into_pair(self) -> (f64, T) {
181        (self.score, self.value)
182    }
183}
184
185impl<T: Default> Default for Scored<T> {
186    fn default() -> Scored<T> {
187        Scored::new(T::default())
188    }
189}
190
191impl<T> Eq for Scored<T> {}
192
193impl<T> PartialEq for Scored<T> {
194    fn eq(&self, other: &Scored<T>) -> bool {
195        let (s1, s2) = (self.score, other.score);
196        s1 == s2
197    }
198}
199
200impl<T> Ord for Scored<T> {
201    fn cmp(&self, other: &Scored<T>) -> cmp::Ordering {
202        self.score.partial_cmp(&other.score).unwrap()
203    }
204}
205
206impl<T> PartialOrd for Scored<T> {
207    fn partial_cmp(&self, other: &Scored<T>) -> Option<cmp::Ordering> {
208        Some(self.cmp(other))
209    }
210}
211
212#[cfg(test)]
213mod tests {
214    use super::Scored;
215    use std::f64::NAN;
216
217    #[test]
218    #[should_panic]
219    fn never_nan_1() {
220        Scored::new(()).set_score(NAN);
221    }
222
223    #[test]
224    #[should_panic]
225    fn never_nan_2() {
226        Scored::new(()).with_score(NAN);
227    }
228
229    #[test]
230    #[should_panic]
231    fn never_nan_3() {
232        Scored::new(()).map_score(|_| NAN);
233    }
234}