rank_biased_centroids/
lib.rs

1#![warn(missing_debug_implementations, rust_2018_idioms, clippy::pedantic)]
2//!
3//! The Rank-Biased Centroids (RBC) rank fusion method to combine multiple-rankings of objects.
4//!
5//! This code implements the RBC rank fusion method, as described in:
6//!
7//!```bibtex
8//! @inproceedings{DBLP:conf/sigir/BaileyMST17,
9//!    author    = {Peter Bailey and
10//!                 Alistair Moffat and
11//!                 Falk Scholer and
12//!                 Paul Thomas},
13//!   title     = {Retrieval Consistency in the Presence of Query Variations},
14//!   booktitle = {Proceedings of the 40th International {ACM} {SIGIR} Conference on
15//!                Research and Development in Information Retrieval, Shinjuku, Tokyo,
16//!                Japan, August 7-11, 2017},
17//!   pages     = {395--404},
18//!   publisher = {{ACM}},
19//!   year      = {2017},
20//!   url       = {https://doi.org/10.1145/3077136.3080839},
21//!   doi       = {10.1145/3077136.3080839},
22//!   timestamp = {Wed, 25 Sep 2019 16:43:14 +0200},
23//!   biburl    = {https://dblp.org/rec/conf/sigir/BaileyMST17.bib},
24//! }
25//!```
26//!
27//! The fundamental step in the working of RBC is the usage a `persistence` parameter (`p` or `phi`) to to fusion multiple ranked lists based only on rank information. Larger values of `p` give higher importance to elements at the top of each ranking. From the paper:
28//!
29//! > As extreme values, consider `p = 0` and `p = 1`. When `p = 0`, the agents only ever examine the first item in each of the input rankings, and the fused output is by decreasing score of firstrst preference; this is somewhat akin to a first-past-the-post election regime. When `p = 1`, each agent examines the whole of every list, and the fused ordering is determined by the number of lists that contain each item – a kind of "popularity count" of each item across the input sets. In between these extremes, the expected depth reached by the agents viewing the rankings is given by `1/(1 − p)`. For example, when `p = 0.9`, on average the first 10 items in each ranking are being used to contribute to the fused ordering; of course, in aggregate, across the whole universe of agents, all of the items in every ranking contribute to the overall outcome.
30//!
31//! More from the paper:
32//!
33//! > Each item at rank 1 <= x <= n when the rankings are over n items, we suggest that a geometrically decaying weight function be employed, with the distribution of d over depths x given by (1 − p) p^{x-1} for some value 0 <= p <= 1 determined by considering the purpose for which the fused ranking is being constructed.
34//!
35//! # Example fusion
36//!
37//! For example (also taken from the paper) for diffent rank orderings (R1-R4) of items A-G:
38//!
39//! <table>
40//!     <thead>
41//!         <tr>
42//!             <th>Rank</th>
43//!             <th>R1</th>
44//!             <th>R2</th>
45//!             <th>R3</th>
46//!             <th>R4</th>
47//!         </tr>
48//!     </thead>
49//!     <tbody>
50//!         <tr>
51//!             <td>1</td>
52//!             <td>A</td>
53//!             <td>B</td>
54//!             <td>A</td>
55//!             <td>G</td>
56//!         </td>
57//!         <tr>
58//!             <td>2</td>
59//!             <td>D</td>
60//!             <td>D</td>
61//!             <td>B</td>
62//!             <td>D</td>
63//!         </tr>
64//!         <tr>
65//!             <td>3</td>
66//!             <td>B</td>
67//!             <td>E</td>
68//!             <td>D</td>
69//!             <td>E</td>
70//!         </tr>
71//!         <tr>
72//!             <td>4</td>
73//!             <td>C</td>
74//!             <td>C</td>
75//!             <td>C</td>
76//!             <td>A</td>
77//!         </tr>
78//!         <tr>
79//!             <td>5</td>
80//!             <td>G</td>
81//!             <td>-</td>
82//!             <td>G</td>
83//!             <td>F</td>
84//!         </tr>
85//!         <tr>
86//!             <td>6</td>
87//!             <td>F</td>
88//!             <td>-</td>
89//!             <td>F</td>
90//!             <td>C</td>
91//!         </tr>
92//!         <tr>
93//!             <td>7</td>
94//!             <td>-</td>
95//!             <td>-</td>
96//!             <td>E</td>
97//!             <td>-</td>
98//!         </tr>
99//!     </tbody>
100//! </table>
101//!
102//! Depending on the persistence parameter `p` will result in different output orderings based on each items accumulated weights:
103//!
104//! <table>
105//!     <thead>
106//!         <tr>
107//!             <th>Rank</th>
108//!             <th>p=0.6</th>
109//!             <th>p=0.8</th>
110//!             <th>p=0.9</th>
111//!         </tr>
112//!     </thead>
113//!     <tbody>
114//!         <tr>
115//!             <td>1</td>
116//!             <td>A (0.89)</td>
117//!             <td>D (0.61)</td>
118//!             <td>D (0.35)</td>
119//!         </td>
120//!         <tr>
121//!             <td>2</td>
122//!             <td>D (0.86)</td>
123//!             <td>A (0.50)</td>
124//!             <td>D (0.35)</td>
125//!         </tr>
126//!         <tr>
127//!             <td>3</td>
128//!             <td>B (0.78)</td>
129//!             <td>B (0.49)</td>
130//!             <td>A (0.27)</td>
131//!         </tr>
132//!         <tr>
133//!             <td>4</td>
134//!             <td>G (0.50) </td>
135//!             <td>C (0.37)</td>
136//!             <td>B (0.27)</td>
137//!         </tr>
138//!         <tr>
139//!             <td>5</td>
140//!             <td>E (0.31)</td>
141//!             <td>G (0.37)</td>
142//!             <td>G (0.23)</td>
143//!         </tr>
144//!         <tr>
145//!             <td>6</td>
146//!             <td>C (0.29)</td>
147//!             <td>E (0.31)</td>
148//!             <td>E (0.22)</td>
149//!         </tr>
150//!         <tr>
151//!             <td>7</td>
152//!             <td>F (0.11)</td>
153//!             <td>F (0.21)</td>
154//!             <td>F (0.18</td>
155//!         </tr>
156//!     </tbody>
157//! </table>
158//!
159//!
160//! # Code Example:
161//!
162//! ```
163//! use rank_biased_centroids::rbc;
164//!
165//! let r1 = vec!['A', 'D', 'B', 'C', 'G', 'F'];
166//! let r2 = vec!['B', 'D', 'E', 'C'];
167//! let r3 = vec!['A', 'B', 'D', 'C', 'G', 'F', 'E'];
168//! let r4 = vec!['G', 'D', 'E', 'A', 'F', 'C'];
169//! let p = 0.9;
170//! let res = rbc(vec![r1, r2, r3, r4], p).unwrap();
171//! let exp = vec![
172//!     ('D', 0.35),
173//!     ('C', 0.28),
174//!     ('A', 0.27),
175//!     ('B', 0.27),
176//!     ('G', 0.23),
177//!     ('E', 0.22),
178//!     ('F', 0.18),
179//! ];
180//! for ((c, s), (ec, es)) in res.into_ranked_list_with_scores().into_iter().zip(exp.into_iter()) {
181//!     assert_eq!(c, ec);
182//!     approx::assert_abs_diff_eq!(s, es, epsilon = 0.005);
183//! }
184//! ```
185//!
186//! Weighted runs:
187//!
188//! ```rust
189//! use rank_biased_centroids::rbc_with_weights;
190//! let r1 = vec!['A', 'D', 'B', 'C', 'G', 'F'];
191//! let r2 = vec!['B', 'D', 'E', 'C'];
192//! let r3 = vec!['A', 'B', 'D', 'C', 'G', 'F', 'E'];
193//! let r4 = vec!['G', 'D', 'E', 'A', 'F', 'C'];
194//! let p = 0.9;
195//! let run_weights = vec![0.3, 1.3, 0.4, 1.4];
196//! let res = rbc_with_weights(vec![r1, r2, r3, r4],run_weights, p).unwrap();
197//! let exp = vec![
198//!     ('D', 0.30),
199//!     ('E', 0.24),
200//!     ('C', 0.23),
201//!     ('B', 0.19),
202//!     ('G', 0.19),
203//!     ('A', 0.17),
204//!     ('F', 0.13),
205//! ];
206//! for ((c, s), (ec, es)) in res.into_ranked_list_with_scores().into_iter().zip(exp.into_iter()) {
207//!     assert_eq!(c, ec);
208//!     approx::assert_abs_diff_eq!(s, es, epsilon = 0.005);
209//! }
210//! ```
211//!
212mod state;
213
214use thiserror::Error;
215
216#[derive(Error, Debug)]
217pub enum RbcError {
218    #[error("Persistance parameter p must be 0.0 <= p < 1.0")]
219    InvalidPersistance,
220    #[error("There need to be as many run weights as runs + not inf or nan")]
221    InvalidRunWeights,
222}
223
224use state::RbcState;
225use std::fmt::Debug;
226use std::hash::Hash;
227
228pub use state::RbcRankedList;
229
230///
231/// Main RBC function implementing the computation of Rank-Biased Centroids.
232///
233/// Returns the fused ranked list without scores.
234///
235/// # Example:
236///
237/// ```
238/// use rank_biased_centroids::rbc;
239///
240/// let r1 = vec!['A', 'D', 'B', 'C', 'G', 'F'];
241/// let r2 = vec!['B', 'D', 'E', 'C'];
242/// let r3 = vec!['A', 'B', 'D', 'C', 'G', 'F', 'E'];
243/// let r4 = vec!['G', 'D', 'E', 'A', 'F', 'C'];
244/// let p = 0.9;
245/// let res = rbc(vec![r1, r2, r3, r4], p).unwrap();
246/// let exp = vec!['D','C','A','B','G','E','F'];
247/// assert!(res.into_ranked_list().into_iter().eq(exp.into_iter()));
248/// ```
249/// # Errors
250///
251/// - Will return `Err` if `p` is not 0 <= p < 1
252///
253pub fn rbc<I>(
254    input_rankings: I,
255    p: f64,
256) -> Result<RbcRankedList<<<I as IntoIterator>::Item as IntoIterator>::Item>, RbcError>
257where
258    I: IntoIterator,
259    I::Item: IntoIterator,
260    <<I as IntoIterator>::Item as IntoIterator>::Item: Eq + Hash + Debug,
261{
262    let mut rbc_state = RbcState::with_persistence(p)?;
263
264    // iterate over all lists
265    let ranked_list_iter = input_rankings.into_iter();
266    for ranked_list in ranked_list_iter {
267        for (rank, item) in ranked_list.into_iter().enumerate() {
268            rbc_state.update(rank, item, None);
269        }
270    }
271
272    // finalize
273    Ok(rbc_state.into_result())
274}
275
276///
277/// Main RBC function implementing the computation of Rank-Biased Centroids.
278///
279/// Returns the fused ranked list without scores.
280///
281/// # Example:
282///
283/// ```
284/// use rank_biased_centroids::rbc_with_weights;
285///
286/// let r1 = vec!['A', 'D', 'B', 'C', 'G', 'F'];
287/// let r2 = vec!['B', 'D', 'E', 'C'];
288/// let r3 = vec!['A', 'B', 'D', 'C', 'G', 'F', 'E'];
289/// let r4 = vec!['G', 'D', 'E', 'A', 'F', 'C'];
290/// let p = 0.9;
291/// let res = rbc_with_weights(vec![r1, r2, r3, r4],vec![0.3,1.3,0.4,1.4], p).unwrap();
292/// let exp = vec!['D','E','C','B','G','A','F'];
293/// let result = res.into_ranked_list();
294/// assert!(result.into_iter().eq(exp.into_iter()));
295/// ```
296/// # Errors
297///
298/// - Will return `Err` if `p` is not 0 <= p < 1
299/// - Will return `Err` if run weights len != num runs
300/// - Will return `Err` if run weights are inf or NaN
301///
302pub fn rbc_with_weights<I>(
303    input_rankings: I,
304    run_weights: impl IntoIterator<Item = f64>,
305    p: f64,
306) -> Result<RbcRankedList<<<I as IntoIterator>::Item as IntoIterator>::Item>, RbcError>
307where
308    I: IntoIterator,
309    I::Item: IntoIterator,
310    <<I as IntoIterator>::Item as IntoIterator>::Item: Eq + Hash + Debug,
311{
312    let mut rbc_state = RbcState::with_persistence(p)?;
313
314    // iterate over all lists
315    let mut run_weights_iter = run_weights.into_iter();
316    let mut ranked_list_iter = input_rankings.into_iter();
317    for ranked_list in ranked_list_iter.by_ref() {
318        let run_weight = match run_weights_iter.next() {
319            None => return Err(RbcError::InvalidRunWeights),
320            Some(w) if w.is_infinite() => return Err(RbcError::InvalidRunWeights),
321            Some(w) if w.is_nan() => return Err(RbcError::InvalidRunWeights),
322            Some(w) => Some(w),
323        };
324        for (rank, item) in ranked_list.into_iter().enumerate() {
325            rbc_state.update(rank, item, run_weight);
326        }
327    }
328
329    // more ranked list than weights!
330    if ranked_list_iter.next().is_some() {
331        return Err(RbcError::InvalidRunWeights);
332    }
333
334    // finalize
335    Ok(rbc_state.into_result())
336}