vindicator/
lib.rs

1//! Search manipulation algorithms for multi-source information retrieval.
2//! 
3//! # Example
4//! 
5//! Read a file in the TREC eval result list format, and use
6//! [`fuse_scored`] to merge entries with the same document ID.
7//! 
8//! ```no_run
9//! use vindicator::{fuse_scored, parse_from_trec};
10//! use vindicator::fuser::comb_mnz;
11//! 
12//! # fn run() -> Result<(), Box<dyn std::error::Error>> {
13//! let raw_list = std::fs::read_to_string("trec_file.txt")?;
14//! let list = parse_from_trec(&raw_list)?;
15//!
16//! let fusion: Vec<_> = fuse_scored(&list, comb_mnz);
17//! # Ok(())
18//! # }
19//! # run().unwrap();
20//! ```
21//! 
22//! [`comb_mnz`] is one possible score-based late fusion method. Any other
23//! algorithm producing a new score based on an array of scores can be
24//! integrated. See also [`comb_max`] and [`comb_sum`].
25//! 
26//! [`fuse_scored`]: fuser/fn.fuse_scored.html
27//! [`comb_mnz`]: fuser/fn.comb_mnz.html
28//! [`comb_max`]: fuser/fn.comb_max.html
29//! [`comb_sum`]: fuser/fn.comb_sum.html
30
31use noisy_float::prelude::*;
32use approx::AbsDiffEq;
33
34pub use noisy_float;
35pub use approx;
36
37pub mod fuser;
38pub mod trec;
39
40pub use fuser::fuse_scored;
41pub use trec::parse_from_trec;
42
/// Type alias for a search result's score. This is assumed to be a similarity score.
44pub type Score = N32;
/// Type alias for a search result's rank.
46pub type Rank = u32;
47
48/// Creates a score value.
49/// 
50/// # Panic
51/// 
52/// Panics if the given value is `NaN`.
53pub fn score(value: f32) -> Score {
54    n32(value)
55}
56
57/// A search result entry with a unique document identifier and a similarity
58/// score. Types need to implement this type in order to be admitted as a
59/// search result.
60pub trait SearchEntry {
61    /// The unique document identifier type.
62    type Id: Eq;
63
64    /// Retrieves this entry's document ID.
65    fn id(&self) -> &Self::Id;
66
67    /// Retrieves this entry's similarity score.
68    fn score(&self) -> Score {
69        n32(1.)
70    }
71
72    /// Constructs a minimalist entry info data structure.
73    fn to_entry(&self) -> EntryInfo<Self::Id>
74    where
75        Self::Id: Clone,
76    {
77        EntryInfo {
78            id: self.id().clone(),
79            score: self.score(),
80        }
81    }
82}
83
84/// A simple struct for minimally describing a scored search result.
85#[derive(Debug, Copy, Clone, Eq, PartialEq)]
86pub struct EntryInfo<I> {
87    /// The entry's document ID.
88    pub id: I,
89    /// The entry's similarity score.
90    pub score: Score,
91}
92
93impl<I> AbsDiffEq for EntryInfo<I>
94where
95    I: PartialEq<I>,
96{
97    type Epsilon = f32;
98
99    fn default_epsilon() -> Self::Epsilon {
100        1e-5
101    }
102    
103    fn abs_diff_eq(
104        &self, 
105        other: &Self, 
106        epsilon: Self::Epsilon, 
107    ) -> bool {
108        self.id == other.id
109            && self.score.raw().abs_diff_eq(&other.score.raw(), epsilon)
110    }
111}
112
113/// Wrapper type for assigning a rank to an arbitrary value.
114#[derive(Debug, Copy, Clone, Eq, PartialEq)]
115pub struct Ranked<T> {
116    /// the attributed rank
117    pub rank: Rank,
118    /// the inner value
119    pub inner: T,
120}
121
122impl<T> SearchEntry for Ranked<T>
123where
124    T: SearchEntry,
125{
126    type Id = T::Id;
127
128    fn id(&self) -> &Self::Id {
129        self.inner.id()
130    }
131
132    fn score(&self) -> Score {
133        self.inner.score()
134    }
135}
136
137/// A simple struct for minimally describing a scored and ranked search result.
138#[derive(Debug, Copy, Clone, Eq, PartialEq)]
139pub struct RankedEntryInfo<I> {
140    /// The entry's document ID.
141    pub id: I,
142    /// The entry's similarity score.
143    pub score: Score,
144    /// The entry's rank.
145    pub rank: Rank,
146}
147
148impl<'a, T: ?Sized> SearchEntry for &'a T
149where
150    T: SearchEntry,
151{
152    type Id = T::Id;
153
154    fn id(&self) -> &Self::Id {
155        (**self).id()
156    }
157    fn score(&self) -> Score {
158        (**self).score()
159    }
160    fn to_entry(&self) -> EntryInfo<Self::Id>
161    where
162        Self::Id: Clone,
163    {
164        (**self).to_entry()
165    }
166}
167
168/// A search entry which is also aware of its rank on the list.
169pub trait RankedSearchEntry: SearchEntry {
170    fn rank(&self) -> Rank;
171}
172
173impl<'a, T: ?Sized> RankedSearchEntry for &'a T
174where
175    T: RankedSearchEntry,
176{
177    fn rank(&self) -> Rank {
178        (**self).rank()
179    }
180}
181
182impl<I> SearchEntry for EntryInfo<I>
183where
184    I: Eq,
185{
186    type Id = I;
187
188    fn id(&self) -> &Self::Id {
189        &self.id
190    }
191    fn score(&self) -> Score {
192        self.score
193    }
194
195    fn to_entry(&self) -> EntryInfo<I>
196    where
197        I: Clone,
198    {
199        self.clone()
200    }
201}
202
203impl<I> SearchEntry for RankedEntryInfo<I>
204where
205    I: Eq,
206{
207    type Id = I;
208
209    fn id(&self) -> &Self::Id {
210        &self.id
211    }
212
213    fn score(&self) -> Score {
214        self.score
215    }
216}
217
218impl<I> RankedSearchEntry for RankedEntryInfo<I>
219where
220    I: Eq,
221{
222    fn rank(&self) -> Rank {
223        self.rank
224    }
225}
226
227/// Builds a new iterator containing search results ranked on their order of
228/// appearance.
229pub fn ranked_list<L, R>(results: L) -> impl Iterator<Item = Ranked<R>>
230where
231    L: IntoIterator<Item = R>,
232    R: SearchEntry,
233{
234    results.into_iter().enumerate().map(|(i, x)| Ranked {
235        inner: x,
236        rank: i as Rank,
237    })
238}