lookup_tables/
search.rs

1// todo: constructors for these instead of default
2// todo: fixed delta search method
3
4/// Linear search to find the bounding indices. Typically faster for small
5/// (<20) values in the table.
6#[derive(Default)]
7pub struct Linear;
8
9#[derive(Default)]
10/// Binary search to find bounding indices. Useful for large datasets (>20)
11pub struct Binary;
12
13/// Store the last set of bounding indices and learly search from the last known match. Effective
14/// for tables with slowly changing values.
15#[derive(Default)]
16pub struct CachedLinearCell {
17    last_lower_idx: std::cell::RefCell<usize>,
18}
19
20impl CachedLinearCell {
21    pub fn new(last_index: usize) -> Self {
22        Self {
23            last_lower_idx: last_index.into(),
24        }
25    }
26}
27
28/// Determine search method dynamically at runtime
29pub enum RuntimeSearch {
30    Linear(Linear),
31    Binary(Binary),
32    CachedLinearCell(CachedLinearCell),
33}
34
35pub trait Search<Indep>
36where
37    Indep: PartialOrd<Indep>,
38{
39    /// Search through a list of values, return the upper and lower indices that bound a given
40    /// value
41    fn search(&self, value: Indep, indep_values: &[Indep]) -> (usize, usize);
42}
43
44fn inbounds_pair_from_lower(low_idx: usize, indep_length: usize) -> (usize, usize) {
45    // cap the low index to be two minus the length, as one minus the length would
46    // put the high index out of bounds
47    let low_idx = std::cmp::min(low_idx, indep_length - 2);
48    let high_idx = low_idx + 1;
49
50    (low_idx, high_idx)
51}
52
53fn inbounds_pair_from_higher(high_idx: usize, length: usize) -> (usize, usize) {
54    // cap the high index to be 1 to ensure the lower index is inbounds at zero
55    let high_idx = std::cmp::max(1, high_idx);
56    // ensure the high index is not greater than length -1, which can happen with binary search
57    // TODO: const param here to decide if we need to check this, since we dont for the other cases
58    let high_idx = std::cmp::min(high_idx, length - 1);
59
60    let low_idx = high_idx - 1;
61
62    (low_idx, high_idx)
63}
64
65impl<Indep> Search<Indep> for Linear
66where
67    Indep: std::cmp::PartialOrd,
68{
69    fn search(&self, value: Indep, indep_values: &[Indep]) -> (usize, usize) {
70        let length = indep_values.len();
71
72        if let Some(high_idx) = indep_values.iter().position(|v| v > &value) {
73            // grab the index pair associated with this, paying close attention to not go out of
74            // bounds
75            inbounds_pair_from_higher(high_idx, length)
76        } else {
77            // we hit the max value in the dataset and `value` was bigger. set the high
78            // index equal to the last value
79            let high_idx = length - 1;
80            let low_idx = high_idx - 1;
81
82            (low_idx, high_idx)
83        }
84    }
85}
86
87impl<Indep> Search<Indep> for Binary
88where
89    Indep: PartialOrd<Indep>,
90{
91    fn search(&self, value: Indep, indep_values: &[Indep]) -> (usize, usize) {
92        let length = indep_values.len();
93        let f = |v: &Indep| v.partial_cmp(&value).unwrap();
94
95        match indep_values.binary_search_by(f) {
96            Ok(matching_index) => inbounds_pair_from_lower(matching_index, length),
97            Err(high_idx) => inbounds_pair_from_higher(high_idx, length),
98        }
99    }
100}
101
102impl<Indep> Search<Indep> for CachedLinearCell
103where
104    Indep: PartialOrd<Indep>,
105{
106    fn search(&self, value: Indep, indep_values: &[Indep]) -> (usize, usize) {
107        let mut borrow_idx: std::cell::RefMut<'_, usize> = self
108            .last_lower_idx
109            .try_borrow_mut()
110            .expect("Cached RefCell was already borrowed. This should never happen");
111        let last_lower: usize = *borrow_idx;
112
113        let length = indep_values.len();
114
115        if indep_values[last_lower] >= value {
116            // we need to search the lower portion of the dataset since our value is smaller than
117            // the last index
118
119            for idx in (0..last_lower).rev() {
120                let idx_value = &indep_values[idx];
121                if idx_value < &value {
122                    // we are now at an index that is above the value, we return out
123                    let index_pair = inbounds_pair_from_lower(idx, length);
124                    *borrow_idx = index_pair.0;
125                    return index_pair;
126                }
127            }
128
129            let index_pair = (0, 1);
130            *borrow_idx = index_pair.0;
131            return index_pair;
132        } else {
133            for idx in last_lower..length {
134                let idx_value = &indep_values[idx];
135                if idx_value > &value {
136                    // we are now at an index that is above the value, we return out
137                    let index_pair = inbounds_pair_from_higher(idx, length);
138                    *borrow_idx = index_pair.0;
139                    return index_pair;
140                }
141            }
142
143            let index_pair = (length - 2, length - 1);
144            *borrow_idx = index_pair.0;
145            return index_pair;
146        }
147    }
148}
149
150impl<Indep> Search<Indep> for RuntimeSearch
151where
152    Indep: PartialOrd<Indep>,
153{
154    fn search(&self, value: Indep, indep_values: &[Indep]) -> (usize, usize) {
155        match &self {
156            RuntimeSearch::Linear(l) => l.search(value, indep_values),
157            RuntimeSearch::Binary(b) => b.search(value, indep_values),
158            RuntimeSearch::CachedLinearCell(c) => c.search(value, indep_values),
159        }
160    }
161}
162
163#[cfg(test)]
164mod tests {
165    use super::*;
166
167    fn data() -> Vec<usize> {
168        vec![0, 2, 4, 6, 8, 10]
169    }
170
171    //
172    // Linear Tests
173    //
174
175    #[test]
176    /// check close to the bottom of the table bounds, but still in
177    fn linear_low() {
178        let linear = Linear::default();
179        let x = data();
180        let output = linear.search(1, x.as_slice());
181        dbg!(&output);
182        assert!(output.0 == 0);
183        assert!(output.1 == 1);
184    }
185
186    #[test]
187    /// check close to the top of the table bounds, but still in
188    fn linear_high() {
189        let linear = Linear::default();
190        let x = data();
191        let output = linear.search(9, x.as_slice());
192        assert!(output.0 == 4);
193        assert!(output.1 == 5);
194    }
195
196    //
197    // Binary Tests
198    //
199
200    #[test]
201    /// check close to the bottom of the table bounds, but still in
202    fn binary_low() {
203        let binary = Binary::default();
204        let x = data();
205        let output = binary.search(1, x.as_slice());
206        dbg!(&output);
207        assert!(output.0 == 0);
208        assert!(output.1 == 1);
209    }
210
211    #[test]
212    /// check close to the bottom of the table bounds, but still in
213    fn binary_inbounds() {
214        let binary = Binary::default();
215        let x = data();
216        let output = binary.search(5, x.as_slice());
217        dbg!(&output);
218        assert!(output.0 == 2);
219        assert!(output.1 == 3);
220    }
221
222    #[test]
223    /// check close to the top of the table bounds, but still in
224    fn binary_high() {
225        let binary = Binary::default();
226        let x = data();
227        let output = binary.search(9, x.as_slice());
228        assert!(output.0 == 4);
229        assert!(output.1 == 5);
230    }
231
232    //
233    // Cached Linear Tests
234    //
235
236    #[test]
237    /// check close to the bottom of the table bounds, but still in
238    fn cached_linear_low() {
239        for starting_index in 0..6 {
240            dbg!(starting_index);
241            let cached_linear = CachedLinearCell::new(starting_index);
242            let x = data();
243            let output = cached_linear.search(1, x.as_slice());
244            dbg!(&output);
245            assert!(output.0 == 0);
246            assert!(output.1 == 1);
247        }
248    }
249
250    #[test]
251    /// check close to the bottom of the table bounds, but still in
252    fn cached_linear_inbounds() {
253        for starting_index in 0..6 {
254            dbg!(starting_index);
255            let cached_linear = CachedLinearCell::new(starting_index);
256            let x = data();
257            let output = cached_linear.search(5, x.as_slice());
258            dbg!(&output);
259            assert!(output.0 == 2);
260            assert!(output.1 == 3);
261        }
262    }
263
264    #[test]
265    /// check close to the top of the table bounds, but still in
266    fn cached_linear_high() {
267        for starting_index in 0..6 {
268            dbg!(starting_index);
269            let cached_linear = CachedLinearCell::new(starting_index);
270            let x = data();
271            let output = cached_linear.search(9, x.as_slice());
272            dbg!(output);
273            assert!(output.0 == 4);
274            assert!(output.1 == 5);
275        }
276    }
277}