gazefilter_arrsac/
lib.rs

1#![no_std]
2
3extern crate alloc;
4use core::cmp::Reverse;
5
6use alloc::{vec, vec::Vec};
7use rand_core::RngCore;
8use sample_consensus::{Consensus, Estimator, Model};
9
/// The ARRSAC algorithm for sample consensus.
///
/// Don't forget to shuffle your input data points to avoid bias before
/// using this consensus process. It will not shuffle your data for you.
/// If you do not shuffle, the output will be biased towards data at the beginning
/// of the inputs.
#[derive(Debug, Clone)]
pub struct Arrsac<R> {
    // Number of models generated in the initial step when epsilon and delta are estimated.
    initialization_hypotheses: usize,
    // Number of data blocks used to compute the initial estimate of delta and epsilon.
    initialization_blocks: usize,
    // Maximum number of best hypotheses retained during block processing (halved each block).
    max_candidate_hypotheses: usize,
    // Number of estimations (each may yield several hypotheses) run per evaluated block.
    estimations_per_block: usize,
    // Number of data points evaluated before more hypotheses are generated.
    block_size: usize,
    // SPRT threshold: once a model reaches this level of unlikelihood it is rejected.
    likelihood_ratio_threshold: f32,
    // Residual threshold separating a model's inliers from its outliers.
    inlier_threshold: f64,
    // Random number generator used for drawing sample indices.
    rng: R,
    // Scratch buffer of random sample indices, reused across calls to avoid reallocation.
    random_samples: Vec<u32>,
}
28
29impl<R> Arrsac<R>
30where
31    R: RngCore,
32{
33    /// `rng` should have the same properties you would want for a Monte Carlo simulation.
34    /// It should generate random numbers quickly without having any discernable patterns.
35    ///
36    /// The `inlier_threshold` is the one parameter that is always specific to your dataset.
37    /// This must be set to the threshold in which a data point's residual is considered an inlier.
38    /// Some of the other parameters may need to be configured based on the amount of data,
39    /// such as `block_size`, `likelihood_ratio_threshold`, and `block_size`. However,
40    /// `inlier_threshold` has to be set based on the residual function used with the model.
41    ///
42    /// `initial_epsilon` must be higher than `initial_delta`. If you modify these values,
43    /// you need to make sure that within one `block_size` the `likelihood_ratio_threshold`
44    /// can be reached and a model can be rejected. Basically, make sure that
45    /// `((1.0 - delta) / (1.0 - epsilon))^block_size >>> likelihood_ratio_threshold`.
46    /// This must be done to ensure outlier models are rejected during the initial generation
47    /// phase, which only processes `block_size` datapoints.
48    ///
49    /// `initial_epsilon` should also be as large as you can set it where it is still relatively
50    /// pessimistic. This is so that we can more easily reject a model early in the process
51    /// to compute an updated value for delta during the adaptive process. This may not be possible
52    /// and will depend on your data.
53    pub fn new(inlier_threshold: f64, rng: R) -> Self {
54        Self {
55            initialization_hypotheses: 256,
56            initialization_blocks: 4,
57            max_candidate_hypotheses: 64,
58            estimations_per_block: 64,
59            block_size: 64,
60            likelihood_ratio_threshold: 1e3,
61            inlier_threshold,
62            rng,
63            random_samples: vec![],
64        }
65    }
66
67    /// Number of models generated in the initial step when epsilon and delta are being estimated.
68    ///
69    /// Default: `256`
70    #[must_use]
71    pub fn initialization_hypotheses(self, initialization_hypotheses: usize) -> Self {
72        Self {
73            initialization_hypotheses,
74            ..self
75        }
76    }
77
78    /// Number of data blocks used to compute the initial estimate of delta and epsilon
79    /// before proceeding with regular block processing. This is used instead of
80    /// an initial epsilon and delta, which were suggested by the paper.
81    ///
82    /// Default: `4`
83    #[must_use]
84    pub fn initialization_blocks(self, initialization_blocks: usize) -> Self {
85        Self {
86            initialization_blocks,
87            ..self
88        }
89    }
90
91    /// Maximum number of best hypotheses to retain during block processing
92    ///
93    /// This number is halved on each block such that on block `n` the number of
94    /// hypotheses retained is `max_candidate_hypotheses >> n`.
95    ///
96    /// Default: `64`
97    #[must_use]
98    pub fn max_candidate_hypotheses(self, max_candidate_hypotheses: usize) -> Self {
99        Self {
100            max_candidate_hypotheses,
101            ..self
102        }
103    }
104
105    /// Number of estmations (may generate multiple hypotheses) that will be ran
106    /// for each block of data evaluated
107    ///
108    /// Default: `64`
109    #[must_use]
110    pub fn estimations_per_block(self, estimations_per_block: usize) -> Self {
111        Self {
112            estimations_per_block,
113            ..self
114        }
115    }
116
117    /// Number of data points evaluated before more hypotheses are generated
118    ///
119    /// Default: `64`
120    #[must_use]
121    pub fn block_size(self, block_size: usize) -> Self {
122        Self { block_size, ..self }
123    }
124
125    /// Once a model reaches this level of unlikelihood, it is rejected. Set this
126    /// higher to make it less restrictive, usually at the cost of more execution time.
127    ///
128    /// Increasing this will make it more likely to find a good result.
129    ///
130    /// Decreasing this will speed up execution.
131    ///
132    /// This ratio is not exposed as a parameter in the original paper, but is instead computed
133    /// recursively for a few iterations. It is roughly equivalent to the **reciprocal** of the
134    /// **probability of rejecting a good model**. You can use that to control the probability
135    /// that a good model is rejected.
136    ///
137    /// Default: `1e3`
138    #[must_use]
139    pub fn likelihood_ratio_threshold(self, likelihood_ratio_threshold: f32) -> Self {
140        Self {
141            likelihood_ratio_threshold,
142            ..self
143        }
144    }
145
146    /// Residual threshold for determining if a data point is an inlier or an outlier of a model
147    #[must_use]
148    pub fn inlier_threshold(self, inlier_threshold: f64) -> Self {
149        Self {
150            inlier_threshold,
151            ..self
152        }
153    }
154
155    /// Adapted from algorithm 3 from "A Comparative Analysis of RANSAC Techniques Leading to Adaptive
156    /// Real-Time Random Sample Consensus", but it was effectively rewritten to avoid the need for
157    /// initial epsilon and delta.
158    ///
159    /// Returns the initial models (and their num inliers) sorted by decreasing inliers
160    /// and `delta` in that order.
161    fn initial_hypotheses<E, Data>(
162        &mut self,
163        estimator: &E,
164        data: impl Iterator<Item = Data> + Clone,
165    ) -> (Vec<(E::Model, usize)>, f32)
166    where
167        E: Estimator<Data>,
168    {
169        assert!(
170            self.initialization_blocks > 0,
171            "ARRSAC must have at least 1 initialization block"
172        );
173        // NOTE: This whole function is different than that specified in the ARRSAC paper.
174        // The reason is that you needed to provide a good initial guess for epsilon and delta
175        // otherwise it could lead to delta exceeding epsilon or situations where models could no
176        // longer be rejected or were almost always rejected. This solution is an imperfect compomise
177        // that assumes that delta will be roughly equal to the inlier ratio of the worst model generated,
178        // which both assumes that the worst model is an outlier and that it is actually representative of the
179        // population. The assumption of epsilon also assumes that the best model is an inlier, but is an
180        // otherwise good initial guess. The other caveat with this approach is that a sufficiently large
181        // set of initial datapoints is required to be able to accurately determine epsilon and delta.
182        // Therefore a new paremeter is added to separate the normal blocks from the initial generation set.
183        let mut hypotheses = vec![];
184        // We don't want more than `block_size` data points to be used to evaluate models initially.
185        let initial_datapoints = core::cmp::min(
186            self.initialization_blocks * self.block_size,
187            data.clone().count(),
188        );
189        // Generate the initial batch of random hypotheses and count their inliers and outliers.
190        for _ in 0..self.initialization_hypotheses {
191            for model in self.generate_random_hypotheses(estimator, data.clone()) {
192                let inliers = self.count_inliers(data.clone().take(initial_datapoints), &model);
193                hypotheses.push((model, inliers));
194            }
195        }
196
197        // Bail early when no hypothesis was found.
198        // This will cause execution to terminate.
199        if hypotheses.is_empty() {
200            return (hypotheses, 0.0);
201        }
202
203        // Sort the hypotheses by their inliers.
204        hypotheses.sort_unstable_by_key(|&(_, inliers)| Reverse(inliers));
205
206        // Compute epsilon and delta using the best and worst model generated.
207        let epsilon = hypotheses
208            .first()
209            .map(|&(_, inliers)| inliers as f32 / initial_datapoints as f32)
210            .unwrap_or_default();
211        let delta = hypotheses
212            .last()
213            .map(|&(_, inliers)| if inliers < E::MIN_SAMPLES {E::MIN_SAMPLES} else {inliers} as f32 / initial_datapoints as f32)
214            .unwrap_or_default();
215
216        if epsilon < delta {
217            // If epsilon is less than delta, then better hypotheses will get rejected and worse accepted,
218            // which is counter to what we want. In this case, we had a bad initialization, so clear the hypotheses.
219            // This will cause execution to terminate.
220            hypotheses.clear();
221            return (hypotheses, delta);
222        }
223
224        // Populate hypotheses with hypotheses generated from the inliers of the best hypothesis.
225        // This will use the initialization datapoints and filter with SPRT.
226        self.populate_hypotheses_sprt(
227            estimator,
228            &mut hypotheses,
229            delta,
230            data,
231            initial_datapoints,
232            self.initialization_hypotheses,
233        );
234
235        // Sort the hypotheses by their inliers.
236        hypotheses.sort_unstable_by_key(|&(_, inliers)| Reverse(inliers));
237
238        // Filter down the hypotheses to just the best ones.
239        hypotheses.truncate(self.max_candidate_hypotheses >> (self.initialization_blocks - 1));
240
241        (hypotheses, delta)
242    }
243
244    /// Populates `self.random_samples` using a len.
245    fn populate_samples(&mut self, num: usize, len: usize) {
246        // We can generate no hypotheses if the amout of data is too low.
247        if len < num {
248            panic!("cannot use arrsac without having enough samples");
249        }
250        let len = len as u32;
251        // Threshold generation below adapted from randomize::RandRangeU32.
252        let threshold = len.wrapping_neg() % len;
253        self.random_samples.clear();
254        for _ in 0..num {
255            loop {
256                let mul = u64::from(self.rng.next_u32()).wrapping_mul(u64::from(len));
257                if mul as u32 >= threshold {
258                    let s = (mul >> 32) as u32;
259                    if !self.random_samples.contains(&s) {
260                        self.random_samples.push(s);
261                        break;
262                    }
263                }
264            }
265        }
266    }
267
268    fn populate_hypotheses_sprt<E, Data>(
269        &mut self,
270        estimator: &E,
271        hypotheses: &mut Vec<(E::Model, usize)>,
272        delta: f32,
273        data: impl Iterator<Item = Data> + Clone,
274        num_checked: usize,
275        num_hypotheses: usize,
276    ) where
277        E: Estimator<Data>,
278    {
279        // Update epsilon using the best model.
280        // Since epsilon can only increase and delta is fixed, we can be sure that these ratios
281        // will still be valid (epsilon > delta).
282        let epsilon = hypotheses[0].1 as f32 / num_checked as f32;
283        // Create the likelihood ratios for inliers and outliers.
284        let positive_likelihood_ratio = delta / epsilon;
285        let negative_likelihood_ratio = (1.0 - delta) / (1.0 - epsilon);
286        // Generate the list of inliers for the best model.
287        let mut inliers = self.inliers(data.clone().take(num_checked), &hypotheses[0].0);
288        if inliers.len() <= E::MIN_SAMPLES {
289            // If we don't have enough samples to generate more models, then we should expand the inliers to
290            // the entire dataset.
291            inliers = self.inliers(data.clone().take(num_checked), &hypotheses[0].0);
292        }
293        // We generate hypotheses until we reach the initial num hypotheses.
294        // We can't count the number generated because it could generate 0 hypotheses
295        // and then the loop would continue indefinitely.
296        let mut random_hypotheses = Vec::new();
297        for _ in 0..num_hypotheses {
298            random_hypotheses.extend(self.generate_random_hypotheses_subset(
299                estimator,
300                data.clone(),
301                &inliers,
302            ));
303            for model in random_hypotheses.drain(..) {
304                if let Some(inliers) = self.asprt(
305                    data.clone().take(num_checked),
306                    &model,
307                    positive_likelihood_ratio,
308                    negative_likelihood_ratio,
309                    E::MIN_SAMPLES,
310                ) {
311                    hypotheses.push((model, inliers));
312                }
313            }
314        }
315    }
316
317    /// Generates as many hypotheses as one call to `Estimator::estimate()` returns from all data.
318    fn generate_random_hypotheses<E, Data>(
319        &mut self,
320        estimator: &E,
321        data: impl Iterator<Item = Data> + Clone,
322    ) -> E::ModelIter
323    where
324        E: Estimator<Data>,
325    {
326        self.populate_samples(E::MIN_SAMPLES, data.clone().count());
327        estimator.estimate(
328            self.random_samples
329                .iter()
330                .map(|&ix| data.clone().nth(ix as usize).unwrap()),
331        )
332    }
333
334    /// Generates as many hypotheses as one call to `Estimator::estimate()` returns from a subset of the data.
335    fn generate_random_hypotheses_subset<E, Data>(
336        &mut self,
337        estimator: &E,
338        data: impl Iterator<Item = Data> + Clone,
339        subset: &[usize],
340    ) -> E::ModelIter
341    where
342        E: Estimator<Data>,
343    {
344        self.populate_samples(E::MIN_SAMPLES, subset.len());
345        estimator.estimate(
346            core::mem::take(&mut self.random_samples)
347                .iter()
348                .map(|&ix| data.clone().nth(subset[ix as usize]).unwrap()),
349        )
350    }
351
352    /// Algorithm 1 in "Randomized RANSAC with Sequential Probability Ratio Test".
353    ///
354    /// This tests if a model is accepted. Returns `Some(inliers)` if accepted or `None` if rejected.
355    ///
356    /// `inlier_threshold` - The model residual error threshold between inliers and outliers
357    /// `positive_likelihood_ratio` - `δ / ε`
358    /// `negative_likelihood_ratio` - `(1 - δ) / (1 - ε)`
359    fn asprt<Data, M: Model<Data>>(
360        &self,
361        data: impl Iterator<Item = Data>,
362        model: &M,
363        positive_likelihood_ratio: f32,
364        negative_likelihood_ratio: f32,
365        minimum_samples: usize,
366    ) -> Option<usize> {
367        let mut likelihood_ratio = 1.0;
368        let mut inliers = 0;
369        for data in data {
370            likelihood_ratio *= if model.residual(&data) < self.inlier_threshold {
371                inliers += 1;
372                positive_likelihood_ratio
373            } else {
374                negative_likelihood_ratio
375            };
376
377            if likelihood_ratio > self.likelihood_ratio_threshold || likelihood_ratio.is_nan() {
378                return None;
379            }
380        }
381
382        (inliers >= minimum_samples).then_some(inliers)
383    }
384
385    /// Determines the number of inliers a model has.
386    fn count_inliers<Data, M: Model<Data>>(
387        &self,
388        data: impl Iterator<Item = Data>,
389        model: &M,
390    ) -> usize {
391        data.filter(|data| model.residual(data) < self.inlier_threshold)
392            .count()
393    }
394
395    /// Gets indices of inliers for a model.
396    fn inliers<Data, M: Model<Data>>(
397        &self,
398        data: impl Iterator<Item = Data>,
399        model: &M,
400    ) -> Vec<usize> {
401        data.enumerate()
402            .filter(|(_, data)| model.residual(data) < self.inlier_threshold)
403            .map(|(ix, _)| ix)
404            .collect()
405    }
406}
407
408impl<E, R, Data> Consensus<E, Data> for Arrsac<R>
409where
410    E: Estimator<Data>,
411    R: RngCore,
412{
413    type Inliers = Vec<usize>;
414
415    fn model<I>(&mut self, estimator: &E, data: I) -> Option<E::Model>
416    where
417        I: Iterator<Item = Data> + Clone,
418    {
419        self.model_inliers(estimator, data).map(|(model, _)| model)
420    }
421
422    fn model_inliers<I>(&mut self, estimator: &E, data: I) -> Option<(E::Model, Self::Inliers)>
423    where
424        I: Iterator<Item = Data> + Clone,
425    {
426        // Don't do anything if we don't have enough data.
427        if data.clone().count() < E::MIN_SAMPLES {
428            return None;
429        }
430        // Generate the initial set of hypotheses. This also gets us an estimate of delta.
431        let (mut hypotheses, delta) = self.initial_hypotheses(estimator, data.clone());
432
433        // If there are no initial hypotheses then initialization failed, so exit early.
434        if hypotheses.is_empty() {
435            return None;
436        }
437
438        // Gradually increase how many datapoints we are evaluating until we evaluate them all.
439        // This starts at the first block that was not evaluated in initial_hypotheses.
440        'outer: for block in self.initialization_blocks.. {
441            let samples_up_to_beginning_of_block = block * self.block_size;
442            let samples_up_to_end_of_block = samples_up_to_beginning_of_block + self.block_size;
443            // Score hypotheses with samples.
444            for sample in samples_up_to_beginning_of_block..samples_up_to_end_of_block {
445                // Score the hypotheses with the new datapoint.
446                let new_datapoint = if let Some(datapoint) = data.clone().nth(sample) {
447                    datapoint
448                } else {
449                    // We reached the last datapoint, so break out of the outer loop.
450                    break 'outer;
451                };
452                for (hypothesis, inlier_count) in hypotheses.iter_mut() {
453                    if hypothesis.residual(&new_datapoint) < self.inlier_threshold {
454                        *inlier_count += 1;
455                    }
456                }
457            }
458            // Sort the hypotheses by their inliers to find the best.
459            hypotheses.sort_unstable_by_key(|&(_, inliers)| Reverse(inliers));
460            // Populate hypotheses with hypotheses that pass SPRT.
461            self.populate_hypotheses_sprt(
462                estimator,
463                &mut hypotheses,
464                delta,
465                data.clone(),
466                samples_up_to_end_of_block,
467                self.estimations_per_block,
468            );
469            // This will retain at least half of the hypotheses each time
470            // and gradually decrease as the number of samples we are evaluating increases.
471            // NOTE:
472            // The paper says to use a peculiar formula that just results in doing
473            // this basic right shift below, but as written it contained some apparent errors in
474            // where it was ran. This seems to be the correct location to do this.
475            hypotheses.sort_unstable_by_key(|&(_, inliers)| Reverse(inliers));
476            hypotheses.truncate(self.max_candidate_hypotheses >> block);
477            if hypotheses.len() <= 1 {
478                break 'outer;
479            }
480        }
481        hypotheses
482            .into_iter()
483            .max_by_key(|&(_, inliers)| inliers)
484            .map(|(model, _)| {
485                let inliers = self.inliers(data.clone(), &model);
486                (model, inliers)
487            })
488    }
489}