mate_selection/
lib.rs

1//! A collection of mate selection methods for evolutionary algorithms
2
3use rand::Rng;
4use serde::{Deserialize, Serialize};
5
6/// Mate selection algorithms randomly select pairs of individuals from a population.  
7/// The sampling probability of each individuals is a function of its reproductive fitness or "score".  
8pub trait MateSelection<R: Rng + ?Sized>: std::fmt::Debug {
9    /// Choose multiple weighted pairs
10    ///
11    /// * Argument `amount` is the number of pairs to return.
12    ///
13    /// * Argument "scores" is a list containing the reproductive fitness of each individual.
14    ///
15    /// * Returns a list of pairs of parents to mate together.  
16    ///   The parents are specified as indices into the scores list.
17    ///
18    /// This implementation almost never mates an individual with itself.
19    fn pairs(&self, rng: &mut R, amount: usize, scores: Vec<f64>) -> Vec<[usize; 2]> {
20        let mut pairs = self.select(rng, amount * 2, scores);
21
22        reduce_repeats(&mut pairs);
23
24        transmute_vec_to_pairs(pairs)
25    }
26
27    /// Choose multiple weighted
28    fn select(&self, rng: &mut R, amount: usize, scores: Vec<f64>) -> Vec<usize> {
29        if amount == 0 {
30            return vec![];
31        } else {
32            assert!(!scores.is_empty());
33        }
34
35        let weights = self.sample_weight(scores);
36
37        stochastic_universal_sampling::choose_multiple_weighted(rng, amount, &weights)
38    }
39
40    /// Probability distribution function
41    fn pdf(&self, scores: Vec<f64>) -> Vec<f64> {
42        let mut pdf = self.sample_weight(scores);
43        // Normalize the sum to one.
44        let sum: f64 = pdf.iter().sum();
45        let div_sum = 1.0 / sum;
46        for x in pdf.iter_mut() {
47            *x *= div_sum;
48        }
49        pdf
50    }
51
52    /// Transform the reproductive fitness scores into sampling weights.  
53    /// The sampling weights do **not** need to sum to one.
54    fn sample_weight(&self, scores: Vec<f64>) -> Vec<f64>;
55}
56
57/// Select parents with a uniform random probability, ignoring the scores.
58#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq)]
59pub struct Random;
60
61/// Select parents with a probability that is directly proportional to their score.
62///
63/// >   `probability(i) = score(i) / sum(score(x) for x in population)`
64///
65/// This method biases the selection based on the parents scores. This
66/// method is significantly influenced by the magnitude of the fitness
67/// scoring function, and by the signal-to-noise ratio between the average
68/// score and the variations in the scores.
69///
70/// Negative or invalid (NaN) scores are discarded and those individuals are
71/// not permitted to mate.
72#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq)]
73pub struct Proportional;
74
75/// Normalize the fitness scores into a standard normal distribution.
76/// First the scores are normalized into a standard distribution and then they
77/// are shifted by the cutoff, which is naturally measured in standard deviations.
78/// All scores which are less than the cutoff (now sub-zero) are
79/// discarded and those individuals are not permitted to mate.
80/// Finally the scores are divided by their sum to yield a selection probability.
81/// This method improves upon the proportional method by controlling for the
82/// magnitude and variation of the fitness scoring function.
83///
84/// Argument "**cutoff**" is the minimum negative deviation required for mating.
85#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq)]
86pub struct Normalized(pub f64);
87
88/// Select parents from the best ranked individuals in the population.
89/// Among the top scoring individuals, individuals are sampled with uniform
90/// random probability.
91///
92/// Argument "**number**" is the number of individuals who are allowed to mate.
93#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq)]
94struct Best(pub usize);
95
96/// Apply a simple percentile based threshold to the population.
97/// Mating pairs are selected with uniform random probability from the eligible
98/// members of the population.
99///
100/// Argument "**percentile**" is the fraction of the population which is denied
101/// the chance to mate. At `0` everyone is allowed to mate and at `1` only the
102/// single best individual is allowed to mate.
103#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq)]
104pub struct Percentile(pub f64);
105
106/// Select parents based on their ranking in the population. This method sorts
107/// the individuals by their scores in order to rank them from worst to best.
108/// The sampling probability is a linear function of the rank.
109/// >   `probability(rank) = (1/N) * (1 + SP - 2 * SP * (rank-1)/(N-1))`  
110/// >   Where `N` is the population size, and  
111/// >   Where `rank = 1` is the best individual and `rank = N` is the worst.  
112///
113/// Argument "**selection pressure**" measures the inequality in the probability
114/// of being selected. Must be in the range [0, 1].
115///
116/// * At zero, all members are equally likely to be selected.  
117/// * At one, the worst ranked individual will never be selected.  
118#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq)]
119pub struct RankedLinear(pub f64);
120
121/// Select parents based on their ranking in the population, with an
122/// exponentially weighted bias towards better ranked individuals. This method
123/// can apply more selection pressure than the RankedLinear method can, which
124/// is useful when dealing with very large populations or with a very large
125/// number of offspring.
126///
127/// Argument "**median**" describes the exponential slope of the weights curve.
128/// A small median will strongly favor the best individuals, whereas a
129/// large median will sample the individuals more equally. The median is a
130/// rank, and so it is naturally measured in units of individuals.
131/// Approximately half of the sample will be drawn from individuals ranked
132/// better than the median, and the other half will be selected from
133/// individuals with a worse ranking than the median.
134#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq)]
135pub struct RankedExponential(pub usize);
136
137#[cfg(feature = "pyo3")]
138mod python {
139    use super::MateSelection;
140    use pyo3::exceptions::PyValueError;
141    use pyo3::prelude::*;
142
143    /// A collection of mate selection methods for evolutionary algorithms
144    ///
145    /// Mate selection algorithms randomly select pairs of individuals from a
146    /// population. The sampling probability of each individuals is a function
147    /// of its reproductive fitness or "score".
148    ///
149    /// These implementations almost never mate an individual with itself.
150    #[pymodule]
151    fn mate_selection(m: Bound<PyModule>) -> PyResult<()> {
152        m.add_class::<Random>()?;
153        m.add_class::<Proportional>()?;
154        m.add_class::<Normalized>()?;
155        m.add_class::<Best>()?;
156        m.add_class::<Percentile>()?;
157        m.add_class::<RankedLinear>()?;
158        m.add_class::<RankedExponential>()?;
159        Ok(())
160    }
161
162    /// Select parents with a uniform random probability, ignoring the scores.
163    #[pyclass]
164    struct Random(super::Random);
165
166    /// Select parents with a probability that is directly proportional to their score.
167    ///
168    /// >   probability(i) = score(i) / sum(score(x) for x in population)
169    ///
170    /// This method biases the selection based on the parents scores. This
171    /// method is significantly influenced by the magnitude of the fitness
172    /// scoring function, and by the signal-to-noise ratio between the average
173    /// score and the variations in the scores.
174    ///
175    /// Negative or invalid (NaN) scores are discarded and those individuals are
176    /// not permitted to mate.
177    #[pyclass]
178    struct Proportional(super::Proportional);
179
180    /// Normalize the fitness scores into a standard normal distribution. First
181    /// the scores are normalized into a standard distribution and then they
182    /// are shifted by the cutoff, which is naturally measured in standard
183    /// deviations. All scores which are less than the cutoff (now sub-zero)
184    /// are discarded and those individuals are not permitted to mate. Finally
185    /// the scores are divided by their sum to yield a selection probability.
186    /// This method improves upon the proportional method by controlling for
187    /// the magnitude and variation of the fitness scoring function.
188    ///
189    /// Argument "cutoff" is the minimum negative deviation required for mating.
190    #[pyclass]
191    struct Normalized(super::Normalized);
192
193    /// Select parents from the best ranked individuals in the population.
194    /// Among the top scoring individuals, individuals are sampled with uniform
195    /// random probability.
196    ///
197    /// Argument "number" is the number of individuals who are allowed to mate.
198    #[pyclass]
199    struct Best(super::Best);
200
201    /// Apply a simple percentile based threshold to the population.
202    /// Mating pairs are selected with uniform random probability from the
203    /// eligible members of the population.
204    ///
205    /// Argument "percentile" is the fraction of the population which is denied
206    /// the chance to mate. At "0" everyone is allowed to mate and at "1" only the
207    /// single best individual is allowed to mate.
208    #[pyclass]
209    struct Percentile(super::Percentile);
210
211    /// Select parents based on their ranking in the population. This method
212    /// sorts the individuals by their scores in order to rank them from worst
213    /// to best. The sampling probability is a linear function of the rank.
214    ///
215    /// >   probability(rank) = (1/N) * (1 + SP - 2 * SP * (rank-1)/(N-1))  
216    /// >   Where N is the population size, and  
217    /// >   Where rank = 1 is the best individual and rank = N is the worst.  
218    ///
219    /// Argument "selection_pressure" measures the inequality in the
220    /// probability of being selected. Must be in the range [0, 1].
221    /// * At zero, all members are equally likely to be selected.
222    /// * At one, the worst ranked individual will never be selected.
223    #[pyclass]
224    struct RankedLinear(super::RankedLinear);
225
226    /// Select parents based on their ranking in the population, with an
227    /// exponentially weighted bias towards better ranked individuals. This
228    /// method can apply more selection pressure than the RankedLinear method
229    /// can, which is useful when dealing with very large populations or with a
230    /// very large number of offspring.
231    ///
232    /// Argument "median" describes the exponential slope of the weights curve.
233    /// A small median will strongly favor the best individuals, whereas a
234    /// large median will sample the individuals more equally. The median is a
235    /// rank, and so it is naturally measured in units of individuals.
236    /// Approximately half of the sample will be drawn from individuals ranked
237    /// better than the median, and the other half will be selected from
238    /// individuals with a worse ranking than the median.
239    #[pyclass]
240    struct RankedExponential(super::RankedExponential);
241
242    #[pymethods]
243    impl Random {
244        #[new]
245        fn new() -> Self {
246            Self(super::Random)
247        }
248        fn __str__(&self) -> String {
249            "mate_selection.Random()".to_string()
250        }
251        /// Choose multiple weighted pairs
252        /// * Argument "amount" is the number of pairs to return.
253        /// * Argument "scores" is the list of reproductive fitness scores.
254        /// * Returns a list of pairs of parents to mate together.
255        ///   The parents are specified as indices into the scores list.
256        fn pairs(&self, amount: usize, scores: Vec<f64>) -> Vec<[usize; 2]> {
257            let rng = &mut rand::thread_rng();
258            self.0.pairs(rng, amount, scores)
259        }
260        /// Choose multiple weighted
261        fn select(&self, amount: usize, scores: Vec<f64>) -> Vec<usize> {
262            let rng = &mut rand::thread_rng();
263            self.0.select(rng, amount, scores)
264        }
265        /// Probability distribution function
266        fn pdf(&self, scores: Vec<f64>) -> Vec<f64> {
267            <super::Random as MateSelection<rand::rngs::ThreadRng>>::pdf(&self.0, scores)
268        }
269    }
270
271    #[pymethods]
272    impl Proportional {
273        #[new]
274        fn new() -> Self {
275            Self(super::Proportional)
276        }
277        fn __str__(&self) -> String {
278            "mate_selection.Proportional()".to_string()
279        }
280        /// Choose multiple weighted pairs
281        /// * Argument "amount" is the number of pairs to return.
282        /// * Argument "scores" is the list of reproductive fitness scores.
283        /// * Returns a list of pairs of parents to mate together.
284        ///   The parents are specified as indices into the scores list.
285        fn pairs(&self, amount: usize, scores: Vec<f64>) -> Vec<[usize; 2]> {
286            let rng = &mut rand::thread_rng();
287            self.0.pairs(rng, amount, scores)
288        }
289        /// Choose multiple weighted
290        fn select(&self, amount: usize, scores: Vec<f64>) -> Vec<usize> {
291            let rng = &mut rand::thread_rng();
292            self.0.select(rng, amount, scores)
293        }
294        /// Probability distribution function
295        fn pdf(&self, scores: Vec<f64>) -> Vec<f64> {
296            <super::Proportional as MateSelection<rand::rngs::ThreadRng>>::pdf(&self.0, scores)
297        }
298    }
299
300    #[pymethods]
301    impl Normalized {
302        #[new]
303        fn new(cutoff: f64) -> PyResult<Self> {
304            if cutoff.is_finite() {
305                Ok(Self(super::Normalized(cutoff)))
306            } else {
307                Err(PyValueError::new_err(
308                    "argument \"cutoff\" is not a finite number",
309                ))
310            }
311        }
312        fn __str__(&self) -> String {
313            format!("mate_selection.Normalized({})", self.0 .0)
314        }
315        /// Choose multiple weighted pairs
316        /// * Argument "amount" is the number of pairs to return.
317        /// * Argument "scores" is the list of reproductive fitness scores.
318        /// * Returns a list of pairs of parents to mate together.
319        ///   The parents are specified as indices into the scores list.
320        fn pairs(&self, amount: usize, scores: Vec<f64>) -> Vec<[usize; 2]> {
321            let rng = &mut rand::thread_rng();
322            self.0.pairs(rng, amount, scores)
323        }
324        /// Choose multiple weighted
325        fn select(&self, amount: usize, scores: Vec<f64>) -> Vec<usize> {
326            let rng = &mut rand::thread_rng();
327            self.0.select(rng, amount, scores)
328        }
329        /// Probability distribution function
330        fn pdf(&self, scores: Vec<f64>) -> Vec<f64> {
331            <super::Normalized as MateSelection<rand::rngs::ThreadRng>>::pdf(&self.0, scores)
332        }
333    }
334
335    #[pymethods]
336    impl Best {
337        #[new]
338        fn new(number: usize) -> PyResult<Self> {
339            if number > 0 {
340                Ok(Self(super::Best(number)))
341            } else {
342                Err(PyValueError::new_err(
343                    "argument \"number\" is less than one",
344                ))
345            }
346        }
347        fn __str__(&self) -> String {
348            format!("mate_selection.Best({})", self.0 .0)
349        }
350        /// Choose multiple weighted pairs
351        /// * Argument "amount" is the number of pairs to return.
352        /// * Argument "scores" is the list of reproductive fitness scores.
353        /// * Returns a list of pairs of parents to mate together.
354        ///   The parents are specified as indices into the scores list.
355        fn pairs(&self, amount: usize, scores: Vec<f64>) -> Vec<[usize; 2]> {
356            let rng = &mut rand::thread_rng();
357            self.0.pairs(rng, amount, scores)
358        }
359        /// Choose multiple weighted
360        fn select(&self, amount: usize, scores: Vec<f64>) -> Vec<usize> {
361            let rng = &mut rand::thread_rng();
362            self.0.select(rng, amount, scores)
363        }
364        /// Probability distribution function
365        fn pdf(&self, scores: Vec<f64>) -> Vec<f64> {
366            <super::Best as MateSelection<rand::rngs::ThreadRng>>::pdf(&self.0, scores)
367        }
368    }
369
370    #[pymethods]
371    impl Percentile {
372        #[new]
373        fn new(percentile: f64) -> PyResult<Self> {
374            if (0.0..=1.0).contains(&percentile) {
375                Ok(Self(super::Percentile(percentile)))
376            } else {
377                Err(PyValueError::new_err(
378                    "argument \"percentile\" is out of bounds [0, 1]",
379                ))
380            }
381        }
382        fn __str__(&self) -> String {
383            format!("mate_selection.Percentile({})", self.0 .0)
384        }
385        /// Choose multiple weighted pairs
386        /// * Argument "amount" is the number of pairs to return.
387        /// * Argument "scores" is the list of reproductive fitness scores.
388        /// * Returns a list of pairs of parents to mate together.
389        ///   The parents are specified as indices into the scores list.
390        fn pairs(&self, amount: usize, scores: Vec<f64>) -> Vec<[usize; 2]> {
391            let rng = &mut rand::thread_rng();
392            self.0.pairs(rng, amount, scores)
393        }
394        /// Choose multiple weighted
395        fn select(&self, amount: usize, scores: Vec<f64>) -> Vec<usize> {
396            let rng = &mut rand::thread_rng();
397            self.0.select(rng, amount, scores)
398        }
399        /// Probability distribution function
400        fn pdf(&self, scores: Vec<f64>) -> Vec<f64> {
401            <super::Percentile as MateSelection<rand::rngs::ThreadRng>>::pdf(&self.0, scores)
402        }
403    }
404
405    #[pymethods]
406    impl RankedLinear {
407        #[new]
408        fn new(selection_pressure: f64) -> PyResult<Self> {
409            if (0.0..=1.0).contains(&selection_pressure) {
410                Ok(Self(super::RankedLinear(selection_pressure)))
411            } else {
412                Err(PyValueError::new_err(
413                    "argument \"selection_pressure\" is out of bounds [0, 1]",
414                ))
415            }
416        }
417        fn __str__(&self) -> String {
418            format!("mate_selection.RankedLinear({})", self.0 .0)
419        }
420        /// Choose multiple weighted pairs
421        /// * Argument "amount" is the number of pairs to return.
422        /// * Argument "scores" is the list of reproductive fitness scores.
423        /// * Returns a list of pairs of parents to mate together.
424        ///   The parents are specified as indices into the scores list.
425        fn pairs(&self, amount: usize, scores: Vec<f64>) -> Vec<[usize; 2]> {
426            let rng = &mut rand::thread_rng();
427            self.0.pairs(rng, amount, scores)
428        }
429        /// Choose multiple weighted
430        fn select(&self, amount: usize, scores: Vec<f64>) -> Vec<usize> {
431            let rng = &mut rand::thread_rng();
432            self.0.select(rng, amount, scores)
433        }
434        /// Probability distribution function
435        fn pdf(&self, scores: Vec<f64>) -> Vec<f64> {
436            <super::RankedLinear as MateSelection<rand::rngs::ThreadRng>>::pdf(&self.0, scores)
437        }
438    }
439
440    #[pymethods]
441    impl RankedExponential {
442        #[new]
443        fn new(median: usize) -> PyResult<Self> {
444            if median > 0 {
445                Ok(Self(super::RankedExponential(median)))
446            } else {
447                Err(PyValueError::new_err(
448                    "argument \"median\" is less than one",
449                ))
450            }
451        }
452        fn __str__(&self) -> String {
453            format!("mate_selection.RankedExponential({})", self.0 .0)
454        }
455        /// Choose multiple weighted pairs
456        /// * Argument "amount" is the number of pairs to return.
457        /// * Argument "scores" is the list of reproductive fitness scores.
458        /// * Returns a list of pairs of parents to mate together.
459        ///   The parents are specified as indices into the scores list.
460        fn pairs(&self, amount: usize, scores: Vec<f64>) -> Vec<[usize; 2]> {
461            let rng = &mut rand::thread_rng();
462            self.0.pairs(rng, amount, scores)
463        }
464        /// Choose multiple weighted
465        fn select(&self, amount: usize, scores: Vec<f64>) -> Vec<usize> {
466            let rng = &mut rand::thread_rng();
467            self.0.select(rng, amount, scores)
468        }
469        /// Probability distribution function
470        fn pdf(&self, scores: Vec<f64>) -> Vec<f64> {
471            <super::RankedExponential as MateSelection<rand::rngs::ThreadRng>>::pdf(&self.0, scores)
472        }
473    }
474}
475
476impl<R: Rng + ?Sized> MateSelection<R> for Random {
477    fn sample_weight(&self, mut scores: Vec<f64>) -> Vec<f64> {
478        scores.fill(1.0);
479        scores
480    }
481
482    fn pdf(&self, mut scores: Vec<f64>) -> Vec<f64> {
483        if !scores.is_empty() {
484            let p = 1.0 / scores.len() as f64;
485            scores.fill(p);
486        }
487        scores
488    }
489
490    fn select(&self, rng: &mut R, amount: usize, scores: Vec<f64>) -> Vec<usize> {
491        stochastic_universal_sampling::choose_multiple(rng, amount, scores.len())
492    }
493}
494
495impl<R: Rng + ?Sized> MateSelection<R> for Proportional {
496    fn sample_weight(&self, mut scores: Vec<f64>) -> Vec<f64> {
497        // Replace negative & invalid values with zero.
498        for x in scores.iter_mut() {
499            *x = x.max(0.0);
500        }
501        scores
502    }
503}
504
505impl<R: Rng + ?Sized> MateSelection<R> for Normalized {
506    fn sample_weight(&self, mut scores: Vec<f64>) -> Vec<f64> {
507        let cutoff = self.0;
508        assert!(cutoff.is_finite(), "argument \"cutoff\" is not finite");
509
510        // Find and normalize by the average score.
511        let mean = scores.iter().sum::<f64>() / scores.len() as f64;
512        for x in scores.iter_mut() {
513            *x -= mean;
514        }
515        // Find and normalize by the standard deviation of the scores.
516        let var = scores.iter().map(|x| x.powi(2)).sum::<f64>() / scores.len() as f64;
517        let std = var.sqrt();
518        for x in scores.iter_mut() {
519            // Shift the entire distribution and cutoff all scores which
520            // are less than zero.
521            *x = (*x / std - cutoff).max(0.0);
522        }
523        scores
524    }
525}
526
527fn arg_nth_max(amount: usize, data: &[f64]) -> Vec<usize> {
528    if amount == 0 {
529        return vec![];
530    }
531    let pivot = data.len() - amount;
532    let mut data_copy = data.to_vec();
533    let (_, cutoff, _) = data_copy.select_nth_unstable_by(pivot, f64::total_cmp);
534    let cutoff = *cutoff;
535    let mut index = Vec::with_capacity(amount);
536    for (i, x) in data.iter().enumerate() {
537        if *x >= cutoff {
538            index.push(i)
539        }
540    }
541    // Discard extra elements which are equal to the cutoff.
542    if index.len() > amount {
543        let mut num_discard = index.len() - amount;
544        for cursor in (0..index.len()).rev() {
545            if data[index[cursor]] == cutoff {
546                index.swap_remove(cursor);
547                num_discard -= 1;
548                if num_discard == 0 {
549                    break;
550                }
551            }
552        }
553    }
554    index
555}
556
557fn zero_and_write_sparse(data: &mut [f64], index: &[usize], value: f64) {
558    data.fill(0.0);
559    for i in index {
560        data[*i] = value;
561    }
562}
563
564impl Best {
565    fn args(&self) -> usize {
566        let number = self.0;
567        assert!(number > 0, "argument \"number\" is less than one");
568        number
569    }
570}
571impl<R: Rng + ?Sized> MateSelection<R> for Best {
572    fn select(&self, rng: &mut R, amount: usize, scores: Vec<f64>) -> Vec<usize> {
573        let num_best = self.args();
574        let index = arg_nth_max(num_best, &scores);
575        let sample = stochastic_universal_sampling::choose_multiple(rng, amount, index.len());
576        sample.iter().map(|&s| index[s]).collect()
577    }
578    fn pdf(&self, mut scores: Vec<f64>) -> Vec<f64> {
579        let num_best = self.args();
580        let index = arg_nth_max(num_best, &scores);
581        zero_and_write_sparse(&mut scores, &index, 1.0 / num_best as f64);
582        scores
583    }
584    fn sample_weight(&self, mut scores: Vec<f64>) -> Vec<f64> {
585        let num_best = self.args();
586        let index = arg_nth_max(num_best, &scores);
587        zero_and_write_sparse(&mut scores, &index, 1.0);
588        scores
589    }
590}
591
592impl Percentile {
593    fn get_index(&self, scores: &[f64]) -> Vec<usize> {
594        let percentile = self.0;
595        assert!(
596            (0.0..=1.0).contains(&percentile),
597            "argument \"percentile\" is out of bounds [0, 1]"
598        );
599        let num_eligible = ((1.0 - percentile) * scores.len() as f64).round() as usize;
600        arg_nth_max(num_eligible.max(1), &scores)
601    }
602}
603impl<R: Rng + ?Sized> MateSelection<R> for Percentile {
604    fn select(&self, rng: &mut R, amount: usize, scores: Vec<f64>) -> Vec<usize> {
605        let index = self.get_index(&scores);
606        let sample = stochastic_universal_sampling::choose_multiple(rng, amount, index.len());
607        sample.iter().map(|&s| index[s]).collect()
608    }
609    fn pdf(&self, mut scores: Vec<f64>) -> Vec<f64> {
610        let index = self.get_index(&scores);
611        zero_and_write_sparse(&mut scores, &index, 1.0 / index.len() as f64);
612        scores
613    }
614    fn sample_weight(&self, mut scores: Vec<f64>) -> Vec<f64> {
615        let index = self.get_index(&scores);
616        zero_and_write_sparse(&mut scores, &index, 1.0);
617        scores
618    }
619}
620
621impl<R: Rng + ?Sized> MateSelection<R> for RankedLinear {
622    fn sample_weight(&self, mut scores: Vec<f64>) -> Vec<f64> {
623        let selection_pressure = self.0;
624        assert!(
625            (0.0..=1.0).contains(&selection_pressure),
626            "argument \"selection_pressure\" is out of bounds [0, 1]"
627        );
628
629        let div_n = if scores.len() == 1 {
630            0.0 // Value does not matter, just don't crash.
631        } else {
632            1.0 / (scores.len() - 1) as f64
633        };
634        for (rank, index) in argsort(&scores).iter().enumerate() {
635            // Reverse the ranking from ascending to descending order
636            // so that rank 0 is the best & rank N-1 is the worst.
637            let rank = scores.len() - 1 - rank;
638            // Scale the ranking into the range [0, 1].
639            let rank = rank as f64 * div_n;
640            scores[*index] = 1.0 + selection_pressure - 2.0 * selection_pressure * rank;
641        }
642        scores
643    }
644}
645
646impl<R: Rng + ?Sized> MateSelection<R> for RankedExponential {
647    fn sample_weight(&self, mut scores: Vec<f64>) -> Vec<f64> {
648        let median = self.0;
649        assert!(median > 0, "argument \"median\" is less than one");
650        for (rank, index) in argsort(&scores).iter().enumerate() {
651            let rank = scores.len() - rank - 1;
652            scores[*index] = (-(2.0_f64.ln()) * rank as f64 / median as f64).exp();
653        }
654        scores
655    }
656}
657
658fn argsort(scores: &[f64]) -> Vec<usize> {
659    let mut argsort: Vec<_> = (0..scores.len()).collect();
660    argsort.sort_unstable_by(|a, b| f64::total_cmp(&scores[*a], &scores[*b]));
661    argsort
662}
663
664/// This helps avoid mating an individual with itself.
665fn reduce_repeats(data: &mut [usize]) {
666    debug_assert!(is_even(data.len()));
667    // Simple quadratic greedy algorithm for breaking up pairs of repeated elements.
668    // First search for pairs of repeated values.
669    'outer: for cursor in (0..data.len()).step_by(2) {
670        let value = data[cursor];
671        if value == data[cursor + 1] {
672            // Then find a different value to swap with.
673            for search in (cursor + 2..data.len()).step_by(2) {
674                if data[search] != value && data[search + 1] != value {
675                    data.swap(cursor, search);
676                    continue 'outer;
677                }
678            }
679            for search in (0..cursor).step_by(2) {
680                if data[search] != value && data[search + 1] != value {
681                    data.swap(cursor, search);
682                    continue 'outer;
683                }
684            }
685        }
686    }
687}
688
689/// Transmute the vector of samples into pairs of samples, without needlessly copying the data.
690fn transmute_vec_to_pairs(data: Vec<usize>) -> Vec<[usize; 2]> {
691    // Check that there are an even number of values in the vector.
692    assert!(is_even(data.len()));
693    // Check the data alignment.
694    assert_eq!(
695        std::mem::align_of::<usize>(),
696        std::mem::align_of::<[usize; 2]>()
697    );
698    // Take manual control over the data vector.
699    let mut data = std::mem::ManuallyDrop::new(data);
700    unsafe {
701        // Disassemble the vector.
702        let ptr = data.as_mut_ptr();
703        let mut len = data.len();
704        let mut cap = data.capacity();
705        // Transmute the vector.
706        let ptr = std::mem::transmute::<*mut usize, *mut [usize; 2]>(ptr);
707        len /= 2;
708        cap /= 2;
709        // Reassemble and return the data.
710        Vec::from_raw_parts(ptr, len, cap)
711    }
712}
713
714const fn is_even(x: usize) -> bool {
715    x & 1 == 0
716}
717
718#[cfg(test)]
719mod tests {
720    use super::*;
721
722    fn flatten_and_sort(pairs: &Vec<[usize; 2]>) -> Vec<usize> {
723        let mut data: Vec<usize> = pairs.iter().flatten().copied().collect();
724        data.sort_unstable();
725        data
726    }
727
728    #[test]
729    fn is_even() {
730        assert!(super::is_even(0));
731        assert!(!super::is_even(1));
732        assert!(super::is_even(2));
733        assert!(!super::is_even(3));
734    }
735
736    #[test]
737    fn no_data() {
738        let rng = &mut rand::thread_rng();
739        let pairs = Proportional.pairs(rng, 0, vec![]);
740        assert!(pairs.is_empty());
741
742        let pairs = Proportional.pairs(rng, 0, vec![1.0, 2.0, 3.0]);
743        assert!(pairs.is_empty());
744    }
745
746    #[test]
747    fn truncate_top_one() {
748        let rng = &mut rand::thread_rng();
749        // Truncate all but the single best individual.
750        let algo = Percentile(0.99);
751        let weights: Vec<f64> = (0..100).map(|x| x as f64 / 100.0).collect();
752        let pairs = algo.pairs(rng, 1, weights);
753        assert!(pairs == [[99, 99]]);
754    }
755
756    #[test]
757    fn truncate_top_two() {
758        let rng = &mut rand::thread_rng();
759        // Truncate all but the best two individuals.
760        let algo = Percentile(0.98);
761        let weights: Vec<f64> = (0..100).map(|x| x as f64 / 100.0).collect();
762        let pairs = algo.pairs(rng, 1, weights);
763        assert!(pairs == [[98, 99]] || pairs == [[99, 98]]);
764    }
765
766    #[test]
767    fn truncate_none() {
768        let rng = &mut rand::thread_rng();
769        // Truncate none of the individuals.
770        let algo = Percentile(0.0);
771        let weights: Vec<f64> = (0..100).map(|x| x as f64 / 100.0).collect();
772        let pairs = algo.pairs(rng, 50, weights);
773        let selected = flatten_and_sort(&pairs);
774        assert_eq!(selected, (0..100).collect::<Vec<_>>());
775    }
776
777    #[test]
778    fn truncate_all() {
779        let rng = &mut rand::thread_rng();
780        // Truncating all individuals should actually just return the single
781        // best individual. This situation happens when building the starting
782        // population.
783        let algo = Percentile(0.999_999_999); // Technically less than one.
784        let weights: Vec<f64> = (0..100).map(|x| x as f64 / 100.0).collect();
785        let pairs = algo.pairs(rng, 1, weights);
786        assert!(pairs == [[99, 99]]);
787    }
788
789    #[test]
790    fn all_equal_to_the_best() {
791        let rng = &mut rand::thread_rng();
792        Best(3).select(rng, 1, vec![4.0, 4.0, 4.0, 4.0]);
793    }
794
795    #[test]
796    fn propotional() {
797        let rng = &mut rand::thread_rng();
798        // All scores are equal, proportional should select all of the items.
799        let weights = vec![1.0; 10];
800        let algo = Proportional;
801        let selected = flatten_and_sort(&algo.pairs(rng, 5, weights));
802        assert_eq!(selected, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
803    }
804
805    #[test]
806    fn propotional_outlier() {
807        let rng = &mut rand::thread_rng();
808        // Index 0 is an outlier. Proportional selection should allow the
809        // outlier to dominate the sample. The other items should not be selected.
810        let weights = vec![1000_000_000_000_000.0, 1.0, 1.0, 1.0];
811        let algo = Proportional;
812        let selected = flatten_and_sort(&algo.pairs(rng, 10, weights));
813        let inliers: Vec<_> = selected.iter().filter(|&idx| *idx != 0).collect();
814        assert!(inliers.is_empty());
815    }
816
817    #[test]
818    fn propotional_negative() {
819        let rng = &mut rand::thread_rng();
820        // One score is extremely negative and another is NAN.
821        // Proportional should ignore them.
822        let mut weights = vec![1.0; 12];
823        weights[5] = -100.0;
824        weights[6] = f64::NAN;
825        let algo = Proportional;
826        let selected = flatten_and_sort(&algo.pairs(rng, 5, weights));
827        assert_eq!(selected, [0, 1, 2, 3, 4, 7, 8, 9, 10, 11]);
828    }
829
830    #[test]
831    fn normalized() {
832        let rng = &mut rand::thread_rng();
833        // Normalize can deal with negative scores, it does not care about their absolute values.
834        let weights = vec![-20.0, -12.0, -11.0, -10.5, -10.0, -9.5, -9.0, -8.0, 0.0];
835        const MEAN_IDX: usize = 4;
836        const MAX_IDX: usize = 8;
837        let cutoff = -0.01;
838        let algo = Normalized(cutoff);
839        let selected = flatten_and_sort(&algo.pairs(rng, 2, weights));
840        // Only scores greater than the mean should have been selected.
841        assert!(selected.iter().all(|&x| x >= MEAN_IDX));
842        // The sample should contain the highest score, but not be dominated by it.
843        assert!(selected.contains(&MAX_IDX));
844        assert!(!selected.iter().all(|&x| x == MAX_IDX));
845    }
846
847    #[test]
848    fn ranked_linear() {
849        let rng = &mut rand::thread_rng();
850        // Index 0 is an outlier.
851        // Ranking the scores should prevent the outlier from dominating.
852        let weights = vec![1000_000_000_000_000.0, 1.0, 1.0, 1.0];
853
854        // No selection pressure, should select all four scores.
855        let algo = RankedLinear(0.0);
856        let selected = flatten_and_sort(&algo.pairs(rng, 2, weights));
857        assert_eq!(selected, vec![0, 1, 2, 3]);
858    }
859
860    /// Finds those off-by-one errors.
861    #[test]
862    fn ranked_linear_single() {
863        let rng = &mut rand::thread_rng();
864        let weights = vec![4.0];
865        let algo = RankedLinear(0.5);
866        let selected = flatten_and_sort(&algo.pairs(rng, 1, weights));
867        assert_eq!(selected, vec![0, 0]);
868    }
869
870    #[test]
871    fn ranked_linear_outlier() {
872        let rng = &mut rand::thread_rng();
873        // Index 0 is an outlier.
874        // Ranking the scores should prevent the outlier from dominating.
875        let mut weights = vec![1000_000_000_000_000.0];
876        weights.append(&mut vec![1.0; 1000]);
877        // With selection pressure, the outlier still should not dominate the sampling.
878        let algo = RankedLinear(1.0);
879        let selected = flatten_and_sort(&algo.pairs(rng, 10, weights));
880        let inliers: Vec<_> = selected.iter().filter(|&idx| *idx != 0).collect();
881        assert!(!inliers.is_empty());
882    }
883
884    #[test]
885    fn ranked_exponential() {
886        let rng = &mut rand::thread_rng();
887        let test_cases = [
888            (1, 1, 2, 99), // Test selecting with one single weight does not crash.
889            (3, 1, 4, 1),
890            (100, 10, 100, 5),
891            (1000, 10, 100, 5),
892            (10_000, 100, 10_000, 20),
893            (10_000, 1000, 10_000, 50),
894        ];
895        for (num, median, sample, tolerance) in test_cases {
896            let weights: Vec<f64> = (0..num).map(|x| x as f64).collect();
897            let algo = RankedExponential(median);
898            assert_eq!(sample, (sample / 2) * 2); // Sample count needs to be even for this to work.
899            let selected = flatten_and_sort(&algo.pairs(rng, sample / 2, weights));
900            dbg!(&selected);
901            // Count how many elements are from the top ranked individuals.
902            let top_count_actual = selected
903                .iter()
904                .filter(|&&idx| idx >= (num - median))
905                .count();
906            let top_count_desired = sample / 2;
907            dbg!(num, median, sample, tolerance);
908            dbg!(top_count_actual, top_count_desired);
909            assert!((top_count_actual as i64 - top_count_desired as i64).abs() <= tolerance);
910            assert!(top_count_actual > 0);
911        }
912    }
913
914    /// Check that this avoids mating individuals with themselves.
915    #[test]
916    fn pairs() {
917        let rng = &mut rand::thread_rng();
918        // N is the population size.
919        // P is the number of mating pairs.
920        // R is the percent of the pairs that are duplicates.
921        for (n, max_r) in [
922            (2, 5.0),
923            (3, 4.0),
924            (4, 3.0),
925            (5, 3.0),
926            (10, 3.0),
927            (20, 2.0),
928            (100, 1.0),
929            //
930        ] {
931            let p = 10 * n;
932            // let p = 3;
933            let indices = Random.pairs(rng, p, vec![1.0; n]);
934            let num_repeats = indices.iter().filter(|[a, b]| a == b).count();
935            let percent_repeats = 100.0 * num_repeats as f64 / indices.len() as f64;
936
937            println!("Population Size = {n}, Mating Pairs = {p}, Repeats = {percent_repeats:.2} %");
938            dbg!(indices);
939            assert!(percent_repeats <= max_r);
940        }
941    }
942
943    /// Example of the trait used as an argument.
944    #[test]
945    fn argument() {
946        type Rng = rand::rngs::ThreadRng;
947
948        fn foobar(select: &dyn MateSelection<Rng>) {
949            let rng = &mut rand::thread_rng();
950            select.select(rng, 0, vec![]);
951        }
952
953        let x: &dyn MateSelection<Rng> = if rand::random() {
954            &Random
955        } else {
956            &Proportional
957        };
958
959        foobar(x);
960    }
961}
mate_selection/lib.rs

mate_selection/
lib.rs