//! meiosis 0.1.0
//!
//! An evolutionary algorithm library with as many compile time checks as possible.
use rand::seq::SliceRandom;
use rand::Rng;

use crate::operators::selection::{select_multiple_from_single, NonSelected, Selected, Selection};
use crate::phenotype::Phenotype;
use crate::species::Evaluated;

/// Fitness proportionate selection using a classic roulette wheel.
/// # Notes
/// Roulette Wheel Selection is allowed to select the same [Phenotype] multiple times,
/// and so we require those to be cloneable.
/// # External Resources
/// <https://en.wikipedia.org/wiki/Fitness_proportionate_selection>
// `Default` added for consistency with `StochasticAcceptance`; `None` is the natural default.
#[derive(Copy, Clone, Debug, Default)]
// We want to configure and construct these structs directly for convenience.
#[allow(clippy::exhaustive_structs)]
pub struct RouletteWheelSelection {
    /// Number of parents to select.
    ///
    /// Leaving this on `None` (also the `Default`) will select as many as there are in the population.
    /// Setting it to a greater value than the population size will result in the following
    /// recombination having more parents available with the fittest ones representing a big part of it.
    pub max_selections: Option<usize>,
}

impl<PHENOTYPE> Selection<PHENOTYPE> for RouletteWheelSelection
where
    PHENOTYPE: Phenotype + Clone,
{
    fn select<RNG>(
        &self,
        rng: &mut RNG,
        population: Vec<Evaluated<PHENOTYPE>>,
    ) -> (Selected<PHENOTYPE>, NonSelected<PHENOTYPE>)
    where
        RNG: Rng,
    {
        // Selecting many parents is just repeated single selection; the shared helper
        // drives `select_single` until `max_selections` (or, for `None`, the population
        // size) picks have been made.
        select_multiple_from_single(self, population, rng, self.max_selections)
    }

    // this is going to get rather complicated once the type becomes generic, but for now it's okay
    #[allow(clippy::float_arithmetic)]
    // We know the population can not be empty at this point. Even if it was through a bug, that
    // means an invariant was not upheld, thus panicking is okay.
    #[allow(clippy::expect_used)]
    fn select_single<RNG>(
        &self,
        population: Vec<Evaluated<PHENOTYPE>>,
        rng: &mut RNG,
    ) -> (Evaluated<PHENOTYPE>, NonSelected<PHENOTYPE>)
    where
        RNG: Rng,
    {
        // The circumference of the "wheel": every member's fitness summed up.
        let total_fitness = population
            .iter()
            .map(|member| member.fitness)
            .reduce(|sum, fitness| sum + fitness)
            .expect("selection population should contain members");

        // A wheel of size zero has no meaningful proportions (all solutions are
        // equally bad), so fall back to a uniform random pick.
        if total_fitness == 0.0_f64 {
            let fallback = population.choose(rng).expect("population should not be empty").clone();
            return (fallback, population);
        }

        // "Spin" the wheel: pick a point somewhere in [0, total_fitness) ...
        let spin = rng.gen_range(0.0_f64..total_fitness);

        // ... then walk the members, growing the covered slice of the wheel by each
        // member's fitness, until the slice reaches past the chosen point.
        let mut covered = 0.0_f64;
        for member in &population {
            covered += member.fitness;
            if covered > spin {
                return (member.clone(), population);
            }
        }

        // `spin` is strictly below `total_fitness`, and the accumulation above repeats
        // the exact additions of the sum, so the loop always returns before this point.
        unreachable!()
    }
}

/// Fitness proportionate selection using stochastic acceptance.
/// # Notes
/// Roulette Wheel Selection is allowed to select the same [Phenotype] multiple times,
/// and so we require those to be cloneable.
/// # External Resources
/// <https://arxiv.org/abs/1109.3627>
#[derive(Copy, Clone, Debug, Default)]
// We want to configure and construct these structs directly for convenience.
#[allow(clippy::exhaustive_structs)]
pub struct StochasticAcceptance {
    /// Number of parents to select.
    ///
    /// Leaving this on `None` (also the `Default`) will select as many as there are in the population.
    /// Setting it to a greater value than the population size will result in the following
    /// recombination having more parents available with the fittest ones representing a big part of it.
    pub max_selections: Option<usize>,
}

impl<PHENOTYPE> Selection<PHENOTYPE> for StochasticAcceptance
where
    PHENOTYPE: Phenotype + Clone,
{
    /// # Panics
    /// This function panics if the input population is empty.
    #[allow(clippy::expect_used)] // if any of these fail it is an irrecoverable error anyway
    fn select<RNG>(
        &self,
        rng: &mut RNG,
        population: Vec<Evaluated<PHENOTYPE>>,
    ) -> (Selected<PHENOTYPE>, NonSelected<PHENOTYPE>)
    where
        RNG: Rng,
    {
        // `None` means "select as many parents as the population holds".
        let selections = self.max_selections.unwrap_or(population.len());
        let max_fitness = population
            .iter()
            .map(|e| e.fitness)
            .reduce(f64::max)
            .expect("selection population should contain members");

        let mut selected = Vec::with_capacity(selections);

        while selected.len() < selections {
            // Select randomly one of the individuals.
            // The selection is done with uniform probability (1/N),
            // which does not depend on the individual's fitness.
            let chosen = population.choose(rng).expect("population should not be empty");

            // this won't be necessary the moment we introduce generic fitness values
            #[allow(clippy::float_arithmetic)]
            // Calculate the probability of the selected individual being accepted.
            let probability = if max_fitness == 0.0_f64 {
                // If there is no member with any fitness (for example because their fitness function
                // is not well enough designed, or impossible to design for "half-way" correct solutions),
                // we assume 50/50.
                0.5_f64
            } else {
                // NOTE(review): this assumes fitness values are non-negative; a negative
                // fitness would yield a probability outside [0, 1], and `gen_bool` panics
                // on such values — confirm the fitness contract upstream.
                chosen.fitness / max_fitness
            };

            // If it is accepted, we keep it. Otherwise we simply draw again.
            if rng.gen_bool(probability) {
                selected.push(chosen.clone());
            }
        }

        (selected, population)
    }

    /// # Panics
    /// This function panics if the input population is empty.
    // We know the population can not be empty at this point. Even if it was through a bug, that
    // means an invariant was not upheld, thus panicking is okay.
    #[allow(clippy::expect_used)]
    fn select_single<RNG>(
        &self,
        population: Vec<Evaluated<PHENOTYPE>>,
        rng: &mut RNG,
    ) -> (Evaluated<PHENOTYPE>, NonSelected<PHENOTYPE>)
    where
        RNG: Rng,
    {
        let max_fitness = population
            .iter()
            .map(|e| e.fitness)
            .reduce(f64::max)
            .expect("selection population should contain members");

        // This loop *should* not get stuck, as we theoretically at some point will either
        // have a probability accepted, or we randomly select the member with the highest fitness
        // (whose acceptance probability is 1).
        loop {
            // Select randomly one of the individuals.
            // The selection is done with uniform probability (1/N),
            // which does not depend on the individual's fitness.
            let selected = population.choose(rng).expect("population should not be empty");

            // this won't be necessary the moment we introduce generic fitness values
            #[allow(clippy::float_arithmetic)]
            // Calculate the probability of the selected individual being accepted.
            let probability = if max_fitness == 0.0_f64 {
                // If there is no member with any fitness (for example because their fitness function
                // is not well enough designed, or impossible to design for "half-way" correct solutions),
                // we assume 50/50.
                0.5_f64
            } else {
                // NOTE(review): same non-negative-fitness assumption as in `select` above.
                selected.fitness / max_fitness
            };

            // If it is accepted, we return it
            if rng.gen_bool(probability) {
                return (selected.clone(), population);
            }
        }
    }
}

/// Fitness proportionate selection
/// # Panics
/// This strategy does not implement `select_single` and thus will panic if called using [Self].
/// # External Resources
/// <https://en.wikipedia.org/wiki/Stochastic_universal_sampling>
// NOTE(review): unlike the other strategies this struct is not `pub` — presumably kept
// private until `select` is implemented; confirm before exporting.
struct StochasticUniversalSampling;

impl<PHENOTYPE> Selection<PHENOTYPE> for StochasticUniversalSampling
where
    PHENOTYPE: Phenotype,
{
    // Stub: only `select` is declared here; calling this strategy panics (see the
    // type-level `# Panics` note). The pseudocode below is the planned algorithm
    // (from the Wikipedia article linked on the struct).
    #[allow(clippy::unimplemented)] // I haven't had time to implement this yet
    fn select<R>(
        &self,
        _rng: &mut R,
        _population: Vec<Evaluated<PHENOTYPE>>,
    ) -> (Selected<PHENOTYPE>, NonSelected<PHENOTYPE>)
    where
        R: Rng,
    {
        // SUS(Population, N)
        //     F := total fitness of Population
        //     N := number of offspring to keep
        //     P := distance between the pointers (F/N)
        //     Start := random number between 0 and P
        //     Pointers := [Start + i*P | i in [0..(N-1)]]
        //     return RWS(Population,Pointers)
        //
        // RWS(Population, Points)
        //     Keep = []
        //     for P in Points
        //         I := 0
        //         while fitness sum of Population[0..I] < P
        //             I++
        //         add Population[I] to Keep
        //     return Keep
        unimplemented!()
    }
}

#[cfg(test)]
mod tests {
    use crate::species::Evaluated;
    use rand::rngs::SmallRng;
    use rand::SeedableRng;

    use super::{Phenotype, Selection, StochasticAcceptance};

    #[test]
    fn stochastic_acceptance_single() {
        impl Phenotype for String {
            type Genotype = String;

            fn to_genotype(self) -> Self::Genotype {
                self
            }
        }

        let mut rng = SmallRng::seed_from_u64(0);

        // over a run of thousands of iterations, with a defined probability, we expect a certain
        // number of selections to be of the same magnitude every time this test is run

        let mut members = vec![0.75, 0.2, 0.05]
            .into_iter()
            // to make this easy on us, we set the phenotype to be the fitness as well
            .map(|m| Evaluated {
                phenotype: m.to_string(),
                fitness: m,
                adjusted_fitness: 0.0
            })
            .collect::<Vec<_>>();

        let selection = StochasticAcceptance { max_selections: None };

        let mut high_chance_counter = 0;
        let mut low_chance_counter = 0;
        let mut miniscule_chance_counter = 0;

        let iterations = 100_000;

        #[allow(clippy::float_cmp)]
        for _ in 1..=iterations {
            let (selected, new_population) = selection.select_single(members, &mut rng);

            members = new_population;

            if selected.fitness == 0.75 {
                high_chance_counter += 1
            }

            if selected.fitness == 0.20 {
                low_chance_counter += 1
            }

            if selected.fitness == 0.05 {
                miniscule_chance_counter += 1
            }
        }

        assert!(
            76000 > high_chance_counter && high_chance_counter > 74000,
            "76000 > {} > 74000",
            high_chance_counter
        );
        assert!(
            21000 > low_chance_counter && low_chance_counter > 19000,
            "21000 > {} > 19000",
            low_chance_counter
        );
        // Fixed copy-paste bug: the lower bound previously tested `high_chance_counter`,
        // which is always > 4000, so the miniscule counter's lower bound was never checked.
        assert!(
            6000 > miniscule_chance_counter && miniscule_chance_counter > 4000,
            "6000 > {} > 4000",
            miniscule_chance_counter
        );

        // and just to make sure we haven't missed anything:
        assert_eq!(
            iterations,
            high_chance_counter + low_chance_counter + miniscule_chance_counter
        )
    }

    #[test]
    fn stochastic_acceptance() {
        let mut rng = SmallRng::seed_from_u64(0);

        // over a run of thousands of iterations, with a defined probability, we expect a certain
        // number of selections to be of the same magnitude every time this test is run

        let mut members = vec![0.75, 0.2, 0.05]
            .into_iter()
            // to make this easy on us, we set the phenotype to be the fitness as well
            .map(|m| Evaluated {
                phenotype: m.to_string(),
                fitness: m,
                adjusted_fitness: 0.0
            })
            .collect::<Vec<_>>();

        let selection = StochasticAcceptance {
            // to make sure this behaves as `select_single`, we set it to 1
            // TODO: improve this so the test actually tests a `population.len() - 1` case
            max_selections: Some(1),
        };

        let mut high_chance_counter = 0;
        let mut low_chance_counter = 0;
        let mut miniscule_chance_counter = 0;

        let iterations = 100_000;

        #[allow(clippy::indexing_slicing, clippy::float_cmp)]
        for _ in 1..=iterations {
            let (selected, new_population) = selection.select(&mut rng, members);

            assert_eq!(selected.len(), 1);

            members = new_population;

            if selected[0].fitness == 0.75 {
                high_chance_counter += 1
            }

            if selected[0].fitness == 0.20 {
                low_chance_counter += 1
            }

            if selected[0].fitness == 0.05 {
                miniscule_chance_counter += 1
            }
        }

        assert!(
            76000 > high_chance_counter && high_chance_counter > 74000,
            "76000 > {} > 74000",
            high_chance_counter
        );
        assert!(
            21000 > low_chance_counter && low_chance_counter > 19000,
            "21000 > {} > 19000",
            low_chance_counter
        );
        // Fixed copy-paste bug: the lower bound previously tested `high_chance_counter`,
        // which is always > 4000, so the miniscule counter's lower bound was never checked.
        assert!(
            6000 > miniscule_chance_counter && miniscule_chance_counter > 4000,
            "6000 > {} > 4000",
            miniscule_chance_counter
        );

        // and just to make sure we haven't missed anything:
        assert_eq!(
            iterations,
            high_chance_counter + low_chance_counter + miniscule_chance_counter
        )
    }
}