gene-evo 3.0.0

Standard and novel genetic evolution algorithm implementations, using advanced rust std functionality and zero dependencies.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
//! Stochastic Training.
//! See the documentation for [`StochasticTrainer`] for more details.

use std::{
    fmt,
    random::RandomSource,
    sync::{mpmc, mpsc},
    thread::{self, ScopedJoinHandle},
};

use crate::{
    Genome, PopulationStats, TrainingReportStrategy, bounds, num_cpus, random_choice_weighted,
    random_f32,
};

// imports for documentation
#[allow(unused_imports)]
use crate::continuous::ContinuousTrainer;

// imports for documentation
#[allow(unused_imports)]
use crate::continuous;

/// This is one of two genetic algorithms in this crate, the "stochastic" strategy.
///
/// This is an implementation of the familiar and well known generation-based genetic algorithm, whereby
/// the full population is evaluated for fitness all at once, some portion is culled based on
/// their fitness score, and then the remaining empty spaces are repopulated from the surviving
/// genes, with choice likelihood based on their relative fitness scores. Each iteration of this
/// process is a single "generation" of the algorithm.
///
/// To use, simply construct a new instance, and then call [`StochasticTrainer::train`] to begin the process.
/// For more detailed control over the finishing criteria, reporting strategy, and culling selection strategy,
/// call [`StochasticTrainer::train_custom`] instead.
pub struct StochasticTrainer<'scope, G> {
    /// A collection of all genes in the population, alongside
    /// their associated fitness scores, if computed at this point in the
    /// training process.
    ///
    /// Note: members of the population
    /// in this trainer are unsorted, unlike in [`ContinuousTrainer`]
    pub gene_pool: Vec<(G, Option<f32>)>,

    /// The current number of generations the trainer has iterated.
    pub generation: usize,

    /// The mutation rate of newly reproduced children.
    pub mutation_rate: f32,

    /// The proportion of newly reproduced children that are created as a result of
    /// crossbreeding vs mutations.
    ///
    /// Higher = more crossbreeding, lower = more mutations.
    /// Set to 1 to only create new children via crossbreeding, and 0 to only create new children
    /// via mutation.
    pub reproduction_type_proportion: f32,
    #[allow(unused)]
    worker_pool: Vec<ScopedJoinHandle<'scope, ()>>,
    work_submission: mpmc::Sender<(usize, G)>,
    work_reception: mpsc::Receiver<(usize, f32)>,
    population_size: usize,
}

impl<'scope, G> StochasticTrainer<'scope, G> {
    /// Construct a new trainer with a given population size, mutation rate, and reproduction type proportion.
    ///
    /// A [`RandomSource`] must be passed in order to seed the
    /// initial population.
    ///
    /// A reference to a [`thread::Scope`] must be passed as well in order
    /// to spawn the child worker threads for the lifetime of the trainer.
    pub fn new<R>(
        population_size: usize,
        mutation_rate: f32,
        reproduction_type_proportion: f32,
        rng: &mut R,
        scope: &'scope thread::Scope<'scope, '_>,
    ) -> Self
    where
        G: Genome + Send + 'scope,
        R: RandomSource,
    {
        let (work_submission, inbox) = mpmc::channel();
        let (outbox, work_reception) = mpsc::channel();
        let gene_pool = (0..population_size)
            .map(|_| (G::generate(rng), None))
            .collect();
        let worker_pool = (0..num_cpus())
            .map(|_| {
                let inbox = inbox.clone();
                let outbox = outbox.clone();
                scope.spawn(move || StochasticTrainer::<G>::worker_thread(inbox, outbox))
            })
            .collect();
        Self {
            gene_pool,
            worker_pool,
            work_submission,
            work_reception,
            generation: 0,
            population_size,
            mutation_rate,
            reproduction_type_proportion,
        }
    }

    fn worker_thread<I>(inbox: mpmc::Receiver<(I, G)>, outbox: mpsc::Sender<(I, f32)>)
    where
        G: Genome,
    {
        for (id, gene) in inbox {
            let result = gene.fitness();
            outbox.send((id, result)).unwrap();
        }
    }

    /// Evaluate the fitness of all genes in the population lacking fitness
    /// scores in parallel. 
    /// 
    /// All genes with a `None` as the second value in
    /// their pair in the [`StochasticTrainer::gene_pool`] will have their
    /// fitness evaluated and defined.
    pub fn eval(&mut self)
    where
        G: Clone,
    {
        let to_eval = self
            .gene_pool
            .iter()
            .enumerate()
            .filter_map(|(i, (gene, score))| score.is_none().then(|| (i, gene.clone())))
            .map(|p| self.work_submission.send(p).unwrap())
            .count();
        for (i, score) in self.work_reception.iter().take(to_eval) {
            self.gene_pool[i].1 = Some(score);
        }
    }

    /// Retrieve an iterator over all the individual fitness scores
    /// in the population.
    pub fn scores(&self) -> impl Iterator<Item = f32> {
        self.gene_pool.iter().filter_map(|(_, s)| *s)
    }

    /// Prune the population using the given `selection_strategy`. 
    /// 
    /// The strategy should take a float between 0-1 representing the percentile of the
    /// fitness score associated (higher is better), and return
    /// a bool representing whether or not to retain the gene associated with this score
    /// in the population.
    ///
    /// Requires a [`RandomSource`] to pass forward to the `selection_strategy`, in case
    /// some randomness is desired in the strategy's decision method.
    pub fn prune<R>(&mut self, mut selection_strategy: impl FnMut(f32, &mut R) -> bool, rng: &mut R)
    where
        R: RandomSource,
    {
        let (min_score, max_score) = bounds(self.scores()).unwrap();
        let score_range = max_score - min_score;
        self.gene_pool.retain(|(_, score)| {
            let Some(score) = score else {
                return true;
            };
            let percentile = (*score - min_score) / score_range;
            (selection_strategy)(percentile, rng)
        });
    }

    /// Reproduce new children into the population, up to the population cap.
    /// 
    /// Chooses parent genes from the population to mutate or crossbreed, preferentially weighted by fitness
    /// score.
    ///
    /// Requires a [`RandomSource`] used to perform
    /// mutations on the new children, and to select a random parent to produce offspring from.
    pub fn reproduce<R>(&mut self, rng: &mut R)
    where
        R: RandomSource,
        G: Genome + Clone,
    {
        let (min_score, max_score) = bounds(self.scores()).unwrap();
        let score_range = max_score - min_score;
        let mut total_score = 0.0;
        let percentile_pairs: Vec<_> = self
            .gene_pool
            .iter()
            .enumerate()
            .map(|(i, (_, score))| {
                let Some(score) = score else {
                    return (i, 0.0);
                };
                let percentile = (*score - min_score) / score_range;
                total_score += percentile;
                (i, percentile)
            })
            .collect();
        while self.gene_pool.len() < self.population_size {
            let should_crossbreed = random_f32(rng) < self.reproduction_type_proportion;
            let mut choose_parent_index = || random_choice_weighted(&percentile_pairs, rng);
            let new_child = if should_crossbreed {
                // same mother & father could potentially be chosen; this is acceptable,
                // as it is unlikely with a sufficiently large populations, and the only
                // outcome is a gene that is identical to the singular parent, equivalent
                // to a gene chosen for mutation with mutation rate 0.
                let &mother_index = (choose_parent_index)();
                let &father_index = (choose_parent_index)();
                self.gene_pool[mother_index]
                    .0
                    .crossbreed(&self.gene_pool[father_index].0, rng)
            } else {
                let &parent_index = (choose_parent_index)();
                let mut new_child = self.gene_pool[parent_index].0.clone();
                new_child.mutate(self.mutation_rate, rng);
                new_child
            };
            self.gene_pool.push((new_child, None));
        }
    }

    /// Perform one iteration of the genetic evolution process.
    ///
    /// 1. Increment the generation
    /// 2. Evaluate the fitness for all genes in the current population
    /// 3. Compute statistics about the current population's fitness scores, if necessary
    /// 4. Prune the population using the passed `selection_strategy` (see [`StochasticTrainer::prune`] for a description of what the selection strategy is)
    /// 5. Reproduce the population to the current cap using the passed [`RandomSource`]
    /// 6. Generate a report about the current generation's statistics, if necessary
    ///
    /// As this function is responsible for progress reporting, it receives a reference
    /// to a [`TrainingReportStrategy`].
    /// 
    /// This function is used internally by the training process; it should
    /// typically not be called directly unless the user knows what they're doing.
    pub fn step<R>(
        &mut self,
        selection_strategy: impl FnMut(f32, &mut R) -> bool,
        mut reporting_strategy: Option<
            &mut TrainingReportStrategy<
                impl FnMut(TrainingCriteriaMetrics) -> bool,
                impl FnMut(TrainingStats),
            >,
        >,
        rng: &mut R,
    ) where
        G: Clone + Genome,
        R: RandomSource,
    {
        self.generation += 1;
        self.eval();

        let stats = reporting_strategy
            .as_mut()
            .and_then(|s| (s.should_report)(self.metrics()).then(|| self.stats()));
        self.prune(selection_strategy, rng);
        self.reproduce(rng);
        if let (Some(stats), Some(reporting_strategy)) = (stats, &mut reporting_strategy) {
            (reporting_strategy.report_callback)(stats);
        }
    }

    /// Begin training, finishing once the number of generations passed
    /// have been reached.
    ///
    /// A [`RandomSource`] must be passed as a source of randomness
    /// for choosing parents, mutating genes to produce new offspring,
    /// and to be passed to the selection strategy when
    /// deciding to prune a given gene from the gene pool.
    pub fn train<R>(&mut self, generations: usize, rng: &mut R) -> G
    where
        G: Clone + Genome,
        R: RandomSource,
    {
        self.train_custom(
            default_selection_strategy,
            |m| m.generation <= generations,
            Some(default_reporting_strategy()),
            rng,
        )
    }

    /// Begin training with more detailed custom parameters.
    ///
    /// A manual `selection_strategy` is passed, which determines which genes to prune.
    /// The strategy should take a float between 0-1 representing the percentile of the
    /// fitness score associated (higher is better), and return
    /// a bool representing whether or not to retain the gene associated with this score
    /// in the population.
    /// To mimic the default selection functionality, pass a reference to [`default_selection_strategy`].
    ///
    /// Instead of a specific generation count cutoff point, a function `train_criteria` is passed in,
    /// which takes in an instance of [`TrainingCriteriaMetrics`] and outputs a bool.
    /// This allows greater control over exactly what criteria to finish training under.
    ///
    /// Additionally, the user may pass a `reporting_strategy`, which determines the conditions
    /// and method under which periodic statistical reporting of the population is performed.
    /// Pass `None` to disable reporting entirely, otherwise pass `Some` with an instance of
    /// [`TrainingReportStrategy`] to define the two methods necessary to manage reporting.
    /// To mimic the default reporting strategy, pass the result of [`default_reporting_strategy()`] wrapped
    /// in `Some()`.
    ///
    /// A [`RandomSource`] must be passed as a source of randomness
    /// for selecting parent genes, mutating genes to produce new offspring,
    /// and to be passed to the selection strategy when
    /// deciding to prune a given gene from the gene pool.
    pub fn train_custom<R>(
        &mut self,
        mut selection_strategy: impl FnMut(f32, &mut R) -> bool,
        mut training_criteria: impl FnMut(TrainingCriteriaMetrics) -> bool,
        mut reporting_strategy: Option<
            TrainingReportStrategy<
                impl FnMut(TrainingCriteriaMetrics) -> bool,
                impl FnMut(TrainingStats),
            >,
        >,
        rng: &mut R,
    ) -> G
    where
        G: Clone + Genome,
        R: RandomSource,
    {
        loop {
            self.step::<R>(&mut selection_strategy, reporting_strategy.as_mut(), rng);
            if !(training_criteria)(self.metrics()) {
                break;
            }
        }
        self.gene_pool
            .iter()
            .filter_map(|(g, score)| score.map(|s| (g, s)))
            .max_by(|a, b| a.1.total_cmp(&b.1))
            .unwrap()
            .0
            .clone()
    }

    /// Generate population stats for the current state of this trainer.
    ///
    /// This function is called whenever the reporting strategy is asked
    /// to produce a report about the current population, but it may also be called
    /// manually here.
    pub fn stats(&self) -> TrainingStats {
        TrainingStats {
            population_stats: self.scores().collect(),
            generation: self.generation,
        }
    }

    /// Generate training criteria metrics for the current state of this trainer.
    ///
    /// This is a strict subset of the data available in an instance of [`TrainingStats`]
    /// returned from calling [`StochasticTrainer::stats`]. However, these
    /// metrics were chosen specifically for their computation efficiency, and thus can be
    /// re-evaluated frequently with minimal cost. These metrics are used both to determine
    /// whether or not to continue training, and whether or not to display a report about
    /// training progress.
    ///
    /// Unfortunately, due to the unsorted nature of the gene pool in efficient stochastic training,
    /// these statistics are much more minimal than the ones available in
    /// [`continuous::TrainingCriteriaMetrics`]
    pub fn metrics(&self) -> TrainingCriteriaMetrics {
        TrainingCriteriaMetrics {
            generation: self.generation,
        }
    }
}

/// A collection of relevant & quick to compute metrics that
/// can be used to inform whether or not to continue training.
///
/// Unfortunately, because the gene population isn't inherently
/// ordered like in [`continuous::ContinuousTrainer`],
/// the only metric that can be presented quickly for this purpose
/// is the current `generation` of the training process.
pub struct TrainingCriteriaMetrics {
    /// Total number of generations that have elapsed.
    pub generation: usize,
}

/// A collection of statistics about the population as a whole.
///
/// Relatively more expensive to compute than training metrics, so
/// should be computed infrequently.
#[derive(Clone, Copy, Debug)]
pub struct TrainingStats {
    /// A collection of standard population stats: see [`PopulationStats`]
    /// for more information
    pub population_stats: PopulationStats,

    /// Total number of generations that have elapsed.
    pub generation: usize,
}

impl fmt::Display for TrainingStats {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "generation {} {}",
            self.generation, self.population_stats
        )
    }
}

/// Returns the default reporting strategy used by [`StochasticTrainer::train`].
///
/// Logs population statistics to the console after every generation.
pub fn default_reporting_strategy()
-> TrainingReportStrategy<impl FnMut(TrainingCriteriaMetrics) -> bool, impl FnMut(TrainingStats)> {
    TrainingReportStrategy {
        should_report: |_| true,
        report_callback: |s| println!("{s}"),
    }
}

/// The default selection strategy used by [`StochasticTrainer::train`].
///
/// Randomly selects to retain or cull the given fitness score, with a
/// likelihood weighted higher the closer the score is to 1, and lower the closer
/// it is to 0.
pub fn default_selection_strategy<R>(score: f32, rng: &mut R) -> bool
where
    R: RandomSource,
{
    score > random_f32(rng)
}