1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
//! Representations capable of Genetic Programming. use itertools::Itertools; use polytype::TypeSchema; use rand::{seq, Rng}; use std::cmp::Ordering; use Task; /// Parameters for genetic programming. pub struct GPParams { pub population_size: usize, pub tournament_size: usize, /// Probability for a mutation. If mutation doesn't happen, the crossover will happen. pub mutation_prob: f64, /// The number of new children added to the population with each step of evolution. /// Traditionally, this would be set to 1. If it is larger than 1, mutations and crossover will /// be repeated until the threshold of `n_delta` is met. pub n_delta: usize, } /// A kind of representation suitable for **genetic programming**. /// /// Implementors of `GP` must provide methods for [`genesis`], [`mutate`], [`crossover`]. A /// [`Task`] provides a fitness function via its [`oracle`]: we adopt the convention that smaller /// values are better (so one can think of the [`oracle`] as providing a measure of error). To opt /// out of the default tournament-based selection, implementors may override the [`tournament`] /// method. /// /// Typically, you will interact with this trait using the [`init`] and [`evolve`] methods on /// existing implementations, such as [`pcfg::Grammar`]. /// /// The provided methods (namely [`init`] and [`evolve`]) manipulate a _population_, which is a /// sorted list of programs according to fitness. If you otherwise modify the population such that /// it is no longer sorted appropriately, these methods will behave incorrectly. /// /// # Examples /// /// This example has a lot of code, but it is quite straightforward. Given a PCFG and an evaluator /// for sentences, a task is created by measuring proximity of an evaluted sentence to some target. /// We finally pick some parameters and evolve a population for a few hundred generations. /// /// ``` /// #[macro_use] /// extern crate polytype; /// extern crate programinduction; /// extern crate rand; /// use programinduction::pcfg::{self, Grammar, Rule}; /// use programinduction::{GPParams, Task, GP}; /// use rand::{rngs::SmallRng, SeedableRng}; /// /// fn evaluator(name: &str, inps: &[i32]) -> Result<i32, ()> { /// match name { /// "0" => Ok(0), /// "1" => Ok(1), /// "plus" => Ok(inps[0] + inps[1]), /// _ => unreachable!(), /// } /// } /// /// fn main() { /// let g = Grammar::new( /// tp!(EXPR), /// vec![ /// Rule::new("0", tp!(EXPR), 1.0), /// Rule::new("1", tp!(EXPR), 1.0), /// Rule::new("plus", tp!(@arrow[tp!(EXPR), tp!(EXPR), tp!(EXPR)]), 1.0), /// ], /// ); /// let target = 6; /// let task = Task { /// oracle: Box::new(|g: &Grammar, expr| { /// if let Ok(n) = g.eval(expr, &evaluator) { /// (n - target).abs() as f64 // numbers close to target /// } else { /// std::f64::INFINITY /// } /// }), /// tp: ptp!(EXPR), /// observation: (), /// }; /// /// let gpparams = GPParams { /// population_size: 10, /// tournament_size: 5, /// mutation_prob: 0.6, /// n_delta: 1, /// }; /// let params = pcfg::GeneticParams::default(); /// let generations = 1000; /// let rng = &mut SmallRng::from_seed([1u8; 16]); /// /// let mut pop = g.init(¶ms, rng, &gpparams, &task); /// for _ in 0..generations { /// g.evolve(¶ms, rng, &gpparams, &task, &mut pop) /// } /// /// // perfect winner is found! /// let &(ref winner, score) = &pop[0]; /// assert_eq!(6, g.eval(winner, &evaluator).unwrap()); /// assert_eq!(0.0, score); /// } /// ``` /// /// [`genesis`]: #tymethod.genesis /// [`mutate`]: #tymethod.mutate /// [`crossover`]: #tymethod.crossover /// [`tournament`]: #method.tournament /// [`init`]: #method.mutate /// [`evolve`]: #method.crossover /// [`Task`]: struct.Task.html /// [`oracle`]: struct.Task.html#structfield.oracle /// [`pcfg::Grammar`]: pcfg/struct.Grammar.html pub trait GP: Send + Sync + Sized { /// An Expression is a sentence in the representation. **Tasks are solved by Expressions**. type Expression: Clone + Send + Sync; /// Extra parameters for a representation go here. type Params; /// Create an initial population for a particular requesting type. fn genesis<R: Rng>( &self, params: &Self::Params, rng: &mut R, pop_size: usize, tp: &TypeSchema, ) -> Vec<Self::Expression>; /// Mutate a single program. fn mutate<R: Rng>( &self, params: &Self::Params, rng: &mut R, prog: &Self::Expression, ) -> Self::Expression; /// Perform crossover between two programs. There must be at least one child. fn crossover<R: Rng>( &self, params: &Self::Params, rng: &mut R, parent1: &Self::Expression, parent2: &Self::Expression, ) -> Vec<Self::Expression>; /// A tournament selects an individual from a population. fn tournament<'a, R: Rng>( &self, rng: &mut R, tournament_size: usize, population: &'a [(Self::Expression, f64)], ) -> &'a Self::Expression { seq::sample_iter(rng, 0..population.len(), tournament_size) .expect("tournament size was bigger than population") .into_iter() .map(|i| &population[i]) .max_by(|&&(_, ref x), &&(_, ref y)| x.partial_cmp(y).expect("found NaN")) .map(|&(ref expr, _)| expr) .expect("tournament cannot select winner from no contestants") } /// Initializes a population, which is a list of programs and their scores sorted by score. /// The most-fit individual is the first element in the population. fn init<R: Rng, O: Sync>( &self, params: &Self::Params, rng: &mut R, gpparams: &GPParams, task: &Task<Self, Self::Expression, O>, ) -> Vec<(Self::Expression, f64)> { let exprs = self.genesis(params, rng, gpparams.population_size, &task.tp); exprs .into_iter() .map(|expr| { let l = (task.oracle)(self, &expr); (expr, l) }) .sorted_by(|&(_, ref x), &(_, ref y)| x.partial_cmp(y).expect("found NaN")) } /// Evolves a population. This will repeatedly run a Bernoulli trial with parameter /// [`mutation_prob`] and perform mutation or crossover depending on the outcome until /// [`n_delta`] expressions are determined. /// /// [`mutation_prob`]: struct.GPParams.html#mutation_prob /// [`n_delta`]: struct.GPParams.html#n_delta fn evolve<R: Rng, O: Sync>( &self, params: &Self::Params, rng: &mut R, gpparams: &GPParams, task: &Task<Self, Self::Expression, O>, population: &mut Vec<(Self::Expression, f64)>, ) { let mut new_exprs = Vec::with_capacity(gpparams.n_delta); while new_exprs.len() < gpparams.n_delta { if rng.gen_bool(gpparams.mutation_prob) { let parent = self.tournament(rng, gpparams.tournament_size, population); let child = self.mutate(params, rng, parent); let fitness = (task.oracle)(self, &child); new_exprs.push((child, fitness)); } else { let parent1 = self.tournament(rng, gpparams.tournament_size, population); let parent2 = self.tournament(rng, gpparams.tournament_size, population); let children = self.crossover(params, rng, parent1, parent2); let mut scored_children = children .into_iter() .map(|child| { let fitness = (task.oracle)(self, &child); (child, fitness) }) .collect(); new_exprs.append(&mut scored_children); } } new_exprs.truncate(gpparams.n_delta); for child in new_exprs { sorted_place(child, population) } } } fn sorted_place<T>(child: (T, f64), pop: &mut Vec<(T, f64)>) { let mut size = pop.len(); if size == 0 { return; } let idx = { let mut base = 0usize; while size > 1 { let half = size / 2; let mid = base + half; // mid is always in [0, size), that means mid is >= 0 and < size. // mid >= 0: by definition // mid < size: mid = size / 2 + size / 4 + size / 8 ... let other = unsafe { pop.get_unchecked(mid) }; let cmp = other.1.partial_cmp(&child.1).expect("found NaN"); base = if cmp == Ordering::Greater { base } else { mid }; size -= half; } // base is always in [0, size) because base <= mid. let other = unsafe { pop.get_unchecked(base) }; let cmp = other.1.partial_cmp(&child.1).expect("found NaN"); if cmp == Ordering::Equal { base } else { base + (cmp == Ordering::Less) as usize } }; if idx < size { pop[idx] = child; } }