programinduction 0.3.1

//! Representations capable of Exploration-Compression.

use std::collections::HashMap;
use std::ops::{Deref, DerefMut};
use std::sync::{Arc, Mutex};
use std::time::Duration;
use std::thread;
use crossbeam_channel::bounded;
use polytype::TypeSchema;
use rayon::prelude::*;

use Task;

/// Parameters for the EC algorithm.
///
/// The first of these limits/timeouts to be hit determines termination of enumeration. It is
/// dangerous to have both search limits set to `None`!
pub struct ECParams {
    /// The maximum frontier size; the number of task solutions to be hit before enumeration is
    /// stopped for a particular task.
    pub frontier_limit: usize,
    /// A timeout before enumeration is stopped, run independently per distinct `TypeSchema` being
    /// enumerated. If this is reached, there may be fewer than `frontier_limit` many solutions.
    pub search_limit_timeout: Option<Duration>,
    /// An approximate limit on enumerated description length. If this is reached, there may be
    /// fewer than `frontier_limit` many solutions.
    pub search_limit_description_length: Option<f64>,
}

/// A kind of representation suitable for **exploration-compression**.
///
/// For details on the EC algorithm, see the module-level documentation [here].
///
/// Implementors of `EC` need only provide an [`enumerate`] and [`compress`] methods. By doing so,
/// we provide the [`ec`], [`ec_with_recognition`], and [`explore`] methods.
///
/// Typically, you will interact with this trait via existing implementations, such as with
/// [`lambda::Language`] or [`pcfg::Grammar`].
///
/// [here]: index.html#bayesian-program-learning-with-the-ec-algorithm
/// [`enumerate`]: #tymethod.enumerate
/// [`compress`]: #tymethod.compress
/// [`ec`]: #method.ec
/// [`ec_with_recognition`]: #method.ec_with_recognition
/// [`explore`]: #method.explore
/// [`lambda::Language`]: lambda/struct.Language.html
/// [`pcfg::Grammar`]: pcfg/struct.Grammar.html
pub trait EC: Send + Sync + Sized {
    /// An Expression is a sentence in the representation. Tasks are solved by Expressions.
    type Expression: Clone + Send + Sync;
    /// Many representations have some parameters for compression. They belong here.
    type Params;

    /// Get an iterator over [`Expression`]s for a given type and corresponding log-priors.
    /// This enumeration should be best-first: the log-prior of enumerated expressions should
    /// generally increase, so simple expressions are enumerated first.
    ///
    /// This will in most cases iterate infinitely, giving increasingly complex expressions.
    ///
    /// [`Expression`]: #associatedtype.Expression
    fn enumerate<'a>(
        &'a self,
        tp: TypeSchema,
    ) -> Box<Iterator<Item = (Self::Expression, f64)> + 'a>;
    /// Update the representation based on findings of expressions that solve [`Task`]s.
    ///
    /// The `frontiers` argument, and similar return value, must always be of the same size as
    /// `tasks`. Each frontier is a possibly-empty list of expressions that solve the corresponding
    /// task, and the log-prior and log-likelihood for that expression.
    ///
    /// [`Task`]: struct.Task.html
    fn compress<O: Sync>(
        &self,
        params: &Self::Params,
        tasks: &[Task<Self, Self::Expression, O>],
        frontiers: Vec<ECFrontier<Self>>,
    ) -> (Self, Vec<ECFrontier<Self>>);

    // provided methods:

    /// The entry point for one iteration of the EC algorithm.
    ///
    /// Returned solutions include the log-prior and log-likelihood of successful expressions.
    ///
    /// # Examples
    ///
    /// ```
    /// # extern crate programinduction;
    /// use programinduction::{lambda, ECParams, EC};
    /// use programinduction::domains::circuits;
    ///
    /// # fn main() {
    /// let mut dsl = circuits::dsl();
    /// let tasks = circuits::make_tasks(250);
    /// let ec_params = ECParams {
    ///     frontier_limit: 10,
    ///     search_limit_timeout: None,
    ///     search_limit_description_length: Some(8.0),
    /// };
    /// let params = lambda::CompressionParams::default();
    ///
    /// let mut frontiers = Vec::new();
    /// for i in 1..6 {
    ///     println!("running EC iteration {}", i);
    ///
    ///     let (new_dsl, new_frontiers) = dsl.ec(&ec_params, &params, &tasks);
    ///     dsl = new_dsl;
    ///     frontiers = new_frontiers;
    ///
    ///     let n_hit = frontiers.iter().filter(|f| !f.is_empty()).count();
    ///     println!("hit {} of {}", n_hit, tasks.len());
    /// }
    /// assert!(!dsl.invented.is_empty());
    /// for &(ref expr, _, _) in &dsl.invented {
    ///     println!("invented {}", dsl.display(expr))
    /// }
    /// # }
    /// ```
    ///
    fn ec<O: Sync>(
        &self,
        ecparams: &ECParams,
        params: &Self::Params,
        tasks: &[Task<Self, Self::Expression, O>],
    ) -> (Self, Vec<ECFrontier<Self>>) {
        let frontiers = self.explore(ecparams, tasks);
        self.compress(params, tasks, frontiers)
    }

    /// The entry point for one iteration of the EC algorithm with a recognizer, very similar to
    /// [`ec`].
    ///
    /// The recognizer supplies a representation for every task which is then used for
    /// exploration-compression.
    ///
    /// Returned solutions include the log-prior and log-likelihood of successful expressions.
    ///
    /// [`ec`]: #method.ec
    fn ec_with_recognition<O: Sync, R>(
        &self,
        ecparams: &ECParams,
        params: &Self::Params,
        tasks: &[Task<Self, Self::Expression, O>],
        recognizer: R,
    ) -> (Self, Vec<ECFrontier<Self>>)
    where
        R: FnOnce(&Self, &[Task<Self, Self::Expression, O>]) -> Vec<Self>,
    {
        let recognized = recognizer(self, tasks);
        let frontiers = self.explore_with_recognition(ecparams, tasks, &recognized);
        self.compress(params, tasks, frontiers)
    }

    /// Enumerate solutions for the given tasks.
    ///
    /// Considers a "solution" to be any expression with finite log-probability according to a
    /// task's oracle.
    ///
    /// Each task will be associated with at most `params.frontier_limit` many such expressions,
    /// and enumeration is stopped when `params.search_limit` valid expressions have been checked.
    ///
    /// # Examples
    ///
    /// ```
    /// # #[macro_use]
    /// # extern crate polytype;
    /// # extern crate programinduction;
    /// use programinduction::{EC, ECParams};
    /// use programinduction::pcfg::{Grammar, Rule, task_by_evaluation};
    ///
    /// fn evaluator(name: &str, inps: &[i32]) -> i32 {
    ///     match name {
    ///         "0" => 0,
    ///         "1" => 1,
    ///         "plus" => inps[0] + inps[1],
    ///         _ => unreachable!(),
    ///     }
    /// }
    ///
    /// # fn main() {
    /// let g = Grammar::new(
    ///     tp!(EXPR),
    ///     vec![
    ///         Rule::new("0", tp!(EXPR), 1.0),
    ///         Rule::new("1", tp!(EXPR), 1.0),
    ///         Rule::new("plus", tp!(@arrow[tp!(EXPR), tp!(EXPR), tp!(EXPR)]), 1.0),
    ///     ],
    /// );
    /// let ec_params = ECParams {
    ///     frontier_limit: 1,
    ///     search_limit_timeout: Some(std::time::Duration::new(1, 0)),
    ///     search_limit_description_length: None,
    /// };
    /// // task: the number 4
    /// let task = task_by_evaluation(&evaluator, &4, tp!(EXPR));
    ///
    /// let frontiers = g.explore(&ec_params, &[task]);
    /// assert!(frontiers[0].best_solution().is_some());
    /// # }
    /// ```
    fn explore<O: Sync>(
        &self,
        ec_params: &ECParams,
        tasks: &[Task<Self, Self::Expression, O>],
    ) -> Vec<ECFrontier<Self>> {
        let mut tps = HashMap::new();
        for (i, task) in tasks.into_iter().enumerate() {
            tps.entry(&task.tp).or_insert_with(Vec::new).push((i, task))
        }
        let mut results: Vec<ECFrontier<Self>> =
            (0..tasks.len()).map(|_| ECFrontier::default()).collect();
        {
            let mutex = Arc::new(Mutex::new(&mut results));
            tps.into_par_iter()
                .map(|(tp, tasks)| enumerate_solutions(self, ec_params, tp.clone(), tasks))
                .flat_map(|iter| iter)
                .for_each(move |(i, frontier)| {
                    let mut results = mutex.lock().unwrap();
                    results[i] = frontier
                });
        }
        results
    }

    /// Like [`explore`], but with specific "recognized" representations for each task.
    ///
    /// [`explore`]: #method.explore
    fn explore_with_recognition<O: Sync>(
        &self,
        ec_params: &ECParams,
        tasks: &[Task<Self, Self::Expression, O>],
        representations: &[Self],
    ) -> Vec<ECFrontier<Self>> {
        tasks
            .par_iter()
            .zip(representations)
            .enumerate()
            .map(|(i, (t, repr))| {
                enumerate_solutions(repr, ec_params, t.tp.clone(), vec![(i, t)])
                    .pop()
                    .unwrap()
                    .1
            })
            .collect()
    }
}

/// Enumerate solutions for the given tasks which all accord to the given type.
///
/// Considers a "solution" to be any expression with finite log-probability according to a
/// task's oracle.
///
/// Each task will be associated with at most `params.frontier_limit` many such expressions,
/// and enumeration is stopped when `params.search_limit` valid expressions have been checked.
fn enumerate_solutions<L, X, O: Sync>(
    repr: &L,
    params: &ECParams,
    tp: TypeSchema,
    tasks: Vec<(usize, &Task<L, X, O>)>,
) -> Vec<(usize, ECFrontier<L>)>
where
    X: Send + Sync + Clone,
    L: EC<Expression = X>,
{
    // initialization
    let mut frontiers = tasks
        .iter()
        .map(|&(j, _)| (j, ECFrontier::default()))
        .collect();
    let mut tasks: Vec<_> = tasks
        .into_iter()
        .enumerate()
        .map(|(i, (_, t))| (i, t))
        .collect();

    // termination conditions
    let mut timeout_complete: Box<Fn() -> bool> = Box::new(|| false);
    let (tx, rx) = bounded(1);
    if let Some(duration) = params.search_limit_timeout {
        thread::spawn(move || {
            thread::sleep(duration);
            tx.send(()).unwrap_or(());
        });
        timeout_complete = Box::new(move || rx.try_recv().is_ok());
    }
    let mut dl_complete: Box<Fn(f64) -> bool> = Box::new(|_| false);
    if let Some(dl) = params.search_limit_description_length {
        dl_complete = Box::new(move |logprior| -logprior > dl);
    }

    // update frontiers and check for termination
    let mut update = |frontiers: &mut Vec<(usize, ECFrontier<L>)>, expr: X, logprior: f64| {
        let evaluations: Vec<_> = tasks
            .par_iter()
            .map(|&(i, t)| {
                let log_likelihood = (t.oracle)(repr, &expr);
                (i, t, log_likelihood)
            })
            .collect();
        tasks = evaluations
            .into_iter()
            .filter_map(|(i, t, l)| {
                if l.is_finite() {
                    frontiers[i].1.push(expr.clone(), logprior, l);
                    if frontiers[i].1.len() >= params.frontier_limit {
                        None
                    } else {
                        Some((i, t))
                    }
                } else {
                    Some((i, t))
                }
            })
            .collect();
        !(tasks.is_empty() || timeout_complete() || dl_complete(logprior))
    };

    for (expr, log_prior) in repr.enumerate(tp) {
        if !update(&mut frontiers, expr, log_prior) {
            break;
        }
    }
    frontiers
}

/// A set of expressions which solve a task.
///
/// Stores tuples of [`Expression`], log-prior, and log-likelihood.
///
/// [`Expression`]: trait.EC.html#associatedtype.Expression
#[derive(Clone, Debug)]
pub struct ECFrontier<L: EC>(pub Vec<(L::Expression, f64, f64)>);
impl<L: EC> ECFrontier<L> {
    pub fn push(&mut self, expr: L::Expression, log_prior: f64, log_likelihood: f64) {
        self.0.push((expr, log_prior, log_likelihood))
    }
    pub fn best_solution(&self) -> Option<&(L::Expression, f64, f64)> {
        self.0
            .iter()
            .max_by(|&&(_, xp, xl), &&(_, yp, yl)| (xp + xl).partial_cmp(&(yp + yl)).unwrap())
    }
}
impl<L: EC> Default for ECFrontier<L> {
    fn default() -> Self {
        ECFrontier(vec![])
    }
}
impl<L: EC> Deref for ECFrontier<L> {
    type Target = Vec<(L::Expression, f64, f64)>;
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}
impl<L: EC> DerefMut for ECFrontier<L> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}