forester 0.0.3 - Docs.rs


use num_traits::Bounded;
use rand::distributions::range::SampleRange;
use rand::{thread_rng, Rng};

use criterion::VarianceCriterion;
use data::{SampleDescription, TrainingData};
use iter_mean::IterMean;
use split_between::SplitBetween;

#[derive(Debug, Clone)]
pub struct Sample<'a, X: 'a, Y> {
    pub x: &'a[X],
    pub y: Y,
}

impl<'a, X: 'a, Y> Sample<'a, X, Y> {
    pub fn new(x: &'a[X], y: Y) -> Self {
        Sample {
            x,
            y,
        }
    }
}

impl<'a, X> SampleDescription for Sample<'a, X, f64>
    where X: Clone + PartialOrd + SampleRange + Bounded + SplitBetween
{
    type ThetaSplit = usize;
    type ThetaLeaf = f64;
    type Feature = X;
    type Target = f64;
    type Prediction = f64;

    fn target(&self) -> Self::Target {
        self.y
    }

    fn sample_as_split_feature(&self, theta: &Self::ThetaSplit) -> Self::Feature {
        self.x[*theta].clone()
    }

    fn sample_predict(&self, w: &Self::ThetaLeaf) -> Self::Prediction {
        *w
    }
}

impl<'a, X> TrainingData<Sample<'a, X, f64>> for [Sample<'a, X, f64>]
    where X: Clone + PartialOrd + SampleRange + Bounded + SplitBetween
{
    type Criterion = VarianceCriterion;

    fn n_samples(&self) -> usize {
        self.len()
    }

    fn gen_split_feature(&self) -> usize {
        thread_rng().gen_range(0, self[0].x.len())
    }

    fn all_split_features(&self) -> Option<Box<Iterator<Item=usize>>> {
        Some(Box::new(0..self[0].x.len()))
    }

    fn train_leaf_predictor(&self) -> f64 {
        f64::mean(self.iter().map(|sample| &sample.y))
    }

    fn feature_bounds(&self, theta: &usize) -> (X, X) {
        self.iter()
            .map(|sample| sample.sample_as_split_feature(theta))
            .fold((X::max_value(), X::min_value()),
                  |(min, max), x| {
                      (if x < min {x.clone()} else {min},
                       if x > max {x} else {max})
                  })
    }
}