[][src]Struct gbdt::decision_tree::DecisionTree

pub struct DecisionTree { /* fields omitted */ }

The decision tree.

Methods

impl DecisionTree[src]

pub fn new() -> Self[src]

Return a new decision tree with default values (feature_size = 1, max_depth = 2, min_leaf_size = 1, loss = Loss::SquaredError, feature_sample_ratio = 1.0)

Example

use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();

pub fn set_feature_size(&mut self, size: usize)[src]

Set the number of features. Training data and test data should have the same feature size.

Example

use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
tree.set_feature_size(3);

pub fn set_max_depth(&mut self, max_depth: u32)[src]

Set the max depth of the decision tree. The root node is considered to be in the layer 0.

Example

use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
tree.set_max_depth(2);

pub fn set_min_leaf_size(&mut self, min_leaf_size: usize)[src]

Set the minimum number of samples required to be at a leaf node during training.

Example

use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
tree.set_min_leaf_size(1);

pub fn set_loss(&mut self, loss: Loss)[src]

Set the loss function type.

Example

use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
tree.set_loss(Loss::SquaredError);

pub fn set_feature_sample_ratio(&mut self, feature_sample_ratio: f64)[src]

Set the portion of features considered when splitting. When splitting a node, a subset of the features (feature_size * feature_sample_ratio) will be randomly selected to calculate impurity.

Example

use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
tree.set_feature_sample_ratio(0.9);

pub fn fit_n(
    &mut self,
    train_data: &DataVec,
    subset: &[usize],
    cache: &mut TrainingCache
)
[src]

Train the decision tree using the given subset of train_data.

Example

use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
    vec![1.0, 2.0, 3.0],
    1.0,
    2.0,
    None
);
let data2 = Data::new_training_data(
    vec![1.1, 2.1, 3.1],
    1.0,
    1.0,
    None
);
let data3 = Data::new_training_data(
    vec![2.0, 2.0, 1.0],
    1.0,
    0.5,
    None
);
let data4 = Data::new_training_data(
    vec![2.0, 2.3, 1.2],
    1.0,
    3.0,
    None
);

let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());


// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
let subset = [0,1,2];
tree.fit_n(&dv, &subset, &mut cache);

pub fn fit(&mut self, train_data: &DataVec, cache: &mut TrainingCache)[src]

Use the samples in train_data to train the decision tree.

Example

use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
    vec![1.0, 2.0, 3.0],
    1.0,
    1.0,
    None
);
let data2 = Data::new_training_data(
    vec![1.1, 2.1, 3.1],
    1.0,
    1.0,
    None
);
let data3 = Data::new_training_data(
    vec![2.0, 2.0, 1.0],
    1.0,
    2.0,
    None
);

let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());


// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
tree.fit(&dv, &mut cache);

pub fn predict_n(&self, test_data: &DataVec, subset: &[usize]) -> PredVec[src]

Run inference on the given subset of test_data. Return a vector of predicted values. If i is in the subset, then output[i] is the prediction; if i is not in the subset, then output[i] is 0.0.

Example

use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
    vec![1.0, 2.0, 3.0],
    1.0,
    2.0,
    None
);
let data2 = Data::new_training_data(
    vec![1.1, 2.1, 3.1],
    1.0,
    1.0,
    None
);
let data3 = Data::new_training_data(
    vec![2.0, 2.0, 1.0],
    1.0,
    0.5,
    None
);
let data4 = Data::new_training_data(
    vec![2.0, 2.3, 1.2],
    1.0,
    3.0,
    None
);

let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());


// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
tree.fit(&dv, &mut cache);


// set up the test data
let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());


// run inference on the test data with the decision tree
let subset = [0,1,2];
println!("{:?}", tree.predict_n(&dv, &subset));


// output:
// [2.0, 0.75, 0.75, 0.0]

Panic

If the function is called before the decision tree is trained, it will panic.

If the test data have a smaller feature size than the tree's feature size, it will panic.

pub fn predict(&self, test_data: &DataVec) -> PredVec[src]

Infer the values of the samples in test_data. Return a vector of the predicted values.

Example

use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
    vec![1.0, 2.0, 3.0],
    1.0,
    2.0,
    None
);
let data2 = Data::new_training_data(
    vec![1.1, 2.1, 3.1],
    1.0,
    1.0,
    None
);
let data3 = Data::new_training_data(
    vec![2.0, 2.0, 1.0],
    1.0,
    0.5,
    None
);
let data4 = Data::new_training_data(
    vec![2.0, 2.3, 1.2],
    1.0,
    3.0,
    None
);

let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());


// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
tree.fit(&dv, &mut cache);


// set up the test data
let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());


// run inference on the test data with the decision tree
println!("{:?}", tree.predict(&dv));


// output:
// [2.0, 0.75, 0.75, 3.0]

Panic

If the function is called before the decision tree is trained, it will panic.

If the test data have a smaller feature size than the tree's feature size, it will panic.

pub fn print(&self)[src]

Print the decision tree. For debug use.

Example

use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
    vec![1.0, 2.0, 3.0],
    1.0,
    2.0,
    None
);
let data2 = Data::new_training_data(
    vec![1.1, 2.1, 3.1],
    1.0,
    1.0,
    None
);
let data3 = Data::new_training_data(
    vec![2.0, 2.0, 1.0],
    1.0,
    0.5,
    None
);

let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());


// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
let subset = [0, 1];
tree.fit_n(&dv, &subset, &mut cache);


tree.print();

// output:

//  ----DTNode { feature_index: 0, feature_value: 1.05, pred: 1.5, is_leaf: false }
//      ----DTNode { feature_index: 0, feature_value: 0.0, pred: 2.0, is_leaf: true }
//      ----DTNode { feature_index: 0, feature_value: 0.0, pred: 1.0, is_leaf: true }

pub fn get_from_xgboost(node: &Value) -> Result<Self, Box<dyn Error>>[src]

Build a decision tree from an xgboost model. xgboost can dump the model in JSON format; serde_json is used to parse the JSON string.

Example

use serde_json::{Result, Value};
use gbdt::decision_tree::DecisionTree;
let data = r#"
      { "nodeid": 0, "depth": 0, "split": 0, "split_condition": 750, "yes": 1, "no": 2, "missing": 2, "children": [
         { "nodeid": 1, "leaf": 25.7333336 },
         { "nodeid": 2, "leaf": 15.791667 }]}"#;
let node: Value = serde_json::from_str(data).unwrap();
let dt = DecisionTree::get_from_xgboost(&node);

pub fn len(&self) -> usize[src]

For debug use. Return the number of nodes in the current decision tree.

Example

use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
    vec![1.0, 2.0, 3.0],
    1.0,
    2.0,
    None
);
let data2 = Data::new_training_data(
    vec![1.1, 2.1, 3.1],
    1.0,
    1.0,
    None
);
let data3 = Data::new_training_data(
    vec![2.0, 2.0, 1.0],
    1.0,
    0.5,
    None
);

let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());


// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
let subset = [0, 1];
tree.fit_n(&dv, &subset, &mut cache);

assert_eq!(tree.len(), 3)

pub fn is_empty(&self) -> bool[src]

Returns true if the current decision tree is empty

Trait Implementations

impl Default for DecisionTree[src]

impl Debug for DecisionTree[src]

impl Serialize for DecisionTree[src]

impl<'de> Deserialize<'de> for DecisionTree[src]

Auto Trait Implementations

Blanket Implementations

impl<T> From<T> for T[src]

impl<T, U> Into<U> for T where
    U: From<T>, 
[src]

impl<T, U> TryFrom<U> for T where
    U: Into<T>, 
[src]

type Error = Infallible

The type returned in the event of a conversion error.

impl<T, U> TryInto<U> for T where
    U: TryFrom<T>, 
[src]

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

impl<T> BorrowMut<T> for T where
    T: ?Sized
[src]

impl<T> Borrow<T> for T where
    T: ?Sized
[src]

impl<T> Any for T where
    T: 'static + ?Sized
[src]

impl<T> DeserializeOwned for T where
    T: Deserialize<'de>, 
[src]