pub struct DecisionTree { /* private fields */ }

The decision tree, used as the base learner in gradient boosted decision trees (GBDT).
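
A typical end-to-end workflow, condensed from the method examples below:

use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};

// configure the tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);

// set up training data (each sample: feature vector, weight, label, optional initial guess)
let dv = vec![
    Data::new_training_data(vec![1.0, 2.0, 3.0], 1.0, 2.0, None),
    Data::new_training_data(vec![1.1, 2.1, 3.1], 1.0, 1.0, None),
    Data::new_training_data(vec![2.0, 2.0, 1.0], 1.0, 0.5, None),
];

// train, then predict
let mut cache = TrainingCache::get_cache(3, &dv, 2);
tree.fit(&dv, &mut cache);
println!("{:?}", tree.predict(&dv));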

Implementations

impl DecisionTree

pub fn new() -> Self

Return a new decision tree with default values (feature_size = 1, max_depth = 2, min_leaf_size = 1, loss = Loss::SquaredError, feature_sample_ratio = 1.0).

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();

pub fn set_feature_size(&mut self, size: usize)

Set the number of features. Training data and test data should have the same feature size.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
tree.set_feature_size(3);

pub fn set_max_depth(&mut self, max_depth: u32)

Set the max depth of the decision tree. The root node is considered to be in layer 0.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
tree.set_max_depth(2);

pub fn set_min_leaf_size(&mut self, min_leaf_size: usize)

Set the minimum number of samples required to be at a leaf node during training.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
tree.set_min_leaf_size(1);

pub fn set_loss(&mut self, loss: Loss)

Set the loss function type.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
tree.set_loss(Loss::SquaredError);

pub fn set_feature_sample_ratio(&mut self, feature_sample_ratio: f64)

Set the portion of features considered when splitting. When splitting a node, a random subset of the features (feature_size * feature_sample_ratio) is selected to calculate impurity. For example, with feature_size = 3 and feature_sample_ratio = 0.9, roughly 3 × 0.9 ≈ 2 features are sampled at each split.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
tree.set_feature_sample_ratio(0.9);

pub fn fit_n(&mut self, train_data: &DataVec, subset: &[usize], cache: &mut TrainingCache)

Use a subset of the train_data to train the decision tree.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
    vec![1.0, 2.0, 3.0],
    1.0,
    2.0,
    None
);
let data2 = Data::new_training_data(
    vec![1.1, 2.1, 3.1],
    1.0,
    1.0,
    None
);
let data3 = Data::new_training_data(
    vec![2.0, 2.0, 1.0],
    1.0,
    0.5,
    None
);
let data4 = Data::new_training_data(
    vec![2.0, 2.3, 1.2],
    1.0,
    3.0,
    None
);

let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());


// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
let subset = [0, 1, 2];
tree.fit_n(&dv, &subset, &mut cache);

pub fn fit(&mut self, train_data: &DataVec, cache: &mut TrainingCache)

Use the samples in train_data to train the decision tree.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
    vec![1.0, 2.0, 3.0],
    1.0,
    1.0,
    None
);
let data2 = Data::new_training_data(
    vec![1.1, 2.1, 3.1],
    1.0,
    1.0,
    None
);
let data3 = Data::new_training_data(
    vec![2.0, 2.0, 1.0],
    1.0,
    2.0,
    None
);

let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());


// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
tree.fit(&dv, &mut cache);

pub fn predict_n(&self, test_data: &DataVec, subset: &[usize]) -> PredVec

Run inference on a subset of the test_data and return a vector of predicted values. If index i is in the subset, output[i] is the prediction; otherwise, output[i] is 0.0.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
    vec![1.0, 2.0, 3.0],
    1.0,
    2.0,
    None
);
let data2 = Data::new_training_data(
    vec![1.1, 2.1, 3.1],
    1.0,
    1.0,
    None
);
let data3 = Data::new_training_data(
    vec![2.0, 2.0, 1.0],
    1.0,
    0.5,
    None
);
let data4 = Data::new_training_data(
    vec![2.0, 2.3, 1.2],
    1.0,
    3.0,
    None
);

let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());


// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
tree.fit(&dv, &mut cache);


// set up the test data
let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());


// inference the test data with the decision tree
let subset = [0, 1, 2];
println!("{:?}", tree.predict_n(&dv, &subset));


// output:
// [2.0, 0.75, 0.75, 0.0]
Panics

If the function is called before the decision tree is trained, it will panic.

If the test data has a smaller feature size than the tree’s feature size, it will panic.

pub fn predict(&self, test_data: &DataVec) -> PredVec

Run inference on the samples in the test_data and return a vector of the predicted values.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
    vec![1.0, 2.0, 3.0],
    1.0,
    2.0,
    None
);
let data2 = Data::new_training_data(
    vec![1.1, 2.1, 3.1],
    1.0,
    1.0,
    None
);
let data3 = Data::new_training_data(
    vec![2.0, 2.0, 1.0],
    1.0,
    0.5,
    None
);
let data4 = Data::new_training_data(
    vec![2.0, 2.3, 1.2],
    1.0,
    3.0,
    None
);

let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());


// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
tree.fit(&dv, &mut cache);


// set up the test data
let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());


// inference the test data with the decision tree
println!("{:?}", tree.predict(&dv));


// output:
// [2.0, 0.75, 0.75, 3.0]
Panics

If the function is called before the decision tree is trained, it will panic.

If the test data has a smaller feature size than the tree’s feature size, it will panic.

pub fn print(&self)

Print the decision tree. For debug use.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
    vec![1.0, 2.0, 3.0],
    1.0,
    2.0,
    None
);
let data2 = Data::new_training_data(
    vec![1.1, 2.1, 3.1],
    1.0,
    1.0,
    None
);
let data3 = Data::new_training_data(
    vec![2.0, 2.0, 1.0],
    1.0,
    0.5,
    None
);

let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());


// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
let subset = [0, 1];
tree.fit_n(&dv, &subset, &mut cache);


tree.print();

// output:

//  ----DTNode { feature_index: 0, feature_value: 1.05, pred: 1.5, is_leaf: false }
//      ----DTNode { feature_index: 0, feature_value: 0.0, pred: 2.0, is_leaf: true }
//      ----DTNode { feature_index: 0, feature_value: 0.0, pred: 1.0, is_leaf: true }

pub fn get_from_xgboost(node: &Value) -> Result<Self>

Build a decision tree from an xgboost model. xgboost can dump its model in JSON format; serde_json is used to parse the JSON string.

Example
use serde_json::{Result, Value};
use gbdt::decision_tree::DecisionTree;
let data = r#"
      { "nodeid": 0, "depth": 0, "split": 0, "split_condition": 750, "yes": 1, "no": 2, "missing": 2, "children": [
         { "nodeid": 1, "leaf": 25.7333336 },
         { "nodeid": 2, "leaf": 15.791667 }]}"#;
let node: Value = serde_json::from_str(data).unwrap();
let dt = DecisionTree::get_from_xgboost(&node).unwrap();

pub fn len(&self) -> usize

For debug use. Return the number of nodes in the current decision tree.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
    vec![1.0, 2.0, 3.0],
    1.0,
    2.0,
    None
);
let data2 = Data::new_training_data(
    vec![1.1, 2.1, 3.1],
    1.0,
    1.0,
    None
);
let data3 = Data::new_training_data(
    vec![2.0, 2.0, 1.0],
    1.0,
    0.5,
    None
);

let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());


// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
let subset = [0, 1];
tree.fit_n(&dv, &subset, &mut cache);

assert_eq!(tree.len(), 3);

pub fn is_empty(&self) -> bool

Returns true if the current decision tree is empty.
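
Example

A minimal sketch, assuming a freshly constructed tree contains no nodes until it is trained or loaded:
use gbdt::decision_tree::DecisionTree;
let tree = DecisionTree::new();
assert!(tree.is_empty());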

Trait Implementations

impl Debug for DecisionTree

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter.

impl Default for DecisionTree

fn default() -> Self

Returns the “default value” for a type.
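
A minimal sketch, assuming the default value is equivalent to DecisionTree::new():
use gbdt::decision_tree::DecisionTree;
// an untrained tree; configure and fit before use
let tree: DecisionTree = Default::default();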
impl<'de> Deserialize<'de> for DecisionTree

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where __D: Deserializer<'de>,

Deserialize this value from the given Serde deserializer.

impl Serialize for DecisionTree

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error>
where __S: Serializer,

Serialize this value into the given Serde serializer.
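
A minimal round-trip sketch; serde_json is assumed here as the concrete format, but any Serde serializer works:
use gbdt::decision_tree::DecisionTree;
let tree = DecisionTree::new();
// serialize the tree to a JSON string
let json = serde_json::to_string(&tree).unwrap();
// deserialize it back into a DecisionTree
let restored: DecisionTree = serde_json::from_str(&json).unwrap();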
