Struct gbdt::decision_tree::DecisionTree
source · pub struct DecisionTree { /* private fields */ }
Expand description
The decision tree.
Implementations§
source§impl DecisionTree
impl DecisionTree
sourcepub fn new() -> Self
pub fn new() -> Self
Return a new decision tree with default values (feature_size = 1, max_depth = 2, min_leaf_size = 1, loss = Loss::SquaredError, feature_sample_ratio = 1.0)
§Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
sourcepub fn set_feature_size(&mut self, size: usize)
pub fn set_feature_size(&mut self, size: usize)
Set the size of features. Training data and test data should have the same feature size.
§Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
sourcepub fn set_max_depth(&mut self, max_depth: u32)
pub fn set_max_depth(&mut self, max_depth: u32)
Set the max depth of the decision tree. The root node is considered to be in layer 0.
§Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
tree.set_max_depth(2);
sourcepub fn set_min_leaf_size(&mut self, min_leaf_size: usize)
pub fn set_min_leaf_size(&mut self, min_leaf_size: usize)
Set the minimum number of samples required to be at a leaf node during training.
§Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
tree.set_min_leaf_size(1);
sourcepub fn set_loss(&mut self, loss: Loss)
pub fn set_loss(&mut self, loss: Loss)
Set the loss function type.
§Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
tree.set_loss(Loss::SquaredError);
sourcepub fn set_feature_sample_ratio(&mut self, feature_sample_ratio: f64)
pub fn set_feature_sample_ratio(&mut self, feature_sample_ratio: f64)
Set the portion of features considered for splitting. When splitting a node, a subset of the features (feature_size * feature_sample_ratio) will be randomly selected to calculate impurity.
§Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
tree.set_feature_sample_ratio(0.9);
sourcepub fn fit_n(
&mut self,
train_data: &DataVec,
subset: &[usize],
cache: &mut TrainingCache
)
pub fn fit_n( &mut self, train_data: &DataVec, subset: &[usize], cache: &mut TrainingCache )
Use the subset
of the train_data
to train a decision tree
§Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
vec![1.0, 2.0, 3.0],
1.0,
2.0,
None
);
let data2 = Data::new_training_data(
vec![1.1, 2.1, 3.1],
1.0,
1.0,
None
);
let data3 = Data::new_training_data(
vec![2.0, 2.0, 1.0],
1.0,
0.5,
None
);
let data4 = Data::new_training_data(
vec![2.0, 2.3, 1.2],
1.0,
3.0,
None
);
let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());
// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
let subset = [0,1,2];
tree.fit_n(&dv, &subset, &mut cache);
sourcepub fn fit(&mut self, train_data: &DataVec, cache: &mut TrainingCache)
pub fn fit(&mut self, train_data: &DataVec, cache: &mut TrainingCache)
Use the samples in train_data
to train the decision tree.
§Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
vec![1.0, 2.0, 3.0],
1.0,
1.0,
None
);
let data2 = Data::new_training_data(
vec![1.1, 2.1, 3.1],
1.0,
1.0,
None
);
let data3 = Data::new_training_data(
vec![2.0, 2.0, 1.0],
1.0,
2.0,
None
);
let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
tree.fit(&dv, &mut cache);
sourcepub fn predict_n(&self, test_data: &DataVec, subset: &[usize]) -> PredVec
pub fn predict_n(&self, test_data: &DataVec, subset: &[usize]) -> PredVec
Run inference on the subset of the test_data. Returns a vector of
predicted values. If index i is in the subset, then output[i] is the prediction;
if i is not in the subset, then output[i] is 0.0.
§Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
vec![1.0, 2.0, 3.0],
1.0,
2.0,
None
);
let data2 = Data::new_training_data(
vec![1.1, 2.1, 3.1],
1.0,
1.0,
None
);
let data3 = Data::new_training_data(
vec![2.0, 2.0, 1.0],
1.0,
0.5,
None
);
let data4 = Data::new_training_data(
vec![2.0, 2.3, 1.2],
1.0,
3.0,
None
);
let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());
// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
tree.fit(&dv, &mut cache);
// set up the test data
let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());
// inference the test data with the decision tree
let subset = [0,1,2];
println!("{:?}", tree.predict_n(&dv, &subset));
// output:
// [2.0, 0.75, 0.75, 0.0]
§Panic
If the function is called before the decision tree is trained, it will panic.
If the test data has a smaller feature size than the tree’s feature size, it will panic.
sourcepub fn predict(&self, test_data: &DataVec) -> PredVec
pub fn predict(&self, test_data: &DataVec) -> PredVec
Predict the values of the samples in the test_data. Returns a vector of the
predicted values.
§Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
vec![1.0, 2.0, 3.0],
1.0,
2.0,
None
);
let data2 = Data::new_training_data(
vec![1.1, 2.1, 3.1],
1.0,
1.0,
None
);
let data3 = Data::new_training_data(
vec![2.0, 2.0, 1.0],
1.0,
0.5,
None
);
let data4 = Data::new_training_data(
vec![2.0, 2.3, 1.2],
1.0,
3.0,
None
);
let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());
// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
tree.fit(&dv, &mut cache);
// set up the test data
let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());
// inference the test data with the decision tree
println!("{:?}", tree.predict(&dv));
// output:
// [2.0, 0.75, 0.75, 3.0]
§Panic
If the function is called before the decision tree is trained, it will panic.
If the test data has a smaller feature size than the tree’s feature size, it will panic.
sourcepub fn print(&self)
pub fn print(&self)
Print the decision tree. For debug use.
§Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
vec![1.0, 2.0, 3.0],
1.0,
2.0,
None
);
let data2 = Data::new_training_data(
vec![1.1, 2.1, 3.1],
1.0,
1.0,
None
);
let data3 = Data::new_training_data(
vec![2.0, 2.0, 1.0],
1.0,
0.5,
None
);
let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
let subset = [0, 1];
tree.fit_n(&dv, &subset, &mut cache);
tree.print();
// output:
// ----DTNode { feature_index: 0, feature_value: 1.05, pred: 1.5, is_leaf: false }
// ----DTNode { feature_index: 0, feature_value: 0.0, pred: 2.0, is_leaf: true }
// ----DTNode { feature_index: 0, feature_value: 0.0, pred: 1.0, is_leaf: true }
sourcepub fn get_from_xgboost(node: &Value) -> Result<Self>
pub fn get_from_xgboost(node: &Value) -> Result<Self>
Build a decision tree from an xgboost model. xgboost can dump the model in JSON format; we use serde_json to parse the JSON string.
§Example
use serde_json::{Result, Value};
use gbdt::decision_tree::DecisionTree;
let data = r#"
{ "nodeid": 0, "depth": 0, "split": 0, "split_condition": 750, "yes": 1, "no": 2, "missing": 2, "children": [
{ "nodeid": 1, "leaf": 25.7333336 },
{ "nodeid": 2, "leaf": 15.791667 }]}"#;
let node: Value = serde_json::from_str(data).unwrap();
let dt = DecisionTree::get_from_xgboost(&node);
sourcepub fn len(&self) -> usize
pub fn len(&self) -> usize
For debug use. Return the number of nodes in the current decision tree.
§Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
vec![1.0, 2.0, 3.0],
1.0,
2.0,
None
);
let data2 = Data::new_training_data(
vec![1.1, 2.1, 3.1],
1.0,
1.0,
None
);
let data3 = Data::new_training_data(
vec![2.0, 2.0, 1.0],
1.0,
0.5,
None
);
let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
let subset = [0, 1];
tree.fit_n(&dv, &subset, &mut cache);
assert_eq!(tree.len(), 3)