[−][src]Struct gbdt::decision_tree::DecisionTree
The decision tree.
Methods
impl DecisionTree
[src]
pub fn new() -> Self
[src]
Return a new decision tree with default values (feature_size = 1, max_depth = 2, min_leaf_size = 1, loss = Loss::SquaredError, feature_sample_ratio = 1.0)
Example
use gbdt::config::Loss; use gbdt::decision_tree::{Data, DecisionTree}; let mut tree = DecisionTree::new();
pub fn set_feature_size(&mut self, size: usize)
[src]
Set the size of features. Training data and test data should have the same feature size.
Example
use gbdt::config::Loss; use gbdt::decision_tree::{Data, DecisionTree}; let mut tree = DecisionTree::new(); tree.set_feature_size(3);
pub fn set_max_depth(&mut self, max_depth: u32)
[src]
Set the max depth of the decision tree. The root node is considered to be in the layer 0.
Example
use gbdt::config::Loss; use gbdt::decision_tree::{Data, DecisionTree}; let mut tree = DecisionTree::new(); tree.set_max_depth(2);
pub fn set_min_leaf_size(&mut self, min_leaf_size: usize)
[src]
Set the minimum number of samples required to be at a leaf node during training.
Example
use gbdt::config::Loss; use gbdt::decision_tree::{Data, DecisionTree}; let mut tree = DecisionTree::new(); tree.set_min_leaf_size(1);
pub fn set_loss(&mut self, loss: Loss)
[src]
Set the loss function type.
Example
use gbdt::config::Loss; use gbdt::decision_tree::{Data, DecisionTree}; let mut tree = DecisionTree::new(); tree.set_loss(Loss::SquaredError);
pub fn set_feature_sample_ratio(&mut self, feature_sample_ratio: f64)
[src]
Set the portion of features to be considered for splitting. When splitting a node, a subset of the features (feature_size * feature_sample_ratio) will be randomly selected to calculate impurity.
Example
use gbdt::config::Loss; use gbdt::decision_tree::{Data, DecisionTree}; let mut tree = DecisionTree::new(); tree.set_feature_sample_ratio(0.9);
pub fn fit_n(
&mut self,
train_data: &DataVec,
subset: &[usize],
cache: &mut TrainingCache
)
[src]
Use the subset of the train_data to train a decision tree.
Example
use gbdt::config::Loss; use gbdt::decision_tree::{Data, DecisionTree, TrainingCache}; // set up training data let data1 = Data::new_training_data( vec![1.0, 2.0, 3.0], 1.0, 2.0, None ); let data2 = Data::new_training_data( vec![1.1, 2.1, 3.1], 1.0, 1.0, None ); let data3 = Data::new_training_data( vec![2.0, 2.0, 1.0], 1.0, 0.5, None ); let data4 = Data::new_training_data( vec![2.0, 2.3, 1.2], 1.0, 3.0, None ); let mut dv = Vec::new(); dv.push(data1.clone()); dv.push(data2.clone()); dv.push(data3.clone()); dv.push(data4.clone()); // train a decision tree let mut tree = DecisionTree::new(); tree.set_feature_size(3); tree.set_max_depth(2); tree.set_min_leaf_size(1); tree.set_loss(Loss::SquaredError); let mut cache = TrainingCache::get_cache(3, &dv, 2); let subset = [0,1,2]; tree.fit_n(&dv, &subset, &mut cache);
pub fn fit(&mut self, train_data: &DataVec, cache: &mut TrainingCache)
[src]
Use the samples in train_data to train the decision tree.
Example
use gbdt::config::Loss; use gbdt::decision_tree::{Data, DecisionTree, TrainingCache}; // set up training data let data1 = Data::new_training_data( vec![1.0, 2.0, 3.0], 1.0, 1.0, None ); let data2 = Data::new_training_data( vec![1.1, 2.1, 3.1], 1.0, 1.0, None ); let data3 = Data::new_training_data( vec![2.0, 2.0, 1.0], 1.0, 2.0, None ); let mut dv = Vec::new(); dv.push(data1.clone()); dv.push(data2.clone()); dv.push(data3.clone()); // train a decision tree let mut tree = DecisionTree::new(); tree.set_feature_size(3); tree.set_max_depth(2); tree.set_min_leaf_size(1); tree.set_loss(Loss::SquaredError); let mut cache = TrainingCache::get_cache(3, &dv, 2); tree.fit(&dv, &mut cache);
pub fn predict_n(&self, test_data: &DataVec, subset: &[usize]) -> PredVec
[src]
Run inference on the subset of the test_data. Return a vector of predicted values. If the index i is in the subset, then output[i] is the prediction. If i is not in the subset, then output[i] is 0.0.
Example
use gbdt::config::Loss; use gbdt::decision_tree::{Data, DecisionTree, TrainingCache}; // set up training data let data1 = Data::new_training_data( vec![1.0, 2.0, 3.0], 1.0, 2.0, None ); let data2 = Data::new_training_data( vec![1.1, 2.1, 3.1], 1.0, 1.0, None ); let data3 = Data::new_training_data( vec![2.0, 2.0, 1.0], 1.0, 0.5, None ); let data4 = Data::new_training_data( vec![2.0, 2.3, 1.2], 1.0, 3.0, None ); let mut dv = Vec::new(); dv.push(data1.clone()); dv.push(data2.clone()); dv.push(data3.clone()); dv.push(data4.clone()); // train a decision tree let mut tree = DecisionTree::new(); tree.set_feature_size(3); tree.set_max_depth(2); tree.set_min_leaf_size(1); tree.set_loss(Loss::SquaredError); let mut cache = TrainingCache::get_cache(3, &dv, 2); tree.fit(&dv, &mut cache); // set up the test data let mut dv = Vec::new(); dv.push(data1.clone()); dv.push(data2.clone()); dv.push(data3.clone()); dv.push(data4.clone()); // inference the test data with the decision tree let subset = [0,1,2]; println!("{:?}", tree.predict_n(&dv, &subset)); // output: // [2.0, 0.75, 0.75, 0.0]
Panic
If the function is called before the decision tree is trained, it will panic.
If the test data have a smaller feature size than the tree's feature size, it will panic.
pub fn predict(&self, test_data: &DataVec) -> PredVec
[src]
Run inference on the samples in the test_data. Return a vector of the predicted values.
Example
use gbdt::config::Loss; use gbdt::decision_tree::{Data, DecisionTree, TrainingCache}; // set up training data let data1 = Data::new_training_data( vec![1.0, 2.0, 3.0], 1.0, 2.0, None ); let data2 = Data::new_training_data( vec![1.1, 2.1, 3.1], 1.0, 1.0, None ); let data3 = Data::new_training_data( vec![2.0, 2.0, 1.0], 1.0, 0.5, None ); let data4 = Data::new_training_data( vec![2.0, 2.3, 1.2], 1.0, 3.0, None ); let mut dv = Vec::new(); dv.push(data1.clone()); dv.push(data2.clone()); dv.push(data3.clone()); dv.push(data4.clone()); // train a decision tree let mut tree = DecisionTree::new(); tree.set_feature_size(3); tree.set_max_depth(2); tree.set_min_leaf_size(1); tree.set_loss(Loss::SquaredError); let mut cache = TrainingCache::get_cache(3, &dv, 2); tree.fit(&dv, &mut cache); // set up the test data let mut dv = Vec::new(); dv.push(data1.clone()); dv.push(data2.clone()); dv.push(data3.clone()); dv.push(data4.clone()); // inference the test data with the decision tree println!("{:?}", tree.predict(&dv)); // output: // [2.0, 0.75, 0.75, 3.0]
Panic
If the function is called before the decision tree is trained, it will panic.
If the test data have a smaller feature size than the tree's feature size, it will panic.
pub fn print(&self)
[src]
Print the decision tree. For debug use.
Example
use gbdt::config::Loss; use gbdt::decision_tree::{Data, DecisionTree, TrainingCache}; // set up training data let data1 = Data::new_training_data( vec![1.0, 2.0, 3.0], 1.0, 2.0, None ); let data2 = Data::new_training_data( vec![1.1, 2.1, 3.1], 1.0, 1.0, None ); let data3 = Data::new_training_data( vec![2.0, 2.0, 1.0], 1.0, 0.5, None ); let mut dv = Vec::new(); dv.push(data1.clone()); dv.push(data2.clone()); dv.push(data3.clone()); // train a decision tree let mut tree = DecisionTree::new(); tree.set_feature_size(3); tree.set_max_depth(2); tree.set_min_leaf_size(1); tree.set_loss(Loss::SquaredError); let mut cache = TrainingCache::get_cache(3, &dv, 2); let subset = [0, 1]; tree.fit_n(&dv, &subset, &mut cache); tree.print(); // output: // ----DTNode { feature_index: 0, feature_value: 1.05, pred: 1.5, is_leaf: false } // ----DTNode { feature_index: 0, feature_value: 0.0, pred: 2.0, is_leaf: true } // ----DTNode { feature_index: 0, feature_value: 0.0, pred: 1.0, is_leaf: true }
pub fn get_from_xgboost(node: &Value) -> Result<Self, Box<dyn Error>>
[src]
Build a decision tree from xgboost's model. xgboost can dump the model in JSON format. We use serde_json to parse the JSON string.
Example
use serde_json::{Result, Value}; use gbdt::decision_tree::DecisionTree; let data = r#" { "nodeid": 0, "depth": 0, "split": 0, "split_condition": 750, "yes": 1, "no": 2, "missing": 2, "children": [ { "nodeid": 1, "leaf": 25.7333336 }, { "nodeid": 2, "leaf": 15.791667 }]}"#; let node: Value = serde_json::from_str(data).unwrap(); let dt = DecisionTree::get_from_xgboost(&node);
pub fn len(&self) -> usize
[src]
For debug use. Return the number of nodes in the current decision tree.
Example
use gbdt::config::Loss; use gbdt::decision_tree::{Data, DecisionTree, TrainingCache}; // set up training data let data1 = Data::new_training_data( vec![1.0, 2.0, 3.0], 1.0, 2.0, None ); let data2 = Data::new_training_data( vec![1.1, 2.1, 3.1], 1.0, 1.0, None ); let data3 = Data::new_training_data( vec![2.0, 2.0, 1.0], 1.0, 0.5, None ); let mut dv = Vec::new(); dv.push(data1.clone()); dv.push(data2.clone()); dv.push(data3.clone()); // train a decision tree let mut tree = DecisionTree::new(); tree.set_feature_size(3); tree.set_max_depth(2); tree.set_min_leaf_size(1); tree.set_loss(Loss::SquaredError); let mut cache = TrainingCache::get_cache(3, &dv, 2); let subset = [0, 1]; tree.fit_n(&dv, &subset, &mut cache); assert_eq!(tree.len(), 3)
pub fn is_empty(&self) -> bool
[src]
Returns true if the current decision tree is empty
Trait Implementations
impl Default for DecisionTree
[src]
impl Debug for DecisionTree
[src]
impl Serialize for DecisionTree
[src]
fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error> where
__S: Serializer,
[src]
__S: Serializer,
impl<'de> Deserialize<'de> for DecisionTree
[src]
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
[src]
__D: Deserializer<'de>,
Auto Trait Implementations
impl Send for DecisionTree
impl Sync for DecisionTree
Blanket Implementations
impl<T> From<T> for T
[src]
impl<T, U> Into<U> for T where
U: From<T>,
[src]
U: From<T>,
impl<T, U> TryFrom<U> for T where
U: Into<T>,
[src]
U: Into<T>,
type Error = Infallible
The type returned in the event of a conversion error.
fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>
[src]
impl<T, U> TryInto<U> for T where
U: TryFrom<T>,
[src]
U: TryFrom<T>,
type Error = <U as TryFrom<T>>::Error
The type returned in the event of a conversion error.
fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>
[src]
impl<T> BorrowMut<T> for T where
T: ?Sized,
[src]
T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
[src]
impl<T> Borrow<T> for T where
T: ?Sized,
[src]
T: ?Sized,
impl<T> Any for T where
T: 'static + ?Sized,
[src]
T: 'static + ?Sized,
impl<T> DeserializeOwned for T where
T: Deserialize<'de>,
[src]
T: Deserialize<'de>,