pub struct Config {
pub feature_size: usize,
pub max_depth: u32,
pub iterations: usize,
pub shrinkage: ValueType,
pub feature_sample_ratio: f64,
pub data_sample_ratio: f64,
pub min_leaf_size: usize,
pub loss: Loss,
pub debug: bool,
pub initial_guess_enabled: bool,
pub training_optimization_level: u8,
}
The config for the gradient boosting algorithm.
Fields§
§feature_size: usize
The size of features. Training data and test data should have the same feature size. (default = 1)
max_depth: u32
The max depth of a single decision tree. The root node is considered to be in the layer 0. (default = 2)
iterations: usize
The number of training iterations, which is also the number of trees in the gradient boosting algorithm. (default = 2)
shrinkage: ValueType
The learning rate parameter of the gradient boosting algorithm. (default = 1.0)
feature_sample_ratio: f64
Portion of features to be sampled. (default = 1.0)
data_sample_ratio: f64
Portion of data to be sampled. (default = 1.0)
min_leaf_size: usize
The minimum number of samples required to be at a leaf node during training. (default = 1)
loss: Loss
The loss function type. (default = SquaredError)
debug: bool
Whether the debug information should be output. (default = false)
initial_guess_enabled: bool
Whether initial guess for test data is enabled. (default = false)
training_optimization_level: u8
Training optimization level (default = 2).
0: least memory, slowest speed.
1: more memory usage, faster speed.
2: most memory usage, fastest speed.
Implementations§
impl Config
pub fn new() -> Config
Return a new config with default settings.
§Example
use gbdt::config::Config;
let mut cfg = Config::new();
Examples found in repository?
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(4);
cfg.set_max_depth(4);
cfg.set_iterations(100);
cfg.set_shrinkage(0.1);
cfg.set_loss("LAD");
cfg.set_debug(true);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/iris/train.txt";
let test_file = "dataset/iris/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(4);
input_format.set_label_index(4);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save the model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load the model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
if almost_equal_thrs(test_dv[i].label, predicted[i], 0.0001) {
correct += 1;
} else {
wrong += 1;
};
println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
assert!(wrong <= 2);
}
More examples
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(22);
cfg.set_max_depth(3);
cfg.set_iterations(50);
cfg.set_shrinkage(0.1);
cfg.set_loss("LogLikelyhood");
cfg.set_debug(true);
//cfg.set_data_sample_ratio(0.8);
//cfg.set_feature_sample_ratio(0.5);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/agaricus-lepiota/train.txt";
let test_file = "dataset/agaricus-lepiota/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(22);
input_format.set_label_index(22);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
let label = if predicted[i] > 0.5 { 1.0 } else { -1.0 };
if (test_dv[i].label - label).abs() < 0.0001 {
correct += 1;
} else {
wrong += 1;
};
//println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
let auc = AUC(&test_dv, &predicted, test_dv.len());
println!("AUC: {}", auc);
use gbdt::fitness::almost_equal;
assert_eq!(wrong, 0);
assert!(almost_equal(auc, 1.0));
}
pub fn set_feature_size(&mut self, n: usize)
Set feature size.
§Example
use gbdt::config::Config;
let mut cfg = Config::new();
cfg.set_feature_size(10);
Examples found in repository?
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(4);
cfg.set_max_depth(4);
cfg.set_iterations(100);
cfg.set_shrinkage(0.1);
cfg.set_loss("LAD");
cfg.set_debug(true);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/iris/train.txt";
let test_file = "dataset/iris/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(4);
input_format.set_label_index(4);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save the model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load the model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
if almost_equal_thrs(test_dv[i].label, predicted[i], 0.0001) {
correct += 1;
} else {
wrong += 1;
};
println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
assert!(wrong <= 2);
}
More examples
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(22);
cfg.set_max_depth(3);
cfg.set_iterations(50);
cfg.set_shrinkage(0.1);
cfg.set_loss("LogLikelyhood");
cfg.set_debug(true);
//cfg.set_data_sample_ratio(0.8);
//cfg.set_feature_sample_ratio(0.5);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/agaricus-lepiota/train.txt";
let test_file = "dataset/agaricus-lepiota/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(22);
input_format.set_label_index(22);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
let label = if predicted[i] > 0.5 { 1.0 } else { -1.0 };
if (test_dv[i].label - label).abs() < 0.0001 {
correct += 1;
} else {
wrong += 1;
};
//println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
let auc = AUC(&test_dv, &predicted, test_dv.len());
println!("AUC: {}", auc);
use gbdt::fitness::almost_equal;
assert_eq!(wrong, 0);
assert!(almost_equal(auc, 1.0));
}
pub fn set_shrinkage(&mut self, eta: ValueType)
Set learning rate.
§Example
use gbdt::config::Config;
let mut cfg = Config::new();
cfg.set_shrinkage(1.0);
Examples found in repository?
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(4);
cfg.set_max_depth(4);
cfg.set_iterations(100);
cfg.set_shrinkage(0.1);
cfg.set_loss("LAD");
cfg.set_debug(true);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/iris/train.txt";
let test_file = "dataset/iris/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(4);
input_format.set_label_index(4);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save the model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load the model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
if almost_equal_thrs(test_dv[i].label, predicted[i], 0.0001) {
correct += 1;
} else {
wrong += 1;
};
println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
assert!(wrong <= 2);
}
More examples
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(22);
cfg.set_max_depth(3);
cfg.set_iterations(50);
cfg.set_shrinkage(0.1);
cfg.set_loss("LogLikelyhood");
cfg.set_debug(true);
//cfg.set_data_sample_ratio(0.8);
//cfg.set_feature_sample_ratio(0.5);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/agaricus-lepiota/train.txt";
let test_file = "dataset/agaricus-lepiota/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(22);
input_format.set_label_index(22);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
let label = if predicted[i] > 0.5 { 1.0 } else { -1.0 };
if (test_dv[i].label - label).abs() < 0.0001 {
correct += 1;
} else {
wrong += 1;
};
//println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
let auc = AUC(&test_dv, &predicted, test_dv.len());
println!("AUC: {}", auc);
use gbdt::fitness::almost_equal;
assert_eq!(wrong, 0);
assert!(almost_equal(auc, 1.0));
}
pub fn set_training_optimization_level(&mut self, level: u8)
Set training optimization level (default = 2).
0: least memory, slowest speed.
1: more memory usage, faster speed.
2: most memory usage, fastest speed.
§Example
use gbdt::config::Config;
let mut cfg = Config::new();
cfg.set_training_optimization_level(2);
Examples found in repository?
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(4);
cfg.set_max_depth(4);
cfg.set_iterations(100);
cfg.set_shrinkage(0.1);
cfg.set_loss("LAD");
cfg.set_debug(true);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/iris/train.txt";
let test_file = "dataset/iris/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(4);
input_format.set_label_index(4);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save the model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load the model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
if almost_equal_thrs(test_dv[i].label, predicted[i], 0.0001) {
correct += 1;
} else {
wrong += 1;
};
println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
assert!(wrong <= 2);
}
More examples
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(22);
cfg.set_max_depth(3);
cfg.set_iterations(50);
cfg.set_shrinkage(0.1);
cfg.set_loss("LogLikelyhood");
cfg.set_debug(true);
//cfg.set_data_sample_ratio(0.8);
//cfg.set_feature_sample_ratio(0.5);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/agaricus-lepiota/train.txt";
let test_file = "dataset/agaricus-lepiota/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(22);
input_format.set_label_index(22);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
let label = if predicted[i] > 0.5 { 1.0 } else { -1.0 };
if (test_dv[i].label - label).abs() < 0.0001 {
correct += 1;
} else {
wrong += 1;
};
//println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
let auc = AUC(&test_dv, &predicted, test_dv.len());
println!("AUC: {}", auc);
use gbdt::fitness::almost_equal;
assert_eq!(wrong, 0);
assert!(almost_equal(auc, 1.0));
}
pub fn set_max_depth(&mut self, n: u32)
Set max depth of the tree.
§Example
use gbdt::config::Config;
let mut cfg = Config::new();
cfg.set_max_depth(5);
Examples found in repository?
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(4);
cfg.set_max_depth(4);
cfg.set_iterations(100);
cfg.set_shrinkage(0.1);
cfg.set_loss("LAD");
cfg.set_debug(true);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/iris/train.txt";
let test_file = "dataset/iris/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(4);
input_format.set_label_index(4);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save the model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load the model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
if almost_equal_thrs(test_dv[i].label, predicted[i], 0.0001) {
correct += 1;
} else {
wrong += 1;
};
println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
assert!(wrong <= 2);
}
More examples
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(22);
cfg.set_max_depth(3);
cfg.set_iterations(50);
cfg.set_shrinkage(0.1);
cfg.set_loss("LogLikelyhood");
cfg.set_debug(true);
//cfg.set_data_sample_ratio(0.8);
//cfg.set_feature_sample_ratio(0.5);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/agaricus-lepiota/train.txt";
let test_file = "dataset/agaricus-lepiota/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(22);
input_format.set_label_index(22);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
let label = if predicted[i] > 0.5 { 1.0 } else { -1.0 };
if (test_dv[i].label - label).abs() < 0.0001 {
correct += 1;
} else {
wrong += 1;
};
//println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
let auc = AUC(&test_dv, &predicted, test_dv.len());
println!("AUC: {}", auc);
use gbdt::fitness::almost_equal;
assert_eq!(wrong, 0);
assert!(almost_equal(auc, 1.0));
}
pub fn set_iterations(&mut self, n: usize)
Set iterations of the algorithm.
§Example
use gbdt::config::Config;
let mut cfg = Config::new();
cfg.set_iterations(5);
Examples found in repository?
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(4);
cfg.set_max_depth(4);
cfg.set_iterations(100);
cfg.set_shrinkage(0.1);
cfg.set_loss("LAD");
cfg.set_debug(true);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/iris/train.txt";
let test_file = "dataset/iris/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(4);
input_format.set_label_index(4);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save the model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load the model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
if almost_equal_thrs(test_dv[i].label, predicted[i], 0.0001) {
correct += 1;
} else {
wrong += 1;
};
println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
assert!(wrong <= 2);
}
More examples
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(22);
cfg.set_max_depth(3);
cfg.set_iterations(50);
cfg.set_shrinkage(0.1);
cfg.set_loss("LogLikelyhood");
cfg.set_debug(true);
//cfg.set_data_sample_ratio(0.8);
//cfg.set_feature_sample_ratio(0.5);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/agaricus-lepiota/train.txt";
let test_file = "dataset/agaricus-lepiota/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(22);
input_format.set_label_index(22);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
let label = if predicted[i] > 0.5 { 1.0 } else { -1.0 };
if (test_dv[i].label - label).abs() < 0.0001 {
correct += 1;
} else {
wrong += 1;
};
//println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
let auc = AUC(&test_dv, &predicted, test_dv.len());
println!("AUC: {}", auc);
use gbdt::fitness::almost_equal;
assert_eq!(wrong, 0);
assert!(almost_equal(auc, 1.0));
}
pub fn set_feature_sample_ratio(&mut self, n: f64)
Set feature sample ratio.
§Example
use gbdt::config::Config;
let mut cfg = Config::new();
cfg.set_feature_sample_ratio(0.9);
pub fn set_data_sample_ratio(&mut self, n: f64)
Set data sample ratio.
§Example
use gbdt::config::Config;
let mut cfg = Config::new();
cfg.set_data_sample_ratio(0.9);
pub fn set_min_leaf_size(&mut self, n: usize)
Set minimal leaf size.
§Example
use gbdt::config::Config;
let mut cfg = Config::new();
cfg.set_min_leaf_size(3);
pub fn set_loss(&mut self, l: &str)
Set loss type: “SquaredError”, “LogLikelyhood”, “LAD”, “reg:linear”, “binary:logistic”, “reg:logistic”, “binary:logitraw”, “multi:softprob”, “multi:softmax”, “rank:pairwise”
§Example
use gbdt::config::{Config, Loss, loss2string};
let mut cfg = Config::new();
cfg.set_loss("LAD");
// Alternative way
cfg.set_loss(&loss2string(&Loss::SquaredError));
Examples found in repository?
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(4);
cfg.set_max_depth(4);
cfg.set_iterations(100);
cfg.set_shrinkage(0.1);
cfg.set_loss("LAD");
cfg.set_debug(true);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/iris/train.txt";
let test_file = "dataset/iris/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(4);
input_format.set_label_index(4);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save the model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load the model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
if almost_equal_thrs(test_dv[i].label, predicted[i], 0.0001) {
correct += 1;
} else {
wrong += 1;
};
println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
assert!(wrong <= 2);
}
More examples
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(22);
cfg.set_max_depth(3);
cfg.set_iterations(50);
cfg.set_shrinkage(0.1);
cfg.set_loss("LogLikelyhood");
cfg.set_debug(true);
//cfg.set_data_sample_ratio(0.8);
//cfg.set_feature_sample_ratio(0.5);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/agaricus-lepiota/train.txt";
let test_file = "dataset/agaricus-lepiota/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(22);
input_format.set_label_index(22);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
let label = if predicted[i] > 0.5 { 1.0 } else { -1.0 };
if (test_dv[i].label - label).abs() < 0.0001 {
correct += 1;
} else {
wrong += 1;
};
//println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
let auc = AUC(&test_dv, &predicted, test_dv.len());
println!("AUC: {}", auc);
use gbdt::fitness::almost_equal;
assert_eq!(wrong, 0);
assert!(almost_equal(auc, 1.0));
}
pub fn set_debug(&mut self, option: bool)
Set debug mode.
§Example
use gbdt::config::Config;
let mut cfg = Config::new();
cfg.set_debug(true);
Examples found in repository?
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(4);
cfg.set_max_depth(4);
cfg.set_iterations(100);
cfg.set_shrinkage(0.1);
cfg.set_loss("LAD");
cfg.set_debug(true);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/iris/train.txt";
let test_file = "dataset/iris/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(4);
input_format.set_label_index(4);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save the model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load the model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
if almost_equal_thrs(test_dv[i].label, predicted[i], 0.0001) {
correct += 1;
} else {
wrong += 1;
};
println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
assert!(wrong <= 2);
}
More examples
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(22);
cfg.set_max_depth(3);
cfg.set_iterations(50);
cfg.set_shrinkage(0.1);
cfg.set_loss("LogLikelyhood");
cfg.set_debug(true);
//cfg.set_data_sample_ratio(0.8);
//cfg.set_feature_sample_ratio(0.5);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/agaricus-lepiota/train.txt";
let test_file = "dataset/agaricus-lepiota/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(22);
input_format.set_label_index(22);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
let label = if predicted[i] > 0.5 { 1.0 } else { -1.0 };
if (test_dv[i].label - label).abs() < 0.0001 {
correct += 1;
} else {
wrong += 1;
};
//println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
let auc = AUC(&test_dv, &predicted, test_dv.len());
println!("AUC: {}", auc);
use gbdt::fitness::almost_equal;
assert_eq!(wrong, 0);
assert!(almost_equal(auc, 1.0));
}
pub fn enabled_initial_guess(&mut self, option: bool)
Set whether initial guess of test data is enabled.
§Example
use gbdt::config::Config;
let mut cfg = Config::new();
cfg.enabled_initial_guess(false);