Struct gbdt::input::InputFormat
source · pub struct InputFormat {
pub ftype: FileFormat,
pub header: bool,
pub label_idx: usize,
pub enable_unknown_value: bool,
pub delimeter: char,
pub feature_size: usize,
}
Expand description
The input file format struct.
Fields§
§ftype: FileFormat
Data file format
header: bool
Set if ftype is set to FileFormat::CSV. Indicates whether the CSV file has a header row.
label_idx: usize
Set if ftype is set to FileFormat::CSV. Indicates which column is the data label. (default = 0)
enable_unknown_value: bool
Set if ftype is set to FileFormat::CSV. Indicates whether unknown values are allowed in the data file.
delimeter: char
Delimiter of the data file. (The field itself keeps the crate's original spelling, `delimeter`.)
feature_size: usize
Set if ftype is set to FileFormat::TXT. Indicates the total feature size.
Implementations§
source§impl InputFormat
impl InputFormat
sourcepub fn csv_format() -> InputFormat
pub fn csv_format() -> InputFormat
Return a default CSV input format.
§Example
use gbdt::input::InputFormat;
let mut fmt = InputFormat::csv_format();
println!("{}", fmt.to_string());
Examples found in repository?
examples/test-iris.rs (line 23)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(4);
cfg.set_max_depth(4);
cfg.set_iterations(100);
cfg.set_shrinkage(0.1);
cfg.set_loss("LAD");
cfg.set_debug(true);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/iris/train.txt";
let test_file = "dataset/iris/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(4);
input_format.set_label_index(4);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save the model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load the model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
if almost_equal_thrs(test_dv[i].label, predicted[i], 0.0001) {
correct += 1;
} else {
wrong += 1;
};
println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
assert!(wrong <= 2);
}
More examples
examples/test-agaricus-lepiota.rs (line 25)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(22);
cfg.set_max_depth(3);
cfg.set_iterations(50);
cfg.set_shrinkage(0.1);
cfg.set_loss("LogLikelyhood");
cfg.set_debug(true);
//cfg.set_data_sample_ratio(0.8);
//cfg.set_feature_sample_ratio(0.5);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/agaricus-lepiota/train.txt";
let test_file = "dataset/agaricus-lepiota/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(22);
input_format.set_label_index(22);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
let label = if predicted[i] > 0.5 { 1.0 } else { -1.0 };
if (test_dv[i].label - label).abs() < 0.0001 {
correct += 1;
} else {
wrong += 1;
};
//println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
let auc = AUC(&test_dv, &predicted, test_dv.len());
println!("AUC: {}", auc);
use gbdt::fitness::almost_equal;
assert_eq!(wrong, 0);
assert!(almost_equal(auc, 1.0));
}
examples/test-xgb-multi-softmax.rs (line 19)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
fn main() {
// Use xg.py in xgb-data/xgb_multi_softmax to generate a model and get prediction results from xgboost.
// Call this command to convert xgboost model:
// python examples/convert_xgboost.py xgb-data/xgb_multi_softmax/xgb.model "multi:softmax" xgb-data/xgb_multi_softmax/gbdt.model
// load model
let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_multi_softmax/gbdt.model", "multi:softmax")
.expect("failed to load model");
// load test data
let test_file = "xgb-data/xgb_multi_softmax/dermatology.data.test";
let mut input_format = input::InputFormat::csv_format();
input_format.set_label_index(34);
let test_data = input::load(test_file, input_format).expect("failed to load test data");
// inference
println!("start prediction");
let (labels, _probs) = gbdt.predict_multiclass(&test_data, 6);
assert_eq!(labels.len(), test_data.len());
// compare to xgboost prediction results
let predict_result = "xgb-data/xgb_multi_softmax/pred.csv";
let mut xgb_results = Vec::new();
let file = File::open(predict_result).expect("failed to load pred.csv");
let reader = BufReader::new(file);
for line in reader.lines() {
let text = line.expect("failed to read data from pred.csv");
let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
xgb_results.push(value);
}
let mut max_diff: ValueType = -1.0;
for (value1, value2) in labels.iter().zip(xgb_results.iter()) {
println!("{} {}", value1, value2);
let diff = (*value1 as ValueType - *value2).abs();
if diff > max_diff {
max_diff = diff;
}
}
println!(
"Compared to results from xgboost, max error is: {:.10}",
max_diff
);
assert!(max_diff < 0.01);
}
examples/test-xgb-multi-softprob.rs (line 19)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
fn main() {
// Use xg.py in xgb-data/xgb_multi_softprob to generate a model and get prediction results from xgboost.
// Call this command to convert xgboost model:
// python examples/convert_xgboost.py xgb-data/xgb_multi_softprob/xgb.model "multi:softprob" xgb-data/xgb_multi_softprob/gbdt.model
// load model
let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_multi_softprob/gbdt.model", "multi:softprob")
.expect("failed to load model");
// load test data
let test_file = "xgb-data/xgb_multi_softprob/dermatology.data.test";
let mut input_format = input::InputFormat::csv_format();
input_format.set_label_index(34);
let test_data = input::load(test_file, input_format).expect("failed to load test data");
// inference
println!("start prediction");
let (labels, probs) = gbdt.predict_multiclass(&test_data, 6);
assert_eq!(labels.len(), test_data.len());
// compare to xgboost prediction results
let predict_result = "xgb-data/xgb_multi_softprob/pred.csv";
let mut xgb_results = Vec::new();
let file = File::open(predict_result).expect("failed to load pred.csv");
let reader = BufReader::new(file);
for line in reader.lines() {
let text = line.expect("failed to read data from pred.csv");
let split_results: Vec<&str> = text.trim().split(',').collect();
for item in split_results.iter() {
let value: ValueType = item.parse().expect("failed to parse data from pred.csv");
xgb_results.push(value);
}
}
let mut flat_probs = Vec::new();
for item in probs.iter() {
for value in item.iter() {
flat_probs.push(*value);
}
}
let mut max_diff: ValueType = -1.0;
for (value1, value2) in flat_probs.iter().zip(xgb_results.iter()) {
println!("{} {}", value1, value2);
let diff = (value1 - value2).abs();
if diff > max_diff {
max_diff = diff;
}
}
println!(
"Compared to results from xgboost, max error is: {:.10}",
max_diff
);
assert!(max_diff < 0.01);
}
sourcepub fn txt_format() -> InputFormat
pub fn txt_format() -> InputFormat
Return a default LibSVM input format.
§Example
use gbdt::input::InputFormat;
let mut fmt = InputFormat::txt_format();
println!("{}", fmt.to_string());
Examples found in repository?
examples/test-multithreads.rs (line 21)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
fn main() {
let thread_num = 12;
let feature_size = 36;
let model_path = "xgb-data/xgb_reg_linear/gbdt.model";
let test_file = "xgb-data/xgb_reg_linear/machine.txt.test";
// load model
let gbdt = GBDT::from_xgboost_dump(model_path, "reg:linear").expect("faild to load model");
// load test data
let mut fmt = input::InputFormat::txt_format();
fmt.set_feature_size(feature_size);
fmt.set_delimeter(' ');
let mut test_data = input::load(test_file, fmt).unwrap();
// split test data to `thread_num` vectors.
let t1 = Instant::now();
let mut handles = vec![];
let mut test_data_vec = vec![];
let data_size = test_data.len();
let batch_size = (data_size - 1) / thread_num + 1;
for one_batch in test_data.chunks(batch_size) {
test_data_vec.push(one_batch.to_vec())
}
test_data.clear();
test_data.shrink_to_fit();
let t2 = Instant::now();
println!("split data: {:?}", t2 - t1);
// Create `thread_num` threads. Call gbdt::predict in parallel
let t1 = Instant::now();
let gbdt_arc = Arc::new(gbdt);
for data in test_data_vec.into_iter() {
let gbdt_clone = Arc::clone(&gbdt_arc);
let handle = thread::spawn(move || gbdt_clone.predict(&data));
handles.push(handle)
}
// collect results
let mut preds = Vec::with_capacity(data_size);
for handle in handles {
preds.append(&mut handle.join().unwrap());
}
let t2 = Instant::now();
println!("predict data: {:?}", t2 - t1);
assert_eq!(preds.len(), data_size);
}
More examples
examples/test-xgb-rank-pairwise.rs (line 18)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
fn main() {
// Call this command to convert xgboost model:
// python examples/convert_xgboost.py xgb-data/xgb_rank_pairwise/xgb.model "rank:pairwise" xgb-data/xgb_rank_pairwise/gbdt.model
// load model
let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_rank_pairwise/gbdt.model", "rank:pairwise")
.expect("failed to load model");
// load test data
let test_file = "xgb-data/xgb_rank_pairwise/mq2008.test";
let mut input_format = input::InputFormat::txt_format();
input_format.set_feature_size(47);
input_format.set_delimeter(' ');
let test_data = input::load(test_file, input_format).expect("failed to load test data");
// inference
println!("start prediction");
let predicted: PredVec = gbdt.predict(&test_data);
assert_eq!(predicted.len(), test_data.len());
// compare to xgboost prediction results
let predict_result = "xgb-data/xgb_rank_pairwise/pred.csv";
let mut xgb_results = Vec::new();
let file = File::open(predict_result).expect("failed to load pred.csv");
let reader = BufReader::new(file);
for line in reader.lines() {
let text = line.expect("failed to read data from pred.csv");
let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
xgb_results.push(value);
}
let mut max_diff: ValueType = -1.0;
for (value1, value2) in predicted.iter().zip(xgb_results.iter()) {
println!("{} {}", value1, value2);
let diff = (value1 - value2).abs();
if diff > max_diff {
max_diff = diff;
}
}
println!(
"Compared to results from xgboost, max error is: {:.10}",
max_diff
);
assert!(max_diff < 0.01);
}
examples/test-xgb-reg-linear.rs (line 19)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
fn main() {
// Use xg.py in xgb-data/xgb_reg_linear to generate a model and get prediction results from xgboost.
// Call this command to convert xgboost model:
// python examples/convert_xgboost.py xgb-data/xgb_reg_linear/xgb.model "reg:linear" xgb-data/xgb_reg_linear/gbdt.model
// load model
let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_reg_linear/gbdt.model", "reg:linear")
.expect("failed to load model");
// load test data
let test_file = "xgb-data/xgb_reg_linear/machine.txt.test";
let mut input_format = input::InputFormat::txt_format();
input_format.set_feature_size(36);
input_format.set_delimeter(' ');
let test_data = input::load(test_file, input_format).expect("failed to load test data");
// inference
println!("start prediction");
let predicted: PredVec = gbdt.predict(&test_data);
assert_eq!(predicted.len(), test_data.len());
// compare to xgboost prediction results
let predict_result = "xgb-data/xgb_reg_linear/pred.csv";
let mut xgb_results = Vec::new();
let file = File::open(predict_result).expect("failed to load pred.csv");
let reader = BufReader::new(file);
for line in reader.lines() {
let text = line.expect("failed to read data from pred.csv");
let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
xgb_results.push(value);
}
let mut max_diff: ValueType = -1.0;
for (value1, value2) in predicted.iter().zip(xgb_results.iter()) {
println!("{} {}", value1, value2);
let diff = (value1 - value2).abs();
if diff > max_diff {
max_diff = diff;
}
}
println!(
"Compared to results from xgboost, max error is: {:.10}",
max_diff
);
assert!(max_diff < 0.01);
}
examples/test-xgb-reg-logistic.rs (line 19)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
fn main() {
// Use xg.py in xgb-data/xgb_reg_logistic to generate a model and get prediction results from xgboost.
// Call this command to convert xgboost model:
// python examples/convert_xgboost.py xgb-data/xgb_reg_logistic/xgb.model "reg:logistic" xgb-data/xgb_reg_logistic/gbdt.model
// load model
let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_reg_logistic/gbdt.model", "reg:logistic")
.expect("failed to load model");
// load test data
let test_file = "xgb-data/xgb_reg_logistic/agaricus.txt.test";
let mut input_format = input::InputFormat::txt_format();
input_format.set_feature_size(126);
input_format.set_delimeter(' ');
let test_data = input::load(test_file, input_format).expect("failed to load test data");
// inference
println!("start prediction");
let predicted: PredVec = gbdt.predict(&test_data);
assert_eq!(predicted.len(), test_data.len());
// compare to xgboost prediction results
let predict_result = "xgb-data/xgb_reg_logistic/pred.csv";
let mut xgb_results = Vec::new();
let file = File::open(predict_result).expect("failed to load pred.csv");
let reader = BufReader::new(file);
for line in reader.lines() {
let text = line.expect("failed to read data from pred.csv");
let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
xgb_results.push(value);
}
let mut max_diff: ValueType = -1.0;
for (value1, value2) in predicted.iter().zip(xgb_results.iter()) {
println!("{} {}", value1, value2);
let diff = (value1 - value2).abs();
if diff > max_diff {
max_diff = diff;
}
}
println!(
"Compared to results from xgboost, max error is: {:.10}",
max_diff
);
assert!(max_diff < 0.01);
}
examples/test-xgb-binary-logistic.rs (line 19)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
fn main() {
// Use xg.py in xgb-data/xgb_binary_logistic to generate a model and get prediction results from xgboost.
// Call this command to convert xgboost model:
// python examples/convert_xgboost.py xgb-data/xgb_binary_logistic/xgb.model "binary:logistic" xgb-data/xgb_binary_logistic/gbdt.model
// load model
let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_binary_logistic/gbdt.model", "binary:logistic")
.expect("failed to load model");
// load test data
let test_file = "xgb-data/xgb_binary_logistic/agaricus.txt.test";
let mut input_format = input::InputFormat::txt_format();
input_format.set_feature_size(126);
input_format.set_delimeter(' ');
let test_data = input::load(test_file, input_format).expect("failed to load test data");
// inference
println!("start prediction");
let predicted: PredVec = gbdt.predict(&test_data);
assert_eq!(predicted.len(), test_data.len());
// compare to xgboost prediction results
let predict_result = "xgb-data/xgb_binary_logistic/pred.csv";
let mut xgb_results = Vec::new();
let file = File::open(predict_result).expect("failed to load pred.csv");
let reader = BufReader::new(file);
for line in reader.lines() {
let text = line.expect("failed to read data from pred.csv");
let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
xgb_results.push(value);
}
let mut max_diff: ValueType = -1.0;
for (value1, value2) in predicted.iter().zip(xgb_results.iter()) {
println!("{} {}", value1, value2);
let diff = (value1 - value2).abs();
if diff > max_diff {
max_diff = diff;
}
}
println!(
"Compared to results from xgboost, max error is: {:.10}",
max_diff
);
assert!(max_diff < 0.01);
}
examples/test-xgb-binary-logitraw.rs (line 19)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
fn main() {
// Use xg.py in xgb-data/xgb_binary_logitraw to generate a model and get prediction results from xgboost.
// Call this command to convert xgboost model:
// python examples/convert_xgboost.py xgb-data/xgb_binary_logitraw/xgb.model "binary:logitraw" xgb-data/xgb_binary_logitraw/gbdt.model
// load model
let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_binary_logitraw/gbdt.model", "binary:logitraw")
.expect("failed to load model");
// load test data
let test_file = "xgb-data/xgb_binary_logitraw/agaricus.txt.test";
let mut input_format = input::InputFormat::txt_format();
input_format.set_feature_size(126);
input_format.set_delimeter(' ');
let test_data = input::load(test_file, input_format).expect("failed to load test data");
// inference
println!("start prediction");
let predicted: PredVec = gbdt.predict(&test_data);
assert_eq!(predicted.len(), test_data.len());
// compare to xgboost prediction results
let predict_result = "xgb-data/xgb_binary_logitraw/pred.csv";
let mut xgb_results = Vec::new();
let file = File::open(predict_result).expect("failed to load pred.csv");
let reader = BufReader::new(file);
for line in reader.lines() {
let text = line.expect("failed to read data from pred.csv");
let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
xgb_results.push(value);
}
let mut max_diff: ValueType = -1.0;
for (value1, value2) in predicted.iter().zip(xgb_results.iter()) {
println!("{} {}", value1, value2);
let diff = (value1 - value2).abs();
if diff > max_diff {
max_diff = diff;
}
}
println!(
"Compared to results from xgboost, max error is: {:.10}",
max_diff
);
assert!(max_diff < 0.01);
}
Additional examples can be found in:
sourcepub fn to_string(&self) -> String
pub fn to_string(&self) -> String
Transform the input format to human readable string.
§Example
use gbdt::input::InputFormat;
let mut fmt = InputFormat::csv_format();
println!("{}", fmt.to_string());
sourcepub fn set_feature_size(&mut self, size: usize)
pub fn set_feature_size(&mut self, size: usize)
Set feature size for the LibSVM input format.
§Example
use gbdt::input::InputFormat;
let mut fmt = InputFormat::txt_format();
fmt.set_feature_size(126); // the total feature size
Examples found in repository?
examples/test-iris.rs (line 24)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(4);
cfg.set_max_depth(4);
cfg.set_iterations(100);
cfg.set_shrinkage(0.1);
cfg.set_loss("LAD");
cfg.set_debug(true);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/iris/train.txt";
let test_file = "dataset/iris/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(4);
input_format.set_label_index(4);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save the model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load the model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
if almost_equal_thrs(test_dv[i].label, predicted[i], 0.0001) {
correct += 1;
} else {
wrong += 1;
};
println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
assert!(wrong <= 2);
}
More examples
examples/test-multithreads.rs (line 22)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
fn main() {
let thread_num = 12;
let feature_size = 36;
let model_path = "xgb-data/xgb_reg_linear/gbdt.model";
let test_file = "xgb-data/xgb_reg_linear/machine.txt.test";
// load model
let gbdt = GBDT::from_xgboost_dump(model_path, "reg:linear").expect("faild to load model");
// load test data
let mut fmt = input::InputFormat::txt_format();
fmt.set_feature_size(feature_size);
fmt.set_delimeter(' ');
let mut test_data = input::load(test_file, fmt).unwrap();
// split test data to `thread_num` vectors.
let t1 = Instant::now();
let mut handles = vec![];
let mut test_data_vec = vec![];
let data_size = test_data.len();
let batch_size = (data_size - 1) / thread_num + 1;
for one_batch in test_data.chunks(batch_size) {
test_data_vec.push(one_batch.to_vec())
}
test_data.clear();
test_data.shrink_to_fit();
let t2 = Instant::now();
println!("split data: {:?}", t2 - t1);
// Create `thread_num` threads. Call gbdt::predict in parallel
let t1 = Instant::now();
let gbdt_arc = Arc::new(gbdt);
for data in test_data_vec.into_iter() {
let gbdt_clone = Arc::clone(&gbdt_arc);
let handle = thread::spawn(move || gbdt_clone.predict(&data));
handles.push(handle)
}
// collect results
let mut preds = Vec::with_capacity(data_size);
for handle in handles {
preds.append(&mut handle.join().unwrap());
}
let t2 = Instant::now();
println!("predict data: {:?}", t2 - t1);
assert_eq!(preds.len(), data_size);
}
examples/test-xgb-rank-pairwise.rs (line 19)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
fn main() {
// Call this command to convert xgboost model:
// python examples/convert_xgboost.py xgb-data/xgb_rank_pairwise/xgb.model "rank:pairwise" xgb-data/xgb_rank_pairwise/gbdt.model
// load model
let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_rank_pairwise/gbdt.model", "rank:pairwise")
.expect("failed to load model");
// load test data
let test_file = "xgb-data/xgb_rank_pairwise/mq2008.test";
let mut input_format = input::InputFormat::txt_format();
input_format.set_feature_size(47);
input_format.set_delimeter(' ');
let test_data = input::load(test_file, input_format).expect("failed to load test data");
// inference
println!("start prediction");
let predicted: PredVec = gbdt.predict(&test_data);
assert_eq!(predicted.len(), test_data.len());
// compare to xgboost prediction results
let predict_result = "xgb-data/xgb_rank_pairwise/pred.csv";
let mut xgb_results = Vec::new();
let file = File::open(predict_result).expect("failed to load pred.csv");
let reader = BufReader::new(file);
for line in reader.lines() {
let text = line.expect("failed to read data from pred.csv");
let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
xgb_results.push(value);
}
let mut max_diff: ValueType = -1.0;
for (value1, value2) in predicted.iter().zip(xgb_results.iter()) {
println!("{} {}", value1, value2);
let diff = (value1 - value2).abs();
if diff > max_diff {
max_diff = diff;
}
}
println!(
"Compared to results from xgboost, max error is: {:.10}",
max_diff
);
assert!(max_diff < 0.01);
}
examples/test-xgb-reg-linear.rs (line 20)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
fn main() {
// Use xg.py in xgb-data/xgb_reg_linear to generate a model and get prediction results from xgboost.
// Call this command to convert xgboost model:
// python examples/convert_xgboost.py xgb-data/xgb_reg_linear/xgb.model "reg:linear" xgb-data/xgb_reg_linear/gbdt.model
// load model
let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_reg_linear/gbdt.model", "reg:linear")
.expect("failed to load model");
// load test data
let test_file = "xgb-data/xgb_reg_linear/machine.txt.test";
let mut input_format = input::InputFormat::txt_format();
input_format.set_feature_size(36);
input_format.set_delimeter(' ');
let test_data = input::load(test_file, input_format).expect("failed to load test data");
// inference
println!("start prediction");
let predicted: PredVec = gbdt.predict(&test_data);
assert_eq!(predicted.len(), test_data.len());
// compare to xgboost prediction results
let predict_result = "xgb-data/xgb_reg_linear/pred.csv";
let mut xgb_results = Vec::new();
let file = File::open(predict_result).expect("failed to load pred.csv");
let reader = BufReader::new(file);
for line in reader.lines() {
let text = line.expect("failed to read data from pred.csv");
let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
xgb_results.push(value);
}
let mut max_diff: ValueType = -1.0;
for (value1, value2) in predicted.iter().zip(xgb_results.iter()) {
println!("{} {}", value1, value2);
let diff = (value1 - value2).abs();
if diff > max_diff {
max_diff = diff;
}
}
println!(
"Compared to results from xgboost, max error is: {:.10}",
max_diff
);
assert!(max_diff < 0.01);
}
examples/test-agaricus-lepiota.rs (line 26)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(22);
cfg.set_max_depth(3);
cfg.set_iterations(50);
cfg.set_shrinkage(0.1);
cfg.set_loss("LogLikelyhood");
cfg.set_debug(true);
//cfg.set_data_sample_ratio(0.8);
//cfg.set_feature_sample_ratio(0.5);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/agaricus-lepiota/train.txt";
let test_file = "dataset/agaricus-lepiota/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(22);
input_format.set_label_index(22);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
let label = if predicted[i] > 0.5 { 1.0 } else { -1.0 };
if (test_dv[i].label - label).abs() < 0.0001 {
correct += 1;
} else {
wrong += 1;
};
//println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
let auc = AUC(&test_dv, &predicted, test_dv.len());
println!("AUC: {}", auc);
use gbdt::fitness::almost_equal;
assert_eq!(wrong, 0);
assert!(almost_equal(auc, 1.0));
}
examples/test-xgb-reg-logistic.rs (line 20)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
fn main() {
// Use xg.py in xgb-data/xgb_reg_logistic to generate a model and get prediction results from xgboost.
// Call this command to convert xgboost model:
// python examples/convert_xgboost.py xgb-data/xgb_reg_logistic/xgb.model "reg:logistic" xgb-data/xgb_reg_logistic/gbdt.model
// load model
let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_reg_logistic/gbdt.model", "reg:logistic")
.expect("failed to load model");
// load test data
let test_file = "xgb-data/xgb_reg_logistic/agaricus.txt.test";
let mut input_format = input::InputFormat::txt_format();
input_format.set_feature_size(126);
input_format.set_delimeter(' ');
let test_data = input::load(test_file, input_format).expect("failed to load test data");
// inference
println!("start prediction");
let predicted: PredVec = gbdt.predict(&test_data);
assert_eq!(predicted.len(), test_data.len());
// compare to xgboost prediction results
let predict_result = "xgb-data/xgb_reg_logistic/pred.csv";
let mut xgb_results = Vec::new();
let file = File::open(predict_result).expect("failed to load pred.csv");
let reader = BufReader::new(file);
for line in reader.lines() {
let text = line.expect("failed to read data from pred.csv");
let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
xgb_results.push(value);
}
let mut max_diff: ValueType = -1.0;
for (value1, value2) in predicted.iter().zip(xgb_results.iter()) {
println!("{} {}", value1, value2);
let diff = (value1 - value2).abs();
if diff > max_diff {
max_diff = diff;
}
}
println!(
"Compared to results from xgboost, max error is: {:.10}",
max_diff
);
assert!(max_diff < 0.01);
}
Additional examples can be found in:
sourcepub fn set_label_index(&mut self, idx: usize)
pub fn set_label_index(&mut self, idx: usize)
Set the label index for the CSV format.
§Example
use gbdt::input::InputFormat;
let mut fmt = InputFormat::csv_format();
fmt.set_label_index(34);
Examples found in repository?
examples/test-iris.rs (line 25)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(4);
cfg.set_max_depth(4);
cfg.set_iterations(100);
cfg.set_shrinkage(0.1);
cfg.set_loss("LAD");
cfg.set_debug(true);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/iris/train.txt";
let test_file = "dataset/iris/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(4);
input_format.set_label_index(4);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save the model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load the model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
if almost_equal_thrs(test_dv[i].label, predicted[i], 0.0001) {
correct += 1;
} else {
wrong += 1;
};
println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
assert!(wrong <= 2);
}
More examples
examples/test-agaricus-lepiota.rs (line 27)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
fn main() {
let mut cfg = Config::new();
cfg.set_feature_size(22);
cfg.set_max_depth(3);
cfg.set_iterations(50);
cfg.set_shrinkage(0.1);
cfg.set_loss("LogLikelyhood");
cfg.set_debug(true);
//cfg.set_data_sample_ratio(0.8);
//cfg.set_feature_sample_ratio(0.5);
cfg.set_training_optimization_level(2);
// load data
let train_file = "dataset/agaricus-lepiota/train.txt";
let test_file = "dataset/agaricus-lepiota/test.txt";
let mut input_format = InputFormat::csv_format();
input_format.set_feature_size(22);
input_format.set_label_index(22);
let mut train_dv: DataVec =
load(train_file, input_format).expect("failed to load training data");
let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");
// train and save model
let mut gbdt = GBDT::new(&cfg);
gbdt.fit(&mut train_dv);
gbdt.save_model("gbdt.model")
.expect("failed to save the model");
// load model and do inference
let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
let predicted: PredVec = model.predict(&test_dv);
assert_eq!(predicted.len(), test_dv.len());
let mut correct = 0;
let mut wrong = 0;
for i in 0..predicted.len() {
let label = if predicted[i] > 0.5 { 1.0 } else { -1.0 };
if (test_dv[i].label - label).abs() < 0.0001 {
correct += 1;
} else {
wrong += 1;
};
//println!("[{}] {} {}", i, test_dv[i].label, predicted[i]);
}
println!("correct: {}", correct);
println!("wrong: {}", wrong);
let auc = AUC(&test_dv, &predicted, test_dv.len());
println!("AUC: {}", auc);
use gbdt::fitness::almost_equal;
assert_eq!(wrong, 0);
assert!(almost_equal(auc, 1.0));
}
examples/test-xgb-multi-softmax.rs (line 20)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
fn main() {
// Use xg.py in xgb-data/xgb_multi_softmax to generate a model and get prediction results from xgboost.
// Call this command to convert xgboost model:
// python examples/convert_xgboost.py xgb-data/xgb_multi_softmax/xgb.model "multi:softmax" xgb-data/xgb_multi_softmax/gbdt.model
// load model
let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_multi_softmax/gbdt.model", "multi:softmax")
.expect("failed to load model");
// load test data
let test_file = "xgb-data/xgb_multi_softmax/dermatology.data.test";
let mut input_format = input::InputFormat::csv_format();
input_format.set_label_index(34);
let test_data = input::load(test_file, input_format).expect("failed to load test data");
// inference
println!("start prediction");
let (labels, _probs) = gbdt.predict_multiclass(&test_data, 6);
assert_eq!(labels.len(), test_data.len());
// compare to xgboost prediction results
let predict_result = "xgb-data/xgb_multi_softmax/pred.csv";
let mut xgb_results = Vec::new();
let file = File::open(predict_result).expect("failed to load pred.csv");
let reader = BufReader::new(file);
for line in reader.lines() {
let text = line.expect("failed to read data from pred.csv");
let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
xgb_results.push(value);
}
let mut max_diff: ValueType = -1.0;
for (value1, value2) in labels.iter().zip(xgb_results.iter()) {
println!("{} {}", value1, value2);
let diff = (*value1 as ValueType - *value2).abs();
if diff > max_diff {
max_diff = diff;
}
}
println!(
"Compared to results from xgboost, max error is: {:.10}",
max_diff
);
assert!(max_diff < 0.01);
}
examples/test-xgb-multi-softprob.rs (line 20)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
fn main() {
// Use xg.py in xgb-data/xgb_multi_softprob to generate a model and get prediction results from xgboost.
// Call this command to convert xgboost model:
// python examples/convert_xgboost.py xgb-data/xgb_multi_softprob/xgb.model "multi:softprob" xgb-data/xgb_multi_softprob/gbdt.model
// load model
let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_multi_softprob/gbdt.model", "multi:softprob")
.expect("failed to load model");
// load test data
let test_file = "xgb-data/xgb_multi_softprob/dermatology.data.test";
let mut input_format = input::InputFormat::csv_format();
input_format.set_label_index(34);
let test_data = input::load(test_file, input_format).expect("failed to load test data");
// inference
println!("start prediction");
let (labels, probs) = gbdt.predict_multiclass(&test_data, 6);
assert_eq!(labels.len(), test_data.len());
// compare to xgboost prediction results
let predict_result = "xgb-data/xgb_multi_softprob/pred.csv";
let mut xgb_results = Vec::new();
let file = File::open(predict_result).expect("failed to load pred.csv");
let reader = BufReader::new(file);
for line in reader.lines() {
let text = line.expect("failed to read data from pred.csv");
let split_results: Vec<&str> = text.trim().split(',').collect();
for item in split_results.iter() {
let value: ValueType = item.parse().expect("failed to parse data from pred.csv");
xgb_results.push(value);
}
}
let mut flat_probs = Vec::new();
for item in probs.iter() {
for value in item.iter() {
flat_probs.push(*value);
}
}
let mut max_diff: ValueType = -1.0;
for (value1, value2) in flat_probs.iter().zip(xgb_results.iter()) {
println!("{} {}", value1, value2);
let diff = (value1 - value2).abs();
if diff > max_diff {
max_diff = diff;
}
}
println!(
"Compared to results from xgboost, max error is: {:.10}",
max_diff
);
assert!(max_diff < 0.01);
}
sourcepub fn set_delimeter(&mut self, delim: char)
pub fn set_delimeter(&mut self, delim: char)
Set the delimiter character used when parsing the data file.
§Example
use gbdt::input::InputFormat;
let mut fmt = InputFormat::txt_format();
fmt.set_delimeter(' ');
Examples found in repository?
examples/test-multithreads.rs (line 23)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
fn main() {
let thread_num = 12;
let feature_size = 36;
let model_path = "xgb-data/xgb_reg_linear/gbdt.model";
let test_file = "xgb-data/xgb_reg_linear/machine.txt.test";
// load model
let gbdt = GBDT::from_xgboost_dump(model_path, "reg:linear").expect("faild to load model");
// load test data
let mut fmt = input::InputFormat::txt_format();
fmt.set_feature_size(feature_size);
fmt.set_delimeter(' ');
let mut test_data = input::load(test_file, fmt).unwrap();
// split test data to `thread_num` vectors.
let t1 = Instant::now();
let mut handles = vec![];
let mut test_data_vec = vec![];
let data_size = test_data.len();
let batch_size = (data_size - 1) / thread_num + 1;
for one_batch in test_data.chunks(batch_size) {
test_data_vec.push(one_batch.to_vec())
}
test_data.clear();
test_data.shrink_to_fit();
let t2 = Instant::now();
println!("split data: {:?}", t2 - t1);
// Create `thread_num` threads. Call gbdt::predict in parallel
let t1 = Instant::now();
let gbdt_arc = Arc::new(gbdt);
for data in test_data_vec.into_iter() {
let gbdt_clone = Arc::clone(&gbdt_arc);
let handle = thread::spawn(move || gbdt_clone.predict(&data));
handles.push(handle)
}
// collect results
let mut preds = Vec::with_capacity(data_size);
for handle in handles {
preds.append(&mut handle.join().unwrap());
}
let t2 = Instant::now();
println!("predict data: {:?}", t2 - t1);
assert_eq!(preds.len(), data_size);
}
More examples
examples/test-xgb-rank-pairwise.rs (line 20)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
fn main() {
// Call this command to convert xgboost model:
// python examples/convert_xgboost.py xgb-data/xgb_rank_pairwise/xgb.model "rank:pairwise" xgb-data/xgb_rank_pairwise/gbdt.model
// load model
let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_rank_pairwise/gbdt.model", "rank:pairwise")
.expect("failed to load model");
// load test data
let test_file = "xgb-data/xgb_rank_pairwise/mq2008.test";
let mut input_format = input::InputFormat::txt_format();
input_format.set_feature_size(47);
input_format.set_delimeter(' ');
let test_data = input::load(test_file, input_format).expect("failed to load test data");
// inference
println!("start prediction");
let predicted: PredVec = gbdt.predict(&test_data);
assert_eq!(predicted.len(), test_data.len());
// compare to xgboost prediction results
let predict_result = "xgb-data/xgb_rank_pairwise/pred.csv";
let mut xgb_results = Vec::new();
let file = File::open(predict_result).expect("failed to load pred.csv");
let reader = BufReader::new(file);
for line in reader.lines() {
let text = line.expect("failed to read data from pred.csv");
let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
xgb_results.push(value);
}
let mut max_diff: ValueType = -1.0;
for (value1, value2) in predicted.iter().zip(xgb_results.iter()) {
println!("{} {}", value1, value2);
let diff = (value1 - value2).abs();
if diff > max_diff {
max_diff = diff;
}
}
println!(
"Compared to results from xgboost, max error is: {:.10}",
max_diff
);
assert!(max_diff < 0.01);
}
examples/test-xgb-reg-linear.rs (line 21)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
fn main() {
// Use xg.py in xgb-data/xgb_reg_linear to generate a model and get prediction results from xgboost.
// Call this command to convert xgboost model:
// python examples/convert_xgboost.py xgb-data/xgb_reg_linear/xgb.model "reg:linear" xgb-data/xgb_reg_linear/gbdt.model
// load model
let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_reg_linear/gbdt.model", "reg:linear")
.expect("failed to load model");
// load test data
let test_file = "xgb-data/xgb_reg_linear/machine.txt.test";
let mut input_format = input::InputFormat::txt_format();
input_format.set_feature_size(36);
input_format.set_delimeter(' ');
let test_data = input::load(test_file, input_format).expect("failed to load test data");
// inference
println!("start prediction");
let predicted: PredVec = gbdt.predict(&test_data);
assert_eq!(predicted.len(), test_data.len());
// compare to xgboost prediction results
let predict_result = "xgb-data/xgb_reg_linear/pred.csv";
let mut xgb_results = Vec::new();
let file = File::open(predict_result).expect("failed to load pred.csv");
let reader = BufReader::new(file);
for line in reader.lines() {
let text = line.expect("failed to read data from pred.csv");
let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
xgb_results.push(value);
}
let mut max_diff: ValueType = -1.0;
for (value1, value2) in predicted.iter().zip(xgb_results.iter()) {
println!("{} {}", value1, value2);
let diff = (value1 - value2).abs();
if diff > max_diff {
max_diff = diff;
}
}
println!(
"Compared to results from xgboost, max error is: {:.10}",
max_diff
);
assert!(max_diff < 0.01);
}
examples/test-xgb-reg-logistic.rs (line 21)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
fn main() {
// Use xg.py in xgb-data/xgb_reg_logistic to generate a model and get prediction results from xgboost.
// Call this command to convert xgboost model:
// python examples/convert_xgboost.py xgb-data/xgb_reg_logistic/xgb.model "reg:logistic" xgb-data/xgb_reg_logistic/gbdt.model
// load model
let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_reg_logistic/gbdt.model", "reg:logistic")
.expect("failed to load model");
// load test data
let test_file = "xgb-data/xgb_reg_logistic/agaricus.txt.test";
let mut input_format = input::InputFormat::txt_format();
input_format.set_feature_size(126);
input_format.set_delimeter(' ');
let test_data = input::load(test_file, input_format).expect("failed to load test data");
// inference
println!("start prediction");
let predicted: PredVec = gbdt.predict(&test_data);
assert_eq!(predicted.len(), test_data.len());
// compare to xgboost prediction results
let predict_result = "xgb-data/xgb_reg_logistic/pred.csv";
let mut xgb_results = Vec::new();
let file = File::open(predict_result).expect("failed to load pred.csv");
let reader = BufReader::new(file);
for line in reader.lines() {
let text = line.expect("failed to read data from pred.csv");
let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
xgb_results.push(value);
}
let mut max_diff: ValueType = -1.0;
for (value1, value2) in predicted.iter().zip(xgb_results.iter()) {
println!("{} {}", value1, value2);
let diff = (value1 - value2).abs();
if diff > max_diff {
max_diff = diff;
}
}
println!(
"Compared to results from xgboost, max error is: {:.10}",
max_diff
);
assert!(max_diff < 0.01);
}
examples/test-xgb-binary-logistic.rs (line 21)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
fn main() {
// Use xg.py in xgb-data/xgb_binary_logistic to generate a model and get prediction results from xgboost.
// Call this command to convert xgboost model:
// python examples/convert_xgboost.py xgb-data/xgb_binary_logistic/xgb.model "binary:logistic" xgb-data/xgb_binary_logistic/gbdt.model
// load model
let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_binary_logistic/gbdt.model", "binary:logistic")
.expect("failed to load model");
// load test data
let test_file = "xgb-data/xgb_binary_logistic/agaricus.txt.test";
let mut input_format = input::InputFormat::txt_format();
input_format.set_feature_size(126);
input_format.set_delimeter(' ');
let test_data = input::load(test_file, input_format).expect("failed to load test data");
// inference
println!("start prediction");
let predicted: PredVec = gbdt.predict(&test_data);
assert_eq!(predicted.len(), test_data.len());
// compare to xgboost prediction results
let predict_result = "xgb-data/xgb_binary_logistic/pred.csv";
let mut xgb_results = Vec::new();
let file = File::open(predict_result).expect("failed to load pred.csv");
let reader = BufReader::new(file);
for line in reader.lines() {
let text = line.expect("failed to read data from pred.csv");
let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
xgb_results.push(value);
}
let mut max_diff: ValueType = -1.0;
for (value1, value2) in predicted.iter().zip(xgb_results.iter()) {
println!("{} {}", value1, value2);
let diff = (value1 - value2).abs();
if diff > max_diff {
max_diff = diff;
}
}
println!(
"Compared to results from xgboost, max error is: {:.10}",
max_diff
);
assert!(max_diff < 0.01);
}
examples/test-xgb-binary-logitraw.rs (line 21)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
fn main() {
// Use xg.py in xgb-data/xgb_binary_logitraw to generate a model and get prediction results from xgboost.
// Call this command to convert xgboost model:
// python examples/convert_xgboost.py xgb-data/xgb_binary_logitraw/xgb.model "binary:logitraw" xgb-data/xgb_binary_logitraw/gbdt.model
// load model
let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_binary_logitraw/gbdt.model", "binary:logitraw")
.expect("failed to load model");
// load test data
let test_file = "xgb-data/xgb_binary_logitraw/agaricus.txt.test";
let mut input_format = input::InputFormat::txt_format();
input_format.set_feature_size(126);
input_format.set_delimeter(' ');
let test_data = input::load(test_file, input_format).expect("failed to load test data");
// inference
println!("start prediction");
let predicted: PredVec = gbdt.predict(&test_data);
assert_eq!(predicted.len(), test_data.len());
// compare to xgboost prediction results
let predict_result = "xgb-data/xgb_binary_logitraw/pred.csv";
let mut xgb_results = Vec::new();
let file = File::open(predict_result).expect("failed to load pred.csv");
let reader = BufReader::new(file);
for line in reader.lines() {
let text = line.expect("failed to read data from pred.csv");
let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
xgb_results.push(value);
}
let mut max_diff: ValueType = -1.0;
for (value1, value2) in predicted.iter().zip(xgb_results.iter()) {
println!("{} {}", value1, value2);
let diff = (value1 - value2).abs();
if diff > max_diff {
max_diff = diff;
}
}
println!(
"Compared to results from xgboost, max error is: {:.10}",
max_diff
);
assert!(max_diff < 0.01);
}
Additional examples can be found in the repository's examples directory.
Trait Implementations§
source§impl Clone for InputFormat
impl Clone for InputFormat
source§fn clone(&self) -> InputFormat
fn clone(&self) -> InputFormat
Returns a copy of the value. Read more
1.0.0 · source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
source§impl Debug for InputFormat
impl Debug for InputFormat
source§impl<'de> Deserialize<'de> for InputFormat
impl<'de> Deserialize<'de> for InputFormat
source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
Deserialize this value from the given Serde deserializer. Read more
source§impl Serialize for InputFormat
impl Serialize for InputFormat
impl Copy for InputFormat
Auto Trait Implementations§
impl RefUnwindSafe for InputFormat
impl Send for InputFormat
impl Sync for InputFormat
impl Unpin for InputFormat
impl UnwindSafe for InputFormat
Blanket Implementations§
source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more