Struct gbdt::input::InputFormat

pub struct InputFormat {
    pub ftype: FileFormat,
    pub header: bool,
    pub label_idx: usize,
    pub enable_unknown_value: bool,
    pub delimeter: char,
    pub feature_size: usize,
}

The input file format struct.

Fields

ftype: FileFormat

Data file format.

header: bool

Set if ftype is set to FileFormat::CSV. Indicates whether the CSV file has a header row.

label_idx: usize

Set if ftype is set to FileFormat::CSV. Indicates which column is the data label. (default = 0)

enable_unknown_value: bool

Set if ftype is set to FileFormat::CSV. Indicates whether unknown values are allowed in the data file.

delimeter: char

Delimiter of the data file.

feature_size: usize

Set if ftype is set to FileFormat::TXT (the LibSVM format). Indicates the total feature size.
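For reference, here is a minimal sketch (not part of the crate's own docs) that configures these fields for a headerless CSV file with four feature columns and the label in column 4; the file path is hypothetical:

use gbdt::input::{load, InputFormat};

let mut fmt = InputFormat::csv_format();
fmt.header = false;       // the file has no header row
fmt.delimeter = ',';      // comma-separated values
fmt.set_feature_size(4);  // four feature columns
fmt.set_label_index(4);   // the label lives in column index 4
// InputFormat is Copy, so the same format value can be passed to load() more than once.
let train = load("path/to/train.csv", fmt).expect("failed to load training data");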

Implementations

impl InputFormat

pub fn csv_format() -> InputFormat

Return a default CSV input format.

Example
use gbdt::input::InputFormat;
let mut fmt = InputFormat::csv_format();
println!("{}", fmt.to_string());
Examples found in repository:
examples/test-iris.rs (line 23)
fn main() {
    let mut cfg = Config::new();
    cfg.set_feature_size(4);
    cfg.set_max_depth(4);
    cfg.set_iterations(100);
    cfg.set_shrinkage(0.1);
    cfg.set_loss("LAD");
    cfg.set_debug(true);
    cfg.set_training_optimization_level(2);

    // load data
    let train_file = "dataset/iris/train.txt";
    let test_file = "dataset/iris/test.txt";

    let mut input_format = InputFormat::csv_format();
    input_format.set_feature_size(4);
    input_format.set_label_index(4);
    let mut train_dv: DataVec =
        load(train_file, input_format).expect("failed to load training data");
    let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");

    // train and save the model
    let mut gbdt = GBDT::new(&cfg);
    gbdt.fit(&mut train_dv);
    gbdt.save_model("gbdt.model")
        .expect("failed to save the model");

    // load the model and do inference
    let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
    let predicted: PredVec = model.predict(&test_dv);

    assert_eq!(predicted.len(), test_dv.len());
    let mut correct = 0;
    let mut wrong = 0;
    for i in 0..predicted.len() {
        if almost_equal_thrs(test_dv[i].label, predicted[i], 0.0001) {
            correct += 1;
        } else {
            wrong += 1;
        };
        println!("[{}]  {}  {}", i, test_dv[i].label, predicted[i]);
    }

    println!("correct: {}", correct);
    println!("wrong:   {}", wrong);

    assert!(wrong <= 2);
}
More examples:
examples/test-agaricus-lepiota.rs (line 25)
fn main() {
    let mut cfg = Config::new();
    cfg.set_feature_size(22);
    cfg.set_max_depth(3);
    cfg.set_iterations(50);
    cfg.set_shrinkage(0.1);
    cfg.set_loss("LogLikelyhood");
    cfg.set_debug(true);
    //cfg.set_data_sample_ratio(0.8);
    //cfg.set_feature_sample_ratio(0.5);
    cfg.set_training_optimization_level(2);

    // load data
    let train_file = "dataset/agaricus-lepiota/train.txt";
    let test_file = "dataset/agaricus-lepiota/test.txt";

    let mut input_format = InputFormat::csv_format();
    input_format.set_feature_size(22);
    input_format.set_label_index(22);
    let mut train_dv: DataVec =
        load(train_file, input_format).expect("failed to load training data");
    let test_dv: DataVec = load(test_file, input_format).expect("failed to load test data");

    // train and save model
    let mut gbdt = GBDT::new(&cfg);
    gbdt.fit(&mut train_dv);
    gbdt.save_model("gbdt.model")
        .expect("failed to save the model");

    // load model and do inference
    let model = GBDT::load_model("gbdt.model").expect("failed to load the model");
    let predicted: PredVec = model.predict(&test_dv);

    assert_eq!(predicted.len(), test_dv.len());
    let mut correct = 0;
    let mut wrong = 0;
    for i in 0..predicted.len() {
        let label = if predicted[i] > 0.5 { 1.0 } else { -1.0 };
        if (test_dv[i].label - label).abs() < 0.0001 {
            correct += 1;
        } else {
            wrong += 1;
        };
        //println!("[{}]  {}  {}", i, test_dv[i].label, predicted[i]);
    }

    println!("correct: {}", correct);
    println!("wrong:   {}", wrong);

    let auc = AUC(&test_dv, &predicted, test_dv.len());
    println!("AUC: {}", auc);

    use gbdt::fitness::almost_equal;
    assert_eq!(wrong, 0);
    assert!(almost_equal(auc, 1.0));
}
examples/test-xgb-multi-softmax.rs (line 19)
fn main() {
    // Use xg.py in xgb-data/xgb_multi_softmax to generate a model and get prediction results from xgboost.
    // Call this command to convert xgboost model:
    // python examples/convert_xgboost.py xgb-data/xgb_multi_softmax/xgb.model "multi:softmax" xgb-data/xgb_multi_softmax/gbdt.model
    // load model
    let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_multi_softmax/gbdt.model", "multi:softmax")
        .expect("failed to load model");

    // load test data
    let test_file = "xgb-data/xgb_multi_softmax/dermatology.data.test";
    let mut input_format = input::InputFormat::csv_format();
    input_format.set_label_index(34);
    let test_data = input::load(test_file, input_format).expect("failed to load test data");

    // inference
    println!("start prediction");
    let (labels, _probs) = gbdt.predict_multiclass(&test_data, 6);
    assert_eq!(labels.len(), test_data.len());

    // compare to xgboost prediction results
    let predict_result = "xgb-data/xgb_multi_softmax/pred.csv";

    let mut xgb_results = Vec::new();
    let file = File::open(predict_result).expect("failed to load pred.csv");
    let reader = BufReader::new(file);
    for line in reader.lines() {
        let text = line.expect("failed to read data from pred.csv");
        let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
        xgb_results.push(value);
    }

    let mut max_diff: ValueType = -1.0;
    for (value1, value2) in labels.iter().zip(xgb_results.iter()) {
        println!("{} {}", value1, value2);
        let diff = (*value1 as ValueType - *value2).abs();
        if diff > max_diff {
            max_diff = diff;
        }
    }

    println!(
        "Compared to results from xgboost, max error is: {:.10}",
        max_diff
    );
    assert!(max_diff < 0.01);
}
examples/test-xgb-multi-softprob.rs (line 19)
fn main() {
    // Use xg.py in xgb-data/xgb_multi_softprob to generate a model and get prediction results from xgboost.
    // Call this command to convert xgboost model:
    // python examples/convert_xgboost.py xgb-data/xgb_multi_softprob/xgb.model "multi:softprob" xgb-data/xgb_multi_softprob/gbdt.model
    // load model
    let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_multi_softprob/gbdt.model", "multi:softprob")
        .expect("failed to load model");

    // load test data
    let test_file = "xgb-data/xgb_multi_softprob/dermatology.data.test";
    let mut input_format = input::InputFormat::csv_format();
    input_format.set_label_index(34);
    let test_data = input::load(test_file, input_format).expect("failed to load test data");

    // inference
    println!("start prediction");
    let (labels, probs) = gbdt.predict_multiclass(&test_data, 6);
    assert_eq!(labels.len(), test_data.len());

    // compare to xgboost prediction results
    let predict_result = "xgb-data/xgb_multi_softprob/pred.csv";

    let mut xgb_results = Vec::new();
    let file = File::open(predict_result).expect("failed to load pred.csv");
    let reader = BufReader::new(file);
    for line in reader.lines() {
        let text = line.expect("failed to read data from pred.csv");
        let split_results: Vec<&str> = text.trim().split(',').collect();
        for item in split_results.iter() {
            let value: ValueType = item.parse().expect("failed to parse data from pred.csv");
            xgb_results.push(value);
        }
    }

    let mut flat_probs = Vec::new();
    for item in probs.iter() {
        for value in item.iter() {
            flat_probs.push(*value);
        }
    }

    let mut max_diff: ValueType = -1.0;
    for (value1, value2) in flat_probs.iter().zip(xgb_results.iter()) {
        println!("{} {}", value1, value2);
        let diff = (value1 - value2).abs();
        if diff > max_diff {
            max_diff = diff;
        }
    }

    println!(
        "Compared to results from xgboost, max error is: {:.10}",
        max_diff
    );
    assert!(max_diff < 0.01);
}

pub fn txt_format() -> InputFormat

Return a default LibSVM input format.

Example
use gbdt::input::InputFormat;
let mut fmt = InputFormat::txt_format();
println!("{}", fmt.to_string());
Examples found in repository:
examples/test-multithreads.rs (line 21)
fn main() {
    let thread_num = 12;
    let feature_size = 36;
    let model_path = "xgb-data/xgb_reg_linear/gbdt.model";
    let test_file = "xgb-data/xgb_reg_linear/machine.txt.test";

    // load model
    let gbdt = GBDT::from_xgboost_dump(model_path, "reg:linear").expect("failed to load model");

    // load test data
    let mut fmt = input::InputFormat::txt_format();
    fmt.set_feature_size(feature_size);
    fmt.set_delimeter(' ');
    let mut test_data = input::load(test_file, fmt).unwrap();

    // split test data to `thread_num` vectors.
    let t1 = Instant::now();
    let mut handles = vec![];
    let mut test_data_vec = vec![];
    let data_size = test_data.len();
    let batch_size = (data_size - 1) / thread_num + 1;
    for one_batch in test_data.chunks(batch_size) {
        test_data_vec.push(one_batch.to_vec())
    }

    test_data.clear();
    test_data.shrink_to_fit();
    let t2 = Instant::now();
    println!("split data: {:?}", t2 - t1);

    // Create `thread_num` threads. Call gbdt::predict in parallel
    let t1 = Instant::now();
    let gbdt_arc = Arc::new(gbdt);
    for data in test_data_vec.into_iter() {
        let gbdt_clone = Arc::clone(&gbdt_arc);
        let handle = thread::spawn(move || gbdt_clone.predict(&data));
        handles.push(handle)
    }

    // collect results
    let mut preds = Vec::with_capacity(data_size);
    for handle in handles {
        preds.append(&mut handle.join().unwrap());
    }

    let t2 = Instant::now();
    println!("predict data: {:?}", t2 - t1);
    assert_eq!(preds.len(), data_size);
}
More examples:
examples/test-xgb-rank-pairwise.rs (line 18)
fn main() {
    // Call this command to convert xgboost model:
    // python examples/convert_xgboost.py xgb-data/xgb_rank_pairwise/xgb.model "rank:pairwise" xgb-data/xgb_rank_pairwise/gbdt.model
    // load model
    let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_rank_pairwise/gbdt.model", "rank:pairwise")
        .expect("failed to load model");

    // load test data
    let test_file = "xgb-data/xgb_rank_pairwise/mq2008.test";
    let mut input_format = input::InputFormat::txt_format();
    input_format.set_feature_size(47);
    input_format.set_delimeter(' ');
    let test_data = input::load(test_file, input_format).expect("failed to load test data");

    // inference
    println!("start prediction");
    let predicted: PredVec = gbdt.predict(&test_data);
    assert_eq!(predicted.len(), test_data.len());

    // compare to xgboost prediction results
    let predict_result = "xgb-data/xgb_rank_pairwise/pred.csv";

    let mut xgb_results = Vec::new();
    let file = File::open(predict_result).expect("failed to load pred.csv");
    let reader = BufReader::new(file);
    for line in reader.lines() {
        let text = line.expect("failed to read data from pred.csv");
        let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
        xgb_results.push(value);
    }

    let mut max_diff: ValueType = -1.0;
    for (value1, value2) in predicted.iter().zip(xgb_results.iter()) {
        println!("{} {}", value1, value2);
        let diff = (value1 - value2).abs();
        if diff > max_diff {
            max_diff = diff;
        }
    }

    println!(
        "Compared to results from xgboost, max error is: {:.10}",
        max_diff
    );
    assert!(max_diff < 0.01);
}
examples/test-xgb-reg-linear.rs (line 19)
fn main() {
    // Use xg.py in xgb-data/xgb_reg_linear to generate a model and get prediction results from xgboost.
    // Call this command to convert xgboost model:
    // python examples/convert_xgboost.py xgb-data/xgb_reg_linear/xgb.model "reg:linear" xgb-data/xgb_reg_linear/gbdt.model
    // load model
    let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_reg_linear/gbdt.model", "reg:linear")
        .expect("failed to load model");

    // load test data
    let test_file = "xgb-data/xgb_reg_linear/machine.txt.test";
    let mut input_format = input::InputFormat::txt_format();
    input_format.set_feature_size(36);
    input_format.set_delimeter(' ');
    let test_data = input::load(test_file, input_format).expect("failed to load test data");

    // inference
    println!("start prediction");
    let predicted: PredVec = gbdt.predict(&test_data);
    assert_eq!(predicted.len(), test_data.len());

    // compare to xgboost prediction results
    let predict_result = "xgb-data/xgb_reg_linear/pred.csv";

    let mut xgb_results = Vec::new();
    let file = File::open(predict_result).expect("failed to load pred.csv");
    let reader = BufReader::new(file);
    for line in reader.lines() {
        let text = line.expect("failed to read data from pred.csv");
        let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
        xgb_results.push(value);
    }

    let mut max_diff: ValueType = -1.0;
    for (value1, value2) in predicted.iter().zip(xgb_results.iter()) {
        println!("{} {}", value1, value2);
        let diff = (value1 - value2).abs();
        if diff > max_diff {
            max_diff = diff;
        }
    }

    println!(
        "Compared to results from xgboost, max error is: {:.10}",
        max_diff
    );
    assert!(max_diff < 0.01);
}
examples/test-xgb-reg-logistic.rs (line 19)
fn main() {
    // Use xg.py in xgb-data/xgb_reg_logistic to generate a model and get prediction results from xgboost.
    // Call this command to convert xgboost model:
    // python examples/convert_xgboost.py xgb-data/xgb_reg_logistic/xgb.model "reg:logistic" xgb-data/xgb_reg_logistic/gbdt.model
    // load model
    let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_reg_logistic/gbdt.model", "reg:logistic")
        .expect("failed to load model");

    // load test data
    let test_file = "xgb-data/xgb_reg_logistic/agaricus.txt.test";
    let mut input_format = input::InputFormat::txt_format();
    input_format.set_feature_size(126);
    input_format.set_delimeter(' ');
    let test_data = input::load(test_file, input_format).expect("failed to load test data");

    // inference
    println!("start prediction");
    let predicted: PredVec = gbdt.predict(&test_data);
    assert_eq!(predicted.len(), test_data.len());

    // compare to xgboost prediction results
    let predict_result = "xgb-data/xgb_reg_logistic/pred.csv";

    let mut xgb_results = Vec::new();
    let file = File::open(predict_result).expect("failed to load pred.csv");
    let reader = BufReader::new(file);
    for line in reader.lines() {
        let text = line.expect("failed to read data from pred.csv");
        let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
        xgb_results.push(value);
    }

    let mut max_diff: ValueType = -1.0;
    for (value1, value2) in predicted.iter().zip(xgb_results.iter()) {
        println!("{} {}", value1, value2);
        let diff = (value1 - value2).abs();
        if diff > max_diff {
            max_diff = diff;
        }
    }

    println!(
        "Compared to results from xgboost, max error is: {:.10}",
        max_diff
    );
    assert!(max_diff < 0.01);
}
examples/test-xgb-binary-logistic.rs (line 19)
fn main() {
    // Use xg.py in xgb-data/xgb_binary_logistic to generate a model and get prediction results from xgboost.
    // Call this command to convert xgboost model:
    // python examples/convert_xgboost.py xgb-data/xgb_binary_logistic/xgb.model "binary:logistic" xgb-data/xgb_binary_logistic/gbdt.model
    // load model
    let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_binary_logistic/gbdt.model", "binary:logistic")
        .expect("failed to load model");

    // load test data
    let test_file = "xgb-data/xgb_binary_logistic/agaricus.txt.test";
    let mut input_format = input::InputFormat::txt_format();
    input_format.set_feature_size(126);
    input_format.set_delimeter(' ');
    let test_data = input::load(test_file, input_format).expect("failed to load test data");

    // inference
    println!("start prediction");
    let predicted: PredVec = gbdt.predict(&test_data);
    assert_eq!(predicted.len(), test_data.len());

    // compare to xgboost prediction results
    let predict_result = "xgb-data/xgb_binary_logistic/pred.csv";

    let mut xgb_results = Vec::new();
    let file = File::open(predict_result).expect("failed to load pred.csv");
    let reader = BufReader::new(file);
    for line in reader.lines() {
        let text = line.expect("failed to read data from pred.csv");
        let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
        xgb_results.push(value);
    }

    let mut max_diff: ValueType = -1.0;
    for (value1, value2) in predicted.iter().zip(xgb_results.iter()) {
        println!("{} {}", value1, value2);
        let diff = (value1 - value2).abs();
        if diff > max_diff {
            max_diff = diff;
        }
    }

    println!(
        "Compared to results from xgboost, max error is: {:.10}",
        max_diff
    );
    assert!(max_diff < 0.01);
}
examples/test-xgb-binary-logitraw.rs (line 19)
fn main() {
    // Use xg.py in xgb-data/xgb_binary_logitraw to generate a model and get prediction results from xgboost.
    // Call this command to convert xgboost model:
    // python examples/convert_xgboost.py xgb-data/xgb_binary_logitraw/xgb.model "binary:logitraw" xgb-data/xgb_binary_logitraw/gbdt.model
    // load model
    let gbdt = GBDT::from_xgboost_dump("xgb-data/xgb_binary_logitraw/gbdt.model", "binary:logitraw")
        .expect("failed to load model");

    // load test data
    let test_file = "xgb-data/xgb_binary_logitraw/agaricus.txt.test";
    let mut input_format = input::InputFormat::txt_format();
    input_format.set_feature_size(126);
    input_format.set_delimeter(' ');
    let test_data = input::load(test_file, input_format).expect("failed to load test data");

    // inference
    println!("start prediction");
    let predicted: PredVec = gbdt.predict(&test_data);
    assert_eq!(predicted.len(), test_data.len());

    // compare to xgboost prediction results
    let predict_result = "xgb-data/xgb_binary_logitraw/pred.csv";

    let mut xgb_results = Vec::new();
    let file = File::open(predict_result).expect("failed to load pred.csv");
    let reader = BufReader::new(file);
    for line in reader.lines() {
        let text = line.expect("failed to read data from pred.csv");
        let value: ValueType = text.parse().expect("failed to parse data from pred.csv");
        xgb_results.push(value);
    }

    let mut max_diff: ValueType = -1.0;
    for (value1, value2) in predicted.iter().zip(xgb_results.iter()) {
        println!("{} {}", value1, value2);
        let diff = (value1 - value2).abs();
        if diff > max_diff {
            max_diff = diff;
        }
    }

    println!(
        "Compared to results from xgboost, max error is: {:.10}",
        max_diff
    );
    assert!(max_diff < 0.01);
}

pub fn to_string(&self) -> String

Transform the input format to a human-readable string.

Example
use gbdt::input::InputFormat;
let mut fmt = InputFormat::csv_format();
println!("{}", fmt.to_string());

pub fn set_feature_size(&mut self, size: usize)

Set the feature size for the LibSVM input format.

Example
use gbdt::input::InputFormat;
let mut fmt = InputFormat::txt_format();
fmt.set_feature_size(126); // the total feature size
Examples found in repository:
examples/test-iris.rs (line 24)
examples/test-multithreads.rs (line 22)
examples/test-xgb-rank-pairwise.rs (line 19)
examples/test-xgb-reg-linear.rs (line 20)
examples/test-agaricus-lepiota.rs (line 26)
examples/test-xgb-reg-logistic.rs (line 20)
(The full sources of these examples are listed above under csv_format and txt_format.)

pub fn set_label_index(&mut self, idx: usize)

Set the label index for the CSV format.

Example
use gbdt::input::InputFormat;
let mut fmt = InputFormat::csv_format();
fmt.set_label_index(34);
Examples found in repository:
examples/test-iris.rs (line 25)
examples/test-agaricus-lepiota.rs (line 27)
examples/test-xgb-multi-softmax.rs (line 20)
examples/test-xgb-multi-softprob.rs (line 20)
(The full sources of these examples are listed above under csv_format.)

pub fn set_delimeter(&mut self, delim: char)

Set the delimiter of the data file.

Example
use gbdt::input::InputFormat;
let mut fmt = InputFormat::txt_format();
fmt.set_delimeter(' ');
Examples found in repository:
examples/test-multithreads.rs (line 23)
examples/test-xgb-rank-pairwise.rs (line 20)
examples/test-xgb-reg-linear.rs (line 21)
examples/test-xgb-reg-logistic.rs (line 21)
examples/test-xgb-binary-logistic.rs (line 21)
examples/test-xgb-binary-logitraw.rs (line 21)
(The full sources of these examples are listed above under txt_format.)

Trait Implementations

impl Clone for InputFormat

fn clone(&self) -> InputFormat

Returns a copy of the value.

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source.

impl Debug for InputFormat

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter.

impl<'de> Deserialize<'de> for InputFormat

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where __D: Deserializer<'de>,

Deserialize this value from the given Serde deserializer.

impl Serialize for InputFormat

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error>
where __S: Serializer,

Serialize this value into the given Serde serializer.

impl Copy for InputFormat
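
Because InputFormat implements Serialize and Deserialize, it can be round-tripped through any Serde data format. A minimal sketch (not from the crate docs; serde_json is an assumed extra dependency):

use gbdt::input::InputFormat;

fn roundtrip() -> Result<(), serde_json::Error> {
    let fmt = InputFormat::csv_format();
    // serialize the format to a JSON string ...
    let text = serde_json::to_string(&fmt)?;
    // ... and deserialize it back into an InputFormat
    let restored: InputFormat = serde_json::from_str(&text)?;
    assert_eq!(fmt.feature_size, restored.feature_size);
    Ok(())
}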

Auto Trait Implementations

Blanket Implementations

impl<T> Any for T
where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

Gets the TypeId of self.

impl<T> Borrow<T> for T
where T: ?Sized,

fn borrow(&self) -> &T

Immutably borrows from an owned value.

impl<T> BorrowMut<T> for T
where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value.

impl<T> From<T> for T

fn from(t: T) -> T

Returns the argument unchanged.

impl<T, U> Into<U> for T
where U: From<T>,

fn into(self) -> U

Calls U::from(self). That is, this conversion is whatever the implementation of From<T> for U chooses to do.

impl<T> ToOwned for T
where T: Clone,

type Owned = T

The resulting type after obtaining ownership.

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning.

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning.

impl<T, U> TryFrom<U> for T
where U: Into<T>,

type Error = Infallible

The type returned in the event of a conversion error.

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,