1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
use arff::dynamic::de::from_dataset;
use serde::de::DeserializeOwned;
use dataset::DataSet;
use measure_accumulator::MeasureAccumulator;
use procedures::Procedure;
/// A supervised regression task: a data set with a target column plus the
/// procedure that determines which samples are used for training and testing.
pub struct SupervisedRegression {
/// Task identifier, exposed through `id()`.
pub(crate) id: String,
/// Task name, exposed through `name()`.
pub(crate) name: String,
/// Data the task runs on; the `run*` methods split it with `clone_split()`
/// into a feature part and a target part before deserializing it.
pub(crate) source_data: DataSet,
/// Source of the folds iterated by the `run*` methods; every fold provides a
/// `trainset` and a `testset` of sample indices.
pub(crate) estimation_procedure: Box<Procedure>,
}
impl SupervisedRegression {
    /// Returns the identifier of this task.
    pub fn id(&self) -> &str {
        &self.id
    }

    /// Returns the name of this task.
    pub fn name(&self) -> &str {
        &self.name
    }

    /// Runs `flow` on every fold of the estimation procedure and accumulates
    /// the resulting predictions into a measure of type `M`.
    ///
    /// Each sample of the feature part is deserialized into one value of type
    /// `X`, and each target into one value of type `Y`.
    ///
    /// For every fold, `flow` receives
    /// * an iterator over `(&X, &Y)` pairs for the fold's training indices, and
    /// * an iterator over `&X` for the fold's test indices,
    ///
    /// and returns an iterator of predictions. Predictions are paired, in
    /// order, with the known targets of the fold's test indices and passed to
    /// `MeasureAccumulator::update_one`. Pairing stops as soon as either side
    /// is exhausted, so surplus predictions or surplus test samples are
    /// silently ignored.
    ///
    /// # Panics
    ///
    /// Panics if the source data cannot be split into features and target, if
    /// either part fails to deserialize, or if a fold contains an index that
    /// is out of bounds for the deserialized data.
    pub fn run_static<X, Y, F, M>(&self, flow: F) -> M
    where
        F: Fn(&mut Iterator<Item = (&X, &Y)>, &mut Iterator<Item = &X>)
            -> Box<Iterator<Item = Y>>,
        X: DeserializeOwned,
        Y: DeserializeOwned,
        M: MeasureAccumulator<Y>,
    {
        let (dx, dy) = self.source_data
            .clone_split()
            .expect("Supervised Regression requires a target column");
        let x: Vec<X> = from_dataset(&dx)
            .expect("Supervised Regression failed to deserialize the feature columns");
        let y: Vec<Y> = from_dataset(&dy)
            .expect("Supervised Regression failed to deserialize the target column");

        let mut measure = M::new();
        for fold in self.estimation_procedure.iter() {
            let mut train = fold.trainset.iter().map(|&i| (&x[i], &y[i]));
            let mut test = fold.testset.iter().map(|&i| &x[i]);
            let predictions = flow(&mut train, &mut test);

            // `zip` ends with the shorter side: a flow that yields too few
            // predictions leaves the remaining test samples unscored.
            let known_targets = fold.testset.iter().map(|&i| &y[i]);
            for (known, pred) in known_targets.zip(predictions) {
                measure.update_one(known, &pred);
            }
        }
        measure
    }

    /// Runs `flow` on every fold of the estimation procedure and accumulates
    /// the resulting predictions into a measure of type `M`.
    ///
    /// Unlike `run_static`, the feature part is deserialized into one flat
    /// `Vec<X>` of individual values. Sample `i` is handed to `flow` as the
    /// slice `[i * n_cols, (i + 1) * n_cols)` of that vector, where `n_cols`
    /// is the column count of the feature part.
    ///
    /// For every fold, `flow` receives
    /// * an iterator over `(&[X], &Y)` pairs for the fold's training indices,
    ///   and
    /// * an iterator over `&[X]` for the fold's test indices,
    ///
    /// and returns an iterator of predictions. Predictions are paired, in
    /// order, with the known targets of the fold's test indices and passed to
    /// `MeasureAccumulator::update_one`. Pairing stops as soon as either side
    /// is exhausted, so surplus predictions or surplus test samples are
    /// silently ignored.
    ///
    /// # Panics
    ///
    /// Panics if the source data cannot be split into features and target, if
    /// either part fails to deserialize, or if a fold contains an index whose
    /// slice or target is out of bounds for the deserialized data.
    pub fn run<X, Y, F, M>(&self, flow: F) -> M
    where
        F: Fn(&mut Iterator<Item = (&[X], &Y)>, &mut Iterator<Item = &[X]>)
            -> Box<Iterator<Item = Y>>,
        X: DeserializeOwned,
        Y: DeserializeOwned,
        M: MeasureAccumulator<Y>,
    {
        let (dx, dy) = self.source_data
            .clone_split()
            .expect("Supervised Regression requires a target column");
        let x: Vec<X> = from_dataset(&dx)
            .expect("Supervised Regression failed to deserialize the feature columns");
        let y: Vec<Y> = from_dataset(&dy)
            .expect("Supervised Regression failed to deserialize the target column");

        // The row width is the same for every sample; look it up once instead
        // of on every slice computation inside the closures below.
        let n_cols = dx.n_cols();

        let mut measure = M::new();
        for fold in self.estimation_procedure.iter() {
            let mut train = fold.trainset
                .iter()
                .map(|&i| (&x[i * n_cols..(i + 1) * n_cols], &y[i]));
            let mut test = fold.testset
                .iter()
                .map(|&i| &x[i * n_cols..(i + 1) * n_cols]);
            let predictions = flow(&mut train, &mut test);

            // `zip` ends with the shorter side: a flow that yields too few
            // predictions leaves the remaining test samples unscored.
            let known_targets = fold.testset.iter().map(|&i| &y[i]);
            for (known, pred) in known_targets.zip(predictions) {
                measure.update_one(known, &pred);
            }
        }
        measure
    }
}