radiate_gp/regression/
data.rs1use radiate::random_provider;
2
/// One sample of a data set: a vector of input values paired with a vector of output values.
#[derive(Debug, Clone, Default)]
pub struct Row {
    input: Vec<f32>,
    output: Vec<f32>,
}

impl Row {
    /// Creates a row that takes ownership of the given `input` and `output` vectors.
    pub fn new(input: Vec<f32>, output: Vec<f32>) -> Self {
        Self { input, output }
    }

    /// Borrows the input values of this row.
    pub fn input(&self) -> &Vec<f32> {
        let Self { input, .. } = self;
        input
    }

    /// Borrows the output values of this row.
    pub fn output(&self) -> &Vec<f32> {
        let Self { output, .. } = self;
        output
    }
}
22
23#[derive(Default, Clone)]
24pub struct DataSet {
25 rows: Vec<Row>,
26}
27
28impl DataSet {
29 pub fn new(inputs: Vec<Vec<f32>>, outputs: Vec<Vec<f32>>) -> Self {
30 let mut samples = Vec::new();
31 for (input, output) in inputs.into_iter().zip(outputs.into_iter()) {
32 samples.push(Row { input, output });
33 }
34 DataSet { rows: samples }
35 }
36
37 pub fn iter(&self) -> std::slice::Iter<Row> {
38 self.rows.iter()
39 }
40
41 pub fn len(&self) -> usize {
42 self.rows.len()
43 }
44
45 pub fn shuffle(mut self) -> Self {
46 random_provider::shuffle(&mut self.rows);
47 self
48 }
49
50 pub fn features(&self) -> Vec<Vec<f32>> {
51 self.rows.iter().map(|row| row.input.clone()).collect()
52 }
53
54 pub fn labels(&self) -> Vec<Vec<f32>> {
55 self.rows.iter().map(|row| row.output.clone()).collect()
56 }
57
58 pub fn split(self, ratio: f32) -> (Self, Self) {
59 let split = (self.len() as f32 * ratio).round() as usize;
60 let (left, right) = self.rows.split_at(split);
61
62 (
63 DataSet {
64 rows: left.to_vec(),
65 },
66 DataSet {
67 rows: right.to_vec(),
68 },
69 )
70 }
71
72 pub fn standardize(mut self) -> Self {
73 let mut means = vec![0.0; self.rows[0].input.len()];
74 let mut stds = vec![0.0; self.rows[0].input.len()];
75
76 for sample in self.rows.iter() {
77 for (i, &val) in sample.input.iter().enumerate() {
78 means[i] += val;
79 }
80 }
81
82 let n = self.len() as f32;
83 for mean in means.iter_mut() {
84 *mean /= n;
85 }
86
87 for sample in self.rows.iter() {
88 for (i, &val) in sample.input.iter().enumerate() {
89 stds[i] += (val - means[i]).powi(2);
90 }
91 }
92
93 for std in stds.iter_mut() {
94 *std = (*std / n).sqrt();
95 }
96
97 for sample in self.rows.iter_mut() {
98 for (i, val) in sample.input.iter_mut().enumerate() {
99 *val = (*val - means[i]) / stds[i];
100 }
101 }
102
103 self
104 }
105
106 pub fn normalize(mut self) -> Self {
107 let mut mins = vec![f32::MAX; self.rows[0].input.len()];
108 let mut maxs = vec![f32::MIN; self.rows[0].input.len()];
109
110 for sample in self.rows.iter() {
111 for (i, &val) in sample.input.iter().enumerate() {
112 if val < mins[i] {
113 mins[i] = val;
114 }
115
116 if val > maxs[i] {
117 maxs[i] = val;
118 }
119 }
120 }
121
122 for sample in self.rows.iter_mut() {
123 for (i, val) in sample.input.iter_mut().enumerate() {
124 *val = (*val - mins[i]) / (maxs[i] - mins[i]);
125 }
126 }
127
128 self
129 }
130}