radiate_gp/regression/
data.rs

1use radiate::random_provider;
2
/// A single sample: one vector of input values paired with one vector of
/// output values.
#[derive(Debug, Clone, Default)]
pub struct Row {
    /// Values exposed through [`Row::input`].
    input: Vec<f32>,
    /// Values exposed through [`Row::output`].
    output: Vec<f32>,
}

impl Row {
    /// Creates a row that takes ownership of the given `input` and `output`
    /// vectors.
    pub fn new(input: Vec<f32>, output: Vec<f32>) -> Self {
        Self { input, output }
    }

    /// Borrows the input values of this row.
    pub fn input(&self) -> &Vec<f32> {
        let Self { input, .. } = self;
        input
    }

    /// Borrows the output values of this row.
    pub fn output(&self) -> &Vec<f32> {
        let Self { output, .. } = self;
        output
    }
}
22
23#[derive(Default, Clone)]
24pub struct DataSet {
25    rows: Vec<Row>,
26}
27
28impl DataSet {
29    pub fn new(inputs: Vec<Vec<f32>>, outputs: Vec<Vec<f32>>) -> Self {
30        let mut samples = Vec::new();
31        for (input, output) in inputs.into_iter().zip(outputs.into_iter()) {
32            samples.push(Row { input, output });
33        }
34        DataSet { rows: samples }
35    }
36
37    pub fn iter(&self) -> std::slice::Iter<Row> {
38        self.rows.iter()
39    }
40
41    pub fn len(&self) -> usize {
42        self.rows.len()
43    }
44
45    pub fn shuffle(mut self) -> Self {
46        random_provider::shuffle(&mut self.rows);
47        self
48    }
49
50    pub fn features(&self) -> Vec<Vec<f32>> {
51        self.rows.iter().map(|row| row.input.clone()).collect()
52    }
53
54    pub fn labels(&self) -> Vec<Vec<f32>> {
55        self.rows.iter().map(|row| row.output.clone()).collect()
56    }
57
58    pub fn split(self, ratio: f32) -> (Self, Self) {
59        let split = (self.len() as f32 * ratio).round() as usize;
60        let (left, right) = self.rows.split_at(split);
61
62        (
63            DataSet {
64                rows: left.to_vec(),
65            },
66            DataSet {
67                rows: right.to_vec(),
68            },
69        )
70    }
71
72    pub fn standardize(mut self) -> Self {
73        let mut means = vec![0.0; self.rows[0].input.len()];
74        let mut stds = vec![0.0; self.rows[0].input.len()];
75
76        for sample in self.rows.iter() {
77            for (i, &val) in sample.input.iter().enumerate() {
78                means[i] += val;
79            }
80        }
81
82        let n = self.len() as f32;
83        for mean in means.iter_mut() {
84            *mean /= n;
85        }
86
87        for sample in self.rows.iter() {
88            for (i, &val) in sample.input.iter().enumerate() {
89                stds[i] += (val - means[i]).powi(2);
90            }
91        }
92
93        for std in stds.iter_mut() {
94            *std = (*std / n).sqrt();
95        }
96
97        for sample in self.rows.iter_mut() {
98            for (i, val) in sample.input.iter_mut().enumerate() {
99                *val = (*val - means[i]) / stds[i];
100            }
101        }
102
103        self
104    }
105
106    pub fn normalize(mut self) -> Self {
107        let mut mins = vec![f32::MAX; self.rows[0].input.len()];
108        let mut maxs = vec![f32::MIN; self.rows[0].input.len()];
109
110        for sample in self.rows.iter() {
111            for (i, &val) in sample.input.iter().enumerate() {
112                if val < mins[i] {
113                    mins[i] = val;
114                }
115
116                if val > maxs[i] {
117                    maxs[i] = val;
118                }
119            }
120        }
121
122        for sample in self.rows.iter_mut() {
123            for (i, val) in sample.input.iter_mut().enumerate() {
124                *val = (*val - mins[i]) / (maxs[i] - mins[i]);
125            }
126        }
127
128        self
129    }
130}