1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
/// One sample for the neural network: an input vector paired with the output expected for it.
#[derive(Clone, Debug)]
pub struct DataValue {
    /// Values fed to the neural network as its input.
    pub input: Vec<f64>,
    /// Output the network is expected to produce for `input`.
    pub expected_output: Vec<f64>,
}
impl DataValue {
    /// Create a `Vec<DataValue>` from 2 idx files. One for the inputs, and one for the labels.
    ///
    /// Every entry along the outermost axis of the input data becomes one sample; the rest of
    /// that entry is flattened into `input`. Each label is turned into a one-hot
    /// `expected_output` whose length is the largest label value plus one.
    ///
    /// Expectations:
    /// * The flattened label data has one entry per sample. Samples and labels are zipped
    ///   together, so if the counts differ the surplus entries of the longer side are
    ///   silently ignored.
    /// * Labels are non-negative integer class indices. They are read through `f64` and cast
    ///   with `as usize`, so fractional parts are truncated and negative or NaN values become 0.
    ///
    /// Arguments:
    /// * `input_idx` - The IDX file for the input data
    /// * `label_idx` - The IDX file for the labels
    /// * `normalize` - An optional f64 every input value is divided by (`None` divides by 1.0)
    ///
    /// Returns an empty `Vec` when the label source contains no labels.
    ///
    /// # Errors
    /// Propagates any error returned by `read_idx` for either source.
    ///
    /// # Panics
    /// Panics if `cast_as::<f64>()` fails for any input or label value.
    #[cfg(feature = "idx")]
    pub fn from_data_label_idx(
        input_idx: &mut (impl std::io::Read + std::io::Seek),
        label_idx: &mut (impl std::io::Read + std::io::Seek),
        normalize: Option<f64>,
    ) -> crate::error::Result<Vec<DataValue>> {
        use idx_lib::*;

        let data = read_idx(input_idx)?;
        let labels: Vec<usize> = read_idx(label_idx)?
            // Flatten the labels into a single sequence
            .flatten()
            .to_vec()
            // f64 is the one type every value can be cast to, so go through it to reach usize
            .iter()
            .map(|x| x.cast_as::<f64>().unwrap() as usize)
            .collect();

        // Width of the one-hot vectors. With no labels this is 0 instead of a panic; the zip
        // below then produces no samples, so the zero-width vector is never indexed.
        let class_count = labels.iter().max().map_or(0, |&largest| largest + 1);
        // Resolve the divisor once instead of matching on `normalize` for every element.
        let divisor = normalize.unwrap_or(1.0);

        let data_parsed = data
            // Iterate through all of the actual data values
            .outer_iter()
            // Flatten each of them (to prep them to be inputs) and convert to a Vec
            .map(|x| x.flatten().to_vec())
            // Convert to f64s and apply the normalization
            .map(|x: Vec<_>| {
                x.iter()
                    .map(|y| y.cast_as::<f64>().unwrap() / divisor)
                    .collect::<Vec<_>>()
            })
            // Combine them with the labels
            .zip(labels.iter())
            // Convert to DataValues with a one-hot expected output
            .map(|(input, &label)| {
                let mut expected_output = vec![0.0; class_count];
                // `label <= max(labels) < class_count`, so this index is always in bounds.
                expected_output[label] = 1.0;
                DataValue {
                    input,
                    expected_output,
                }
            })
            .collect::<Vec<_>>();
        Ok(data_parsed)
    }
}