mod tsne;
use std::fs::File;
use std::time::Instant;
use tsne::*;
/// Runs t-SNE on `n` points of dimensionality `d` stored row-major in `x`,
/// writing the `no_dims`-dimensional embedding into `y`.
///
/// * `x` - input data, length `n * d`; NOTE: it is centered and rescaled in place.
/// * `y` - output embedding, length `n * no_dims`.
/// * `perplexity` - target perplexity of the conditional distributions.
/// * `theta` - Barnes-Hut accuracy parameter; `0.0` selects the exact version.
/// * `skip_random_init` - when `false`, `y` is (re)initialized with small Gaussian noise.
/// * `max_iter` - number of gradient-descent iterations.
/// * `stop_lying_iter` - iteration at which early exaggeration is removed.
/// * `mom_switch_iter` - iteration at which the momentum is raised.
///
/// Panics if `n == 0` or if the perplexity is too large for `n` points.
pub fn run(
    x: &mut [f64],
    n: usize,
    d: usize,
    y: &mut [f64],
    no_dims: usize,
    perplexity: f64,
    theta: f64,
    skip_random_init: bool,
    max_iter: u64,
    stop_lying_iter: u64,
    mom_switch_iter: u64,
) {
    assert!(n > 0, "there must be at least one data point");
    // t-SNE needs roughly 3 * perplexity neighbors per point. Compare in f64
    // so fractional perplexities are not truncated before the check.
    if ((n - 1) as f64) < 3.0 * perplexity {
        panic!(
            "Perplexity: {} too large for the number of data points!\n",
            perplexity
        );
    }
    print!("Using perplexity: {} \n", perplexity,);

    // theta == 0.0 selects the exact O(n^2) algorithm.
    let exact: bool = theta == 0.0;

    // Gradient-descent hyper-parameters.
    let mut momentum: f64 = 0.5;
    const FINAL_MOMENTUM: f64 = 0.8;
    const ETA: f64 = 200.0;
    // Factor by which input similarities are exaggerated early on ("lying").
    const EARLY_EXAGGERATION: f64 = 12.0;

    let mut start: Instant;
    let mut end: Instant;

    // Gradient, velocity (update) and per-component gain buffers.
    let mut dy: Vec<f64> = vec![0.0; n * no_dims];
    let mut uy: Vec<f64> = vec![0.0; n * no_dims];
    let mut gains: Vec<f64> = vec![1.0; n * no_dims];

    print!("Computing input similarities...\n");
    start = Instant::now();

    // Center the data and scale it into [-1, 1] for numerical stability.
    zero_mean(x, n, d);
    let max_x: f64 = x[..n * d].iter().fold(0.0_f64, |max, &v| max.max(v.abs()));
    for v in x[..n * d].iter_mut() {
        *v /= max_x;
    }

    // Dense similarities (exact) or CSR-style sparse similarities (Barnes-Hut).
    let mut p: Vec<f64> = Vec::new();
    let mut row_p: Vec<usize> = Vec::new();
    let mut col_p: Vec<usize> = Vec::new();
    let mut val_p: Vec<f64> = Vec::new();

    if exact {
        print!("Executing exact version.\n");
        p = vec![0.0; n * n];
        compute_fixed_gaussian_perplexity(x, n, d, &mut p, perplexity);

        print!("Symmetrizing matrix...\n");
        let mut nn: usize = 0;
        for row in 0..n {
            let mut mn: usize = (row + 1) * n;
            for m in (row + 1)..n {
                // p[row][m] = p[m][row] = p(m|row) + p(row|m).
                // BUGFIX: the mirror entry must be *assigned*, not accumulated;
                // `+=` left the matrix asymmetric.
                p[nn + m] += p[mn + row];
                p[mn + row] = p[nn + m];
                mn += n;
            }
            nn += n;
        }
        // Normalize so the joint distribution sums to one.
        let sum_p: f64 = p.iter().sum();
        for v in p.iter_mut() {
            *v /= sum_p;
        }
    } else {
        print!("Executing Barnes-Hut version.\n");
        // Keep the 3 * perplexity nearest neighbors of each point.
        let k: usize = (3.0 * perplexity) as usize;
        row_p = vec![0; n + 1];
        col_p = vec![0; n * k];
        val_p = vec![0.0; n * k];
        compute_gaussian_perplexity(x, n, d, &mut row_p, &mut col_p, &mut val_p, perplexity, k);

        print!("Symmetrizing matrix...\n");
        symmetrize_matrix(&mut row_p, &mut col_p, &mut val_p, n);
        // row_p[n] is the number of stored non-zero entries.
        let nnz: usize = row_p[n];
        let sum_p: f64 = val_p[..nnz].iter().sum();
        for v in val_p[..nnz].iter_mut() {
            *v /= sum_p;
        }
    }
    end = Instant::now();

    // Apply early exaggeration.
    if exact {
        for v in p.iter_mut() {
            *v *= EARLY_EXAGGERATION;
        }
    } else {
        for v in val_p[..row_p[n]].iter_mut() {
            *v *= EARLY_EXAGGERATION;
        }
    }

    if !skip_random_init {
        print!("Sampling random solution...\n");
        // Small Gaussian noise around the origin.
        for v in y[..n * no_dims].iter_mut() {
            *v = randn() * 0.0001;
        }
    }

    if exact {
        print!(
            "Input similarities computed in {} seconds.\nLearning embedding...\n",
            end.duration_since(start).as_secs_f32()
        );
    } else {
        print!(
            "Input similarities computed in {} seconds (sparsity = {}).\nLearning embedding...\n",
            end.duration_since(start).as_secs_f32(),
            row_p[n] as f64 / (n * n) as f64
        );
    }

    let start_fitting: Instant = Instant::now();
    // BUGFIX: the reporting timer is started once here and reset after every
    // report, so "50 iterations in X seconds" really covers the whole window
    // (previously it was reset every iteration and timed a single one).
    start = Instant::now();
    for iter in 0..max_iter {
        // Gradient of the KL divergence at the current embedding.
        if exact {
            compute_gradient(&mut p, y, n, no_dims, &mut dy);
        } else {
            compute_gradient_approx(
                &mut row_p, &mut col_p, &mut val_p, y, n, no_dims, &mut dy, theta,
            );
        }

        // Delta-bar-delta update: adapt gains, then velocity, then position.
        // Each component is independent, so the original four passes are fused.
        for i in 0..(n * no_dims) {
            gains[i] = if dy[i].signum() != uy[i].signum() {
                gains[i] + 0.2
            } else {
                gains[i] * 0.8
            };
            if gains[i] < 0.01 {
                gains[i] = 0.01;
            }
            uy[i] = momentum * uy[i] - ETA * gains[i] * dy[i];
            y[i] += uy[i];
        }
        // Keep the embedding centered at the origin.
        zero_mean(y, n, no_dims);

        // Remove early exaggeration.
        if iter == stop_lying_iter {
            if exact {
                for v in p.iter_mut() {
                    *v /= EARLY_EXAGGERATION;
                }
            } else {
                for v in val_p[..row_p[n]].iter_mut() {
                    *v /= EARLY_EXAGGERATION;
                }
            }
        }
        if iter == mom_switch_iter {
            momentum = FINAL_MOMENTUM;
        }

        // Progress reporting.
        if iter == 0 {
            let c: f64 = if exact {
                evaluate_error(&mut p, y, n, no_dims)
            } else {
                evaluate_error_approx(&mut row_p, &mut col_p, &mut val_p, y, n, no_dims, theta)
            };
            print!("Iteration 0, error is {}\n", c);
        }
        if iter > 0 && (iter % 50 == 0 || iter == max_iter - 1) {
            end = Instant::now();
            let c: f64 = if exact {
                evaluate_error(&mut p, y, n, no_dims)
            } else {
                evaluate_error_approx(&mut row_p, &mut col_p, &mut val_p, y, n, no_dims, theta)
            };
            print!(
                "Iteration {}: error is {} (50 iterations in {} seconds)\n",
                iter,
                c,
                end.duration_since(start).as_secs_f32()
            );
            start = Instant::now();
        }
    }
    // BUGFIX: use `elapsed` so the total does not rely on a stale `end`
    // (e.g. when max_iter == 0, `end` predated `start_fitting`).
    print!(
        "Fitting performed in {} seconds.\n",
        start_fitting.elapsed().as_secs_f32()
    )
}
/// Loads a .csv file into a flat, row-major `Vec<f64>` of features and,
/// optionally, a vector of labels taken from the target column.
///
/// * `file_path` - path of the file to load.
/// * `has_headers` - whether the file has a header row (it is skipped, or used
///   to locate the target column by name).
/// * `has_target` - whether the file contains a target/label column.
/// * `target_hd` - header name of the target column (used when `has_headers`;
///   defaults to column 0 if no header matches).
/// * `target_col` - index of the target column (used when `!has_headers`).
///
/// Panics if the file cannot be opened, a record is malformed, or a feature
/// field fails to parse as `f64`.
pub fn load_csv(
    file_path: &str,
    has_headers: bool,
    has_target: bool,
    target_hd: &str,
    target_col: usize,
) -> (Vec<f64>, Option<Vec<String>>) {
    let file = match File::open(file_path) {
        Ok(file) => file,
        Err(e) => panic!("Couldn't open the .csv file: {}", e),
    };
    // The header row (if any) is consumed manually below, so automatic header
    // handling is always disabled.
    // BUGFIX: this was `.has_headers(!has_headers)`, which made the reader
    // silently swallow the first *data* row of headerless files.
    let mut rdr = csv::ReaderBuilder::new()
        .has_headers(false)
        .from_reader(file);
    let mut data: Vec<f64> = Vec::new();

    if has_target {
        let mut labels: Vec<String> = Vec::new();
        // Index of the target column.
        let tc: usize = if has_headers {
            let headers: csv::StringRecord = match rdr.records().next() {
                Some(Ok(hds)) => hds,
                Some(Err(e)) => panic!("An error occurred while parsing headers: {}", e),
                None => panic!("Error: headers not found."),
            };
            // Locate the target column by name; falls back to column 0 when
            // `target_hd` matches no header (same default as before).
            headers.iter().position(|h| h == target_hd).unwrap_or(0)
        } else {
            target_col
        };
        for result in rdr.records() {
            let record = match result {
                Ok(res) => res,
                Err(e) => panic!("Error while parsing records, {}", e),
            };
            for (i, field) in record.iter().enumerate() {
                if i != tc {
                    data.push(field.parse().unwrap())
                } else {
                    labels.push(field.to_string());
                }
            }
        }
        (data, Some(labels))
    } else {
        let mut records = rdr.records();
        // Discard the header row explicitly; there is no target to locate.
        if has_headers {
            records.next();
        }
        for result in records {
            let record = match result {
                Ok(res) => res,
                Err(e) => panic!("Error while parsing records, {}", e),
            };
            for field in record.iter() {
                data.push(field.parse().unwrap())
            }
        }
        (data, None)
    }
}
/// Writes an embedding to `file_path` as a .csv file, one point per row.
///
/// A `x,y` (2-D) or `x,y,z` (3-D) header row is emitted; for any other
/// dimensionality the file is written without a header.
///
/// NOTE: the name is misspelled ("wite") but kept for backward compatibility.
///
/// * `file_path` - destination path.
/// * `embedding` - flat, row-major embedding values.
/// * `dims` - dimensionality of each embedded point.
///
/// Panics if the file cannot be created or a write fails.
pub fn wite_csv(file_path: &str, embedding: Vec<f64>, dims: usize) {
    let mut wtr: csv::Writer<File> = match csv::Writer::from_path(file_path) {
        Ok(writer) => writer,
        Err(e) => panic!(
            "An error has occurred during the opening of the file : {}",
            e
        ),
    };
    match dims {
        2 => wtr
            .write_record(&["x", "y"])
            .unwrap_or_else(|e| panic!("Error during write: {}", e)),
        3 => wtr
            .write_record(&["x", "y", "z"])
            .unwrap_or_else(|e| panic!("Error during write: {}", e)),
        _ => println!(
            "Found more than three or less than two dimensions, {}.csv won't have a header.",
            file_path
        ),
    }
    // Stringify one point at a time instead of materializing the whole table.
    for point in embedding.chunks(dims) {
        let row: Vec<String> = point.iter().map(|el| el.to_string()).collect();
        wtr.write_record(&row)
            .unwrap_or_else(|e| panic!("Error during write: {}", e));
    }
    wtr.flush()
        .unwrap_or_else(|e| panic!("Couldn't write file: {}", e));
}
#[cfg(test)]
mod tests {
    /// Smoke test: load the bundled data set, embed it with both the
    /// Barnes-Hut (theta = 0.5) and the exact (theta = 0.0) variants,
    /// then write the resulting embedding to disk.
    #[test]
    #[cfg(not(tarpaulin_include))]
    fn it_works() {
        const N: usize = 8;
        const D: usize = 4;
        const NO_DIMS: usize = 2;
        const MAX_ITER: u64 = 2000;
        const PERPLEXITY: f64 = 1.0;

        let (mut data, _labels) = match super::load_csv("data.csv", true, true, "target", 0) {
            (d, Some(l)) => (d, l),
            (_, None) => panic!("This is not supposed to happen!"),
        };

        let mut embedding: Vec<f64> = vec![0.0; N * NO_DIMS];
        // Approximate run first, then the exact one on the same buffers.
        super::run(
            &mut data, N, D, &mut embedding, NO_DIMS, PERPLEXITY, 0.5, false, MAX_ITER, 250, 250,
        );
        super::run(
            &mut data, N, D, &mut embedding, NO_DIMS, PERPLEXITY, 0.0, false, MAX_ITER, 250, 250,
        );
        super::wite_csv("embedding.csv", embedding, 2);
    }
}