extern crate ross;
extern crate getopts;
extern crate rand;
use std::fs::File;
use std::io::BufReader;
use std::io::BufRead;
use std::collections::HashMap;
use std::env;
use std::f32;
use ross::ross_base_options;
const TEN: f32 = 10.0;
const READ_SEPARATOR :char = '~';
fn main(){
let args: Vec<String> = env::args().collect();
let opts = ross_base_options();
let matches = opts.parse(&args[1..]).expect("ERROR: could not parse parameters");
if matches.opt_present("h") {
println!("Emma: collapse identical reads into single reads, recalculating quality values. If paired end, then each set of reads must be identical to be collapsed. Rachel's daughter Emma was played by twins, essentially collapsing two individuals into one character!");
println!("{}",opts.usage(&opts.short_usage(&args[0])));
std::process::exit(0);
}
let paired_end = matches.opt_present("paired-end");
let mut seq_count :HashMap<String, u32> =HashMap::new();
let mut seq_error_rate :HashMap<String, Vec<f32>> = HashMap::new();
let my_file = File::open("/dev/stdin").expect("Could not open file");
let my_buffer=BufReader::new(my_file);
let mut lines = my_buffer.lines();
while let Some(_) = lines.next() {
let current_seq1 = lines.next()
.expect("ERROR getting seq line")
.expect("ERROR parsing seq line");
lines.next()
.expect("ERROR getting plus line")
.expect("ERROR parsing plus line");
let current_qual1= lines.next()
.expect("ERROR getting qual line")
.expect("ERROR parsing qual line");
let mut seq_key = current_seq1.clone();
let mut combined_qual = current_qual1.clone();
if paired_end {
lines.next()
.expect("ERROR getting id line")
.expect("ERROR parsing id line");
let current_seq2 = lines.next()
.expect("ERROR getting seq line")
.expect("ERROR parsing seq line");
lines.next()
.expect("ERROR getting plus line")
.expect("ERROR parsing plus line");
let current_qual2= lines.next()
.expect("ERROR getting qual line")
.expect("ERROR parsing qual line");
seq_key.push(READ_SEPARATOR);
seq_key.push_str(¤t_seq2);
combined_qual.push(READ_SEPARATOR);
combined_qual.push_str(¤t_qual2);
}
let count = seq_count.entry(seq_key.clone()).or_insert(0);
*count += 1;
if !seq_error_rate.contains_key(&seq_key) {
let mut qual_vec :Vec<f32> = Vec::new();
for qual_char in combined_qual.chars() {
let qual_int = qual_char as u8 as f32 - 33.0;
let p :f32 = TEN.powf((-1.0 * qual_int)/TEN);
qual_vec.push(p);
}
seq_error_rate.insert(seq_key.clone(), qual_vec);
}
else {
let qual_vec = seq_error_rate.entry(seq_key.clone()).or_insert(Vec::new());
let these_errors = combined_qual.chars().map(|qual_char|{
let qual_int = qual_char as u8 as f32 - 33.0;
let p :f32 = TEN.powf((-1.0 * qual_int)/TEN);
return p;
}).collect();
let new_qual = combine_error_vectors(&qual_vec,&these_errors);
*qual_vec = new_qual;
}
}
let max_qual_char = 'I';
let min_qual_char = '!';
let max_qual = max_qual_char as u8;
let min_qual = min_qual_char as u8;
let mut seq_counter=0;
for (seq,combined_qual) in seq_error_rate {
seq_counter += 1;
let mut qual_cigar = String::new();
for p in combined_qual {
let mut qual_recalc :f32 = -TEN * (p).log(TEN)+33.0;
if qual_recalc.is_infinite() || qual_recalc > max_qual as f32 {
qual_recalc = max_qual as f32;
}
let mut qual_recalc_char = qual_recalc.round() as u8 as char;
if (qual_recalc_char as u8) > max_qual {
qual_recalc_char = max_qual_char;
}
if (qual_recalc_char as u8) < min_qual {
qual_recalc_char = min_qual_char;
}
qual_cigar.push(qual_recalc_char);
}
if paired_end {
let separator_pos = seq.find(READ_SEPARATOR).expect("ERROR finding read separator");
let r1_seq = seq[0..separator_pos].to_string();
let r2_seq = seq[separator_pos+1..].to_string();
let r1_qual= qual_cigar[0..separator_pos].to_string();
let r2_qual= qual_cigar[separator_pos+1..].to_string();
println!("@{}/1\n{}\n+\n{}",seq_counter,r1_seq,r1_qual);
println!("@{}/2\n{}\n+\n{}",seq_counter,r2_seq,r2_qual);
} else {
println!("@{}\n{}\n+\n{}",seq_counter,seq,qual_cigar);
}
}
}
fn combine_error_vectors(errors1 :&Vec<f32>, errors2: &Vec<f32>) -> Vec<f32> {
if errors1.len() != errors2.len() {
panic!("Lengths of error vectors do not match: {} and {}", errors1.len(), errors2.len());
}
let mut errors_iter2=errors2.iter();
let mut new_errors :Vec<f32> = Vec::new(); for p1 in errors1 {
let p2 = errors_iter2.next().expect("ERROR: could not get the error probability from the second read");
new_errors.push(p1 * p2);
}
return new_errors;
}
#[allow(dead_code)]
fn recalculate_qual(qual_str: &str, count: u32) -> String {
let mut qual_out = String::new();
let max_qual = 'I' as u8;
let min_qual = '!' as u8;
let qual = qual_str.to_string();
for qual_char in qual.chars() {
let qual_int = qual_char as u8 as f32 - 33.0;
let p :f32 = TEN.powf((-1.0 * qual_int)/TEN);
let p_recalc :f32 = p.powi(count as i32);
let mut qual_recalc :f32 = -TEN * (p_recalc).log(TEN)+33.0;
if qual_recalc.is_infinite() || qual_recalc > 255.0 {
qual_recalc = 255.0;
}
let mut qual_recalc_char = qual_recalc.round() as u8 as char;
if (qual_recalc_char as u8) > max_qual {
qual_recalc_char = 'I';
}
if (qual_recalc_char as u8) < min_qual {
qual_recalc_char = '!';
}
qual_out.push(qual_recalc_char);
}
return qual_out;
}