use std::collections::HashMap;
use std::clone::Clone;
#[test]
fn test_new_seq() {
let id = "MY_ID".to_string();
let seq = "AATNGGCC".to_string();
let qual = "#ABCDE!!".to_string();
let cleanable = Seq::new(&id,&seq,&qual);
let formatted = format!("@{}\n{}\n+\n{}", &id, &seq, &qual);
assert_eq!(cleanable.to_string(), formatted);
}
#[test]
fn test_cleanable() {
let id = "MY_ID".to_string();
let seq = "AATNGGCC".to_string();
let qual = "#ABCDE!!".to_string();
let mut cleanable = Seq::new(&id,&seq,&qual);
cleanable.lower_ambiguity_q();
cleanable.trim();
assert_eq!(cleanable.to_string(), "@MY_ID\nATNGG\n+\nAB!DE".to_string());
}
#[derive(Debug)]
pub struct Seq {
pub id: String,
pub seq: String,
pub qual: String,
pub pairid: String,
pub thresholds: HashMap<String,f32>,
}
pub trait Cleanable{
fn new (id: &String, seq: &String, qual: &String) -> Seq;
fn blank () -> Seq;
fn is_blank (&self) -> bool;
fn from_string (seq_str: &String) -> Seq;
fn sanitize_id(id: &String) -> String;
fn lower_ambiguity_q(&mut self) -> ();
fn trim(&mut self) -> ();
fn is_high_quality(&mut self) -> bool;
fn to_string(&self) -> String;
fn print(&self) -> ();
}
impl Cleanable for Seq {
fn new (id: &String, seq: &String, qual: &String) -> Seq{
let id_copy = Self::sanitize_id(&id);
let mut thresholds = HashMap::new();
thresholds.insert("min_avg_qual".to_string(),20.0);
thresholds.insert("min_length".to_string(),100.0);
thresholds.insert("min_trim_qual".to_string(),20.0);
return Seq{
id: id_copy,
seq: seq.clone(),
qual: qual.clone(),
pairid: String::new(),
thresholds: thresholds,
};
}
fn blank () -> Seq{
return Seq::new(&String::new(),&String::new(),&String::new());
}
fn is_blank (&self) -> bool {
if self.seq.len() == 0 && self.qual.len() == 0 {
return true;
}
return false;
}
fn from_string (seq_str: &String) -> Seq {
let mut lines = seq_str.lines();
let id = lines.next().expect("Could not parse ID");
let seq = lines.next().expect("Could not parse sequence");
lines.next().expect("Could not parse +");
let qual_opt = lines.next();
if qual_opt == None {
return Seq::blank();
}
let qual = qual_opt.expect("Could not read the qual line");
return Seq{
id: id.to_string(),
seq: seq.to_string(),
qual: qual.to_string(),
pairid: String::new(),
thresholds: HashMap::new(),
}
}
fn sanitize_id(id: &String) -> String {
if id.len() == 0 {
return String::new();
}
let mut id_copy = id.clone();
if id_copy.chars().nth(0).expect("ID was empty") == '@' {
id_copy.pop();
}
return id_copy;
}
fn lower_ambiguity_q(&mut self){
let zero_score:char = 33 as char;
let low_score :char = (33 + 0) as u8 as char;
let mut low_qual_idx = vec![false; self.seq.len()];
for (i,nt) in self.seq.chars().enumerate(){
if nt == 'N' || nt == 'n' || self.qual.chars().nth(i).expect("Expected a char") < low_score {
low_qual_idx[i] = true;
}
}
let mut new_seq =String::new();
let mut new_qual=String::new();
for (i,nt) in self.seq.chars().enumerate(){
if low_qual_idx[i] {
new_seq.push('N');
new_qual.push(zero_score);
} else{
new_seq.push(nt);
new_qual.push_str(&self.qual[i..i+1]);
}
}
self.seq=new_seq;
self.qual=new_qual;
}
fn trim(&mut self) {
let min_qual = *self.thresholds.entry("min_trim_qual".to_string())
.or_insert(0.0) as u8;
let mut trim5=0;
let mut trim3=&self.qual.len()-0;
for qual in self.qual.chars(){
if qual as u8 - 33 < min_qual {
trim5+=1;
} else {
break;
}
}
for qual in self.qual.chars().rev() {
if qual as u8 - 33 < min_qual {
trim3-=1;
} else {
break;
}
}
if trim5 >= trim3 {
self.qual = String::new();
self.seq = String::new();
} else {
self.qual = self.qual[trim5..trim3].to_string();
self.seq = self.seq[trim5..trim3].to_string();
}
}
fn is_high_quality(&mut self) -> bool {
let min_length = self.thresholds.get(&"min_length".to_string()).expect("min_length does not look like a number");
let seq_len = self.seq.len() as f32;
if seq_len < *min_length {
return false;
}
let mut total_qual = 0;
for qual in self.qual.chars() {
total_qual += qual as u32;
}
let avg_qual = (total_qual as f32/seq_len) - 33.0;
let min_qual = self.thresholds.get(&"min_avg_qual".to_string()).expect("min_avg_qual does not look like a number");
if avg_qual < *min_qual {
return false;
}
return true;
}
fn to_string(&self) -> String {
let mut entry = String::new();
if self.id.len() > 0 && self.id.chars().nth(0).expect("Seq ID was not set") != '@' {
entry.push('@');
}
entry.push_str(self.id.trim());
entry.push_str("\n");
entry.push_str(self.seq.trim());
entry.push_str("\n+\n");
entry.push_str(&self.qual.trim());
return entry;
}
fn print(&self) -> () {
println!("{}",self.to_string());
}
}
impl Clone for Seq {
fn clone(&self) -> Seq {
return Seq{
id: self.id.clone(),
seq: self.seq.clone(),
qual: self.qual.clone(),
pairid: self.pairid.clone(),
thresholds: self.thresholds.clone(),
}
}
}