extern crate fasten;
extern crate statistical;
extern crate getopts;
extern crate threadpool;
use std::fs::File;
use std::io::BufReader;
use std::cmp::{min,max};
use std::process::exit;
use std::collections::HashMap;
use std::io::BufRead;
use fasten::fasten_base_options;
use fasten::fasten_base_options_matches;
use fasten::logmsg;
use fasten::reverse_complement;
use fasten::io::fastq;
use fasten::io::seq::Seq;
fn main(){
let mut opts = fasten_base_options();
opts.optopt("f","first-base","The first base to keep (default: 0)","INT");
opts.optopt("l","last-base","The last base to keep (default: 0)","INT");
opts.optopt("a","adapterseqs","fasta file of adapters","path/to/file.fa");
let matches = fasten_base_options_matches("Blunt-end trims using 0-based coordinates", opts);
let adapterseqs:String = {
if matches.opt_present("adapterseqs") {
matches.opt_str("adapterseqs")
.expect("ERROR: could not understand parameter --adapterseqs")
} else {
"".to_string()
}
};
let mut adapters:Vec<String> = Vec::new();
if matches.opt_present("adapterseqs") && adapterseqs.len() > 0 {
if !std::path::Path::new(&adapterseqs).exists() {
logmsg(format!("ERROR: adapter file {} does not exist", &adapterseqs));
exit(1);
}
adapters = read_fasta(&adapterseqs)
.values()
.map(|x| x.to_string())
.collect();
}
let first_base:usize ={
if matches.opt_present("first-base") {
matches.opt_str("first-base")
.expect("ERROR: could not understand parameter --first-base")
.parse()
.expect("ERROR: --first-base is not an INT")
} else {
0
}
};
let last_base:usize ={
if matches.opt_present("last-base") {
matches.opt_str("last-base")
.expect("ERROR: could not understand parameter --last-base")
.parse()
.expect("ERROR: --last-base is not an INT")
} else {
0
}
};
let _num_cpus:usize = {
if matches.opt_present("numcpus") {
logmsg("Warning: multithreading this script currently slows it down. Resetting to 1 cpu. Avoid this warning by not using --numcpus");
1 as usize
} else {
1 as usize
}
};
let my_file = File::open("/dev/stdin").expect("Could not open file");
let my_buffer=BufReader::new(my_file);
let fastq_reader = fastq::FastqReader::new(my_buffer);
let fastq_iter = fastq_reader.into_iter();
for seq in fastq_iter {
let trimmed:String = trim_worker(seq, first_base, last_base, &adapters);
println!("{}", trimmed);
}
}
fn trim_worker(seq:Seq, suggested_first_base:usize, suggested_last_base:usize, adapters:&Vec<String> ) -> String {
let mut first_base = 0;
let mut last_base = seq.seq.len()-1;
let mut description = String::new();
for adapter in adapters {
let adapter_length = adapter.len();
if adapter_length >= seq.seq.len() {
continue;
}
for i in 0..seq.seq.len()-adapter_length {
if &seq.seq[i..adapter_length+i] == adapter {
first_base = adapter_length + i;
description.push_str(&format!(" trimmed_adapter_fwd={}", &adapter));
}
}
let revcom = reverse_complement(&adapter);
for i in 0..seq.seq.len()-adapter_length {
let end_slice: &str = &seq.seq[&seq.seq.len()-1 - adapter_length - i..].trim();
if end_slice == revcom {
last_base = seq.seq.len() - adapter_length - i;
description.push_str(&format!(" trimmed_adapter_rev={}", &revcom));
}
}
}
first_base = max(first_base, suggested_first_base);
if first_base >= seq.seq.len() {
logmsg("Warning: the left trim is longer than the sequence length. Skipping.");
first_base = 0;
}
last_base = {
if suggested_last_base == 0 {
last_base
} else {
min(last_base, suggested_last_base)
}
};
if last_base < 1 {
logmsg("Warning: the right trim is longer than the sequence length. Skipping.");
last_base = seq.seq.len()-1;
}
description.push_str(&format!(" trimmed_left={} trimmed_right={}", first_base, last_base-1));
let sequence = &seq.seq[first_base..last_base];
let quality = &seq.qual[first_base..last_base];
let trimmed = format!("{}{}\n{}\n+\n{}", seq.id, description, sequence, quality);
return trimmed;
}
fn read_fasta(file_path: &str) -> HashMap<String, String> {
let mut data = HashMap::new();
let file = File::open(file_path).expect("Invalid filepath");
let reader = BufReader::new(file);
let mut seq_id = String::new();
for line in reader.lines() {
let line = line.unwrap();
if line.starts_with('>') {
seq_id = line.trim_start_matches('>').to_string();
} else {
data.entry(seq_id.clone()).or_insert_with(String::new).push_str(&line);
}
}
data
}