extern crate docopt;
use docopt::Docopt;
use std::fs::File;
use std::io::{BufRead, Read, Write, BufReader, stderr};
static USAGE: &'static str = "
Usage: dotplot [options] <file1> <file2>
Options:
-w=K, --window window size [default: 10]
-t=K, --threshold similarity threshold [default: 7]
-p treat input as plaintext, not fasta
-h --help show help
";
fn dot_plot(seq1: &[u8], seq2: &[u8], window_size: u32, threshold: u32) {
let mut err = stderr();
if window_size < 1 {
let _ = writeln!(err, "Invalid window size: {}", window_size);
return;
}
if threshold > window_size {
let _ = writeln!(err, "Invalid threshold: {}", threshold);
return;
}
for (i, s1) in seq1.windows(window_size as usize).enumerate() {
for (j, s2) in seq2.windows(window_size as usize).enumerate() {
let mut similarity = 0;
for (a, b) in s1.iter().zip(s2.iter()) {
if a == b {
similarity += 1;
}
}
if similarity >= threshold {
println!("{} {}", i, j);
}
}
}
}
fn read_fasta(file_path: &str) -> Vec<u8> {
let mut buf = String::new();
let file = BufReader::new(File::open(file_path).expect("Error reading fasta file"));
for res in file.lines() {
let r = res.unwrap();
let line = &r;
if !line.starts_with(">") {
buf.push_str(line.trim());
}
}
return buf.into_bytes();
}
fn main() {
let doc = match Docopt::new(USAGE) {
Ok(result) => result,
Err(reason) => panic!(reason.to_string()),
};
let args = match doc.parse() {
Ok(result) => result,
Err(_) => {
println!("{}", USAGE);
return;
}
};
let window_size = args.get_str("-w").parse().expect("Error parsing -w");
let threshold = args.get_str("-t").parse().expect("Error parsing -t");
let fasta = !args.get_bool("-p");
let help = args.get_bool("-h");
if help {
println!("{}", USAGE);
return;
}
let file1 = args.get_str("<file1>");
let file2 = args.get_str("<file2>");
let mut seq1: Vec<u8> = Vec::new();
let mut seq2: Vec<u8> = Vec::new();
if !fasta {
let _ = File::open(file1)
.expect("Error reading file 1")
.read_to_end(&mut seq1);
let _ = File::open(file2)
.expect("Error reading file 2")
.read_to_end(&mut seq2);
} else {
seq1 = read_fasta(file1);
seq2 = read_fasta(file2);
}
dot_plot(&seq1, &seq2, window_size, threshold);
}