use std::collections::HashSet;
use std::fs;
use std::io;
use std::path::PathBuf;
use fst;
use regex;
use rayon::iter::{ParallelBridge, ParallelIterator};
use crate::errors;
use crate::io::fasta;
// Command-line arguments of the `prot2tryp2lca` command, parsed by structopt.
//
// NOTE(review): these are deliberately plain `//` comments. structopt turns `///` doc
// comments into the generated help text, so switching to `///` would change what the
// program prints for `--help`.
#[derive(Debug, StructOpt)]
#[structopt(verbatim_doc_comment)]
pub struct ProtToTrypToLca {
// `-o` / `--one-on-one`: when set, a peptide that is not found in the index is reported
// with the value 0 instead of being left out of the output.
#[structopt(short = "o", long = "one-on-one")]
pub one_on_one: bool,
// Positional argument: path of the file that is loaded as an `fst::Map`.
#[structopt(parse(from_os_str))]
pub fst_file: PathBuf,
// `-m` / `--in-memory`: read the whole index file into memory up front instead of
// opening it by path.
#[structopt(short = "m", long = "in-memory")]
pub fst_in_memory: bool,
// `-c` / `--chunksize` (default 240): value handed to `chunked(..)` on the stream of
// FASTA records; each resulting chunk is processed as one parallel work item.
#[structopt(short = "c", long = "chunksize", default_value = "240")]
pub chunk_size: usize,
// `-p` / `--pattern` (default `([KR])([^P])`): regular expression with two capture
// groups; a newline is inserted between the groups of every match to split a sequence
// into peptides.
#[structopt(short = "p", long = "pattern", default_value = "([KR])([^P])")]
pub pattern: String,
// `-l` / `--minlen` (default 5): inclusive lower bound on the peptide length, measured
// with `str::len` (bytes).
#[structopt(short = "l", long = "minlen", default_value = "5")]
pub min_length: usize,
// `-L` / `--maxlen` (default 50): inclusive upper bound on the peptide length, measured
// with `str::len` (bytes).
#[structopt(short = "L", long = "maxlen", default_value = "50")]
pub max_length: usize,
// `-k` / `--keep` (default empty): characters that must all occur in a peptide for it to
// be kept.
#[structopt(short = "k", long = "keep", default_value = "")]
pub contains: String,
// `-d` / `--drop` (default empty): characters of which none may occur in a peptide for it
// to be kept.
#[structopt(short = "d", long = "drop", default_value = "")]
pub lacks: String,
}
/// Splits the FASTA protein records read from stdin into peptides and prints, for every
/// record, a `>header` line followed by one line per retained peptide holding the value
/// the index maps that peptide to.
///
/// Peptides are obtained by inserting a newline between the two capture groups of every
/// match of `args.pattern` and by treating `*` as an additional separator. A peptide is
/// retained when it is non-empty, its length lies within `args.min_length..=args.max_length`
/// and it satisfies the `args.contains` / `args.lacks` character filters. Peptides missing
/// from the index are skipped, unless `args.one_on_one` is set, in which case `0` is printed
/// for them.
///
/// Records are processed in chunks of `args.chunk_size` through rayon's `par_bridge`, so the
/// order in which chunks reach stdout is not guaranteed to follow the input order. The text
/// of a single chunk is always written in one piece.
///
/// # Errors
///
/// Returns an error when the index file cannot be read or decoded, when `args.pattern` is
/// not a valid regular expression, when reading a chunk of records fails, or when writing
/// to stdout fails.
pub fn prot2tryp2lca(args: ProtToTrypToLca) -> errors::Result<()> {
    let fst = if args.fst_in_memory {
        let bytes = fs::read(&args.fst_file)?;
        fst::Map::from_bytes(bytes)?
    } else {
        // NOTE(review): `from_path` is an `unsafe fn` in the fst version used here,
        // presumably because it memory-maps the file, which must then stay unmodified for
        // as long as the map is alive. Confirm against the fst documentation.
        unsafe { fst::Map::from_path(&args.fst_file) }?
    };

    // Value reported for peptides that are absent from the index (`None` means: skip them).
    let default = if args.one_on_one { Some(0) } else { None };
    let pattern = regex::Regex::new(&args.pattern)?;
    let contains = args.contains.chars().collect::<HashSet<char>>();
    let lacks = args.lacks.chars().collect::<HashSet<char>>();
    let (min_length, max_length) = (args.min_length, args.max_length);

    // NOTE(review): the meaning of the `false` flag is defined by `fasta::Reader::new`,
    // which is not visible from here.
    fasta::Reader::new(io::stdin(), false)
        .records()
        .chunked(args.chunk_size)
        .par_bridge()
        .map(|chunk| {
            let chunk = chunk?;
            let mut chunk_output = String::new();
            for read in chunk {
                chunk_output.push_str(&format!(">{}\n", read.header));
                for seq in read.sequence {
                    // A match consumes the character after the split point, so a split
                    // point starting at that very character is missed by a single pass
                    // (e.g. "KRA" with the default pattern). The second pass catches those;
                    // it may also produce blank lines, which `keeps_peptide` rejects.
                    let first_run = pattern.replace_all(&seq, "$1\n$2");
                    let digested = pattern
                        .replace_all(&first_run, "$1\n$2")
                        .replace("*", "\n");
                    for peptide in digested.lines().filter(|peptide| {
                        keeps_peptide(peptide, min_length, max_length, &contains, &lacks)
                    }) {
                        if let Some(lca) = fst.get(peptide).or(default) {
                            chunk_output.push_str(&format!("{}\n", lca));
                        }
                    }
                }
            }
            // Write the chunk with a single call so the output of parallel workers does
            // not interleave, and report a failed write (e.g. a closed pipe) as an error
            // instead of panicking the way `print!` does.
            io::Write::write_all(&mut io::stdout(), chunk_output.as_bytes())?;
            Ok(())
        })
        .collect()
}

/// Tells whether `peptide` passes the length and character filters: it must be non-empty,
/// have a byte length within `min_length..=max_length`, contain every character of
/// `contains` and contain no character of `lacks`.
fn keeps_peptide(
    peptide: &str,
    min_length: usize,
    max_length: usize,
    contains: &HashSet<char>,
    lacks: &HashSet<char>,
) -> bool {
    let length = peptide.len();
    length > 0
        && min_length <= length
        && length <= max_length
        && contains.iter().all(|&residue| peptide.contains(residue))
        && !peptide.chars().any(|residue| lacks.contains(&residue))
}