use std::ops::Range;
use std::error::Error;
use std::fs;
use std::str;
use itertools::Itertools;
use rust_htslib::bcf;
use bio::io::fasta;
use bio::stats::{LogProb, PHREDProb};
use ordered_float::NotNaN;
use model;
use BCFError;
use utils;
use Event;
pub fn collect_variants(
record: &mut bcf::Record,
omit_snvs: bool,
omit_indels: bool,
indel_len_range: Option<Range<u32>>,
exclusive_end: bool
) -> Result<Vec<Option<model::Variant>>, Box<Error>> {
let pos = record.pos();
let svlen = match record.info(b"SVLEN").integer() {
Ok(Some(svlen)) => Some(svlen[0].abs() as u32),
_ => None
};
let end = match record.info(b"END").integer() {
Ok(Some(end)) => {
let mut end = end[0] as u32 - 1;
if exclusive_end {
debug!("fixing END tag");
end -= 1;
}
Some(end)
},
_ => None
};
let svtype = match record.info(b"SVTYPE").string() {
Ok(Some(svtype)) => Some(svtype[0].to_owned()),
_ => None
};
let is_valid_len = |svlen| {
if let Some(ref len_range) = indel_len_range {
if svlen < len_range.start || svlen >= len_range.end {
return false;
}
}
true
};
let variants = if let Some(svtype) = svtype {
vec![
if omit_indels {
None
} else if svtype == b"INS" {
let alleles = record.alleles();
if alleles.len() > 2 {
return Err(Box::new(
BCFError::InvalidRecord("SVTYPE=INS but more than one ALT allele".to_owned())
))
}
let ref_allele = alleles[0];
let alt_allele = alleles[1];
if alt_allele == b"<INS>" {
None
} else {
let len = alt_allele.len() - ref_allele.len();
if is_valid_len(len as u32) {
Some(model::Variant::Insertion(alt_allele[ref_allele.len()..].to_owned()))
} else {
None
}
}
} else if svtype == b"DEL" {
let svlen = match(svlen, end) {
(Some(svlen), _) => svlen,
(None, Some(end)) => end - pos,
_ => {
return Err(Box::new(BCFError::MissingTag("SVLEN or END".to_owned())));
}
};
if is_valid_len(svlen) {
Some(model::Variant::Deletion(svlen))
} else {
None
}
} else {
None
}
]
} else {
let alleles = record.alleles();
let ref_allele = alleles[0];
alleles.iter().skip(1).map(|alt_allele| {
if alt_allele[0] == b'<' {
None
} else if alt_allele.len() == 1 && ref_allele.len() == 1 {
if omit_snvs {
None
} else {
Some(model::Variant::SNV(alt_allele[0]))
}
} else if alt_allele.len() == ref_allele.len() {
None
} else {
let indel_len = (alt_allele.len() as i32 - ref_allele.len() as i32).abs() as u32;
if omit_indels {
None
} else if !is_valid_len(indel_len) {
None
} else if alt_allele.len() < ref_allele.len() {
Some(model::Variant::Deletion((ref_allele.len() - alt_allele.len()) as u32))
} else {
Some(model::Variant::Insertion(alt_allele[ref_allele.len()..].to_owned()))
}
}
}).collect_vec()
};
Ok(variants)
}
pub struct ReferenceBuffer {
reader: fasta::IndexedReader<fs::File>,
chrom: Option<Vec<u8>>,
sequence: Vec<u8>
}
impl ReferenceBuffer {
pub fn new(fasta: fasta::IndexedReader<fs::File>) -> Self {
ReferenceBuffer {
reader: fasta,
chrom: None,
sequence: Vec::new()
}
}
pub fn seq(&mut self, chrom: &[u8]) -> Result<&[u8], Box<Error>> {
if let Some(ref last_chrom) = self.chrom {
if last_chrom == &chrom {
return Ok(&self.sequence);
}
}
try!(self.reader.read_all(try!(str::from_utf8(chrom)), &mut self.sequence));
self.chrom = Some(chrom.to_owned());
Ok(&self.sequence)
}
}
pub fn collect_prob_dist<E: Event>(
calls: &bcf::Reader,
event: &E,
vartype: &model::VariantType) -> Result<Vec<NotNaN<f64>>, Box<Error>> {
let mut record = bcf::Record::new();
let mut prob_dist = Vec::new();
loop {
if let Err(e) = calls.read(&mut record) {
if e.is_eof() {
break;
} else {
return Err(Box::new(e));
}
}
let variants = try!(utils::collect_variants(&mut record, false, false, None, false));
let tag = event.tag_name("PROB");
let event_probs = try!(record.info(tag.as_bytes()).float());
if let Some(event_probs) = event_probs {
for (variant, event_prob) in variants.into_iter().zip(event_probs.into_iter()) {
if let Some(variant) = variant {
if !variant.is_type(vartype) || event_prob.is_nan() {
continue;
}
let event_prob = LogProb::from(PHREDProb(*event_prob as f64));
prob_dist.push(try!(NotNaN::new(*event_prob)));
}
}
}
}
prob_dist.sort();
Ok(prob_dist)
}