use std::error::Error;
use std::io;
use csv;
use itertools::Itertools;
use rust_htslib::bcf;
use bio::stats::{LogProb, PHREDProb, Prob};
use ordered_float::NotNaN;
use utils;
use Event;
use model;
use estimation::fdr::{Record, ALPHAS};
fn pval(x: NotNaN<f64>, dist: &[NotNaN<f64>]) -> LogProb {
let i = dist.binary_search(&x).unwrap_or_else(|i| i);
let f0 = LogProb::from(Prob(i as f64 / dist.len() as f64));
f0.ln_one_minus_exp()
}
pub fn control_fdr<E: Event, W: io::Write>(
calls: &mut bcf::Reader,
null_calls: &mut bcf::Reader,
writer: &mut W,
event: &E,
vartype: &model::VariantType) -> Result<(), Box<Error>> {
let mut writer = csv::Writer::from_writer(writer).delimiter(b'\t');
try!(writer.write(["FDR", "max-prob"].into_iter()));
let null_dist = utils::collect_prob_dist(null_calls, event, vartype)?;
if null_dist.is_empty() {
for &alpha in &ALPHAS {
writer.write([&format!("{}", alpha), ""].iter())?;
}
return Ok(());
}
let prob_dist = utils::collect_prob_dist(calls, event, vartype)?;
debug!("{} observations in null distribution.", null_dist.len());
debug!("{} observations in call distribution.", prob_dist.len());
let pvals = prob_dist.iter().map(|&p| pval(p, &null_dist)).collect_vec();
let m = pvals.len() as f64;
let mk_pvals = pvals.iter().enumerate().map(|(k, &p)| (*p) + m.ln() - (m - k as f64 + 1.0).ln()).collect_vec();
for &alpha in &ALPHAS {
let mut record = Record { alpha: alpha, gamma: PHREDProb::from(Prob(1.0)) };
let alpha = alpha.ln();
for (&mkp, &event_prob) in mk_pvals.iter().zip(prob_dist.iter()) {
if mkp <= alpha {
record.gamma = PHREDProb::from(LogProb(*event_prob));
break;
}
}
writer.encode(&record)?;
}
writer.flush()?;
Ok(())
}