use std::error::Error;
use futures::executor::block_on_stream;
use crate::bam::{bam_import_genome, BamFile};
use crate::error::ArgumentError;
use crate::genome::Genome;
use crate::log;
use crate::coverage::{CoverageConfig, CoverageError, FraglenEstimate, OptionCoverage};
use crate::read_stream::ReadStream;
use crate::track_simple::SimpleTrack;
use crate::track_statistics::estimate_fragment_length;
pub fn estimate_fraglen(
config: &CoverageConfig,
filename: &str,
genome: &Genome,
) -> Result<FraglenEstimate, Box<dyn Error>> {
log!(config.logger, "Reading tags from `{}`", filename);
let bam_result = BamFile::open(filename, None);
let mut bam = match bam_result {
Ok(b) => b,
Err(err) => return Err(err),
};
let reads = Box::pin(bam.reader.read_simple_stream(false, false));
let reads = ReadStream::filter_single_end(reads, Some(&config.logger), true);
let reads =
ReadStream::filter_read_length(reads, Some(&config.logger), &config.filter_read_lengths);
let reads =
ReadStream::filter_duplicates(reads, Some(&config.logger), config.filter_duplicates);
let reads = ReadStream::filter_mapq(reads, Some(&config.logger), config.filter_mapq);
let mut err_opt = None;
let reads_iter = block_on_stream(reads).map_while(|item| match item {
Ok(read) => Some(read),
Err(err) => {
err_opt = Some(err);
None
}
});
log!(config.logger, "Estimating mean fragment length");
let r = match estimate_fragment_length(
reads_iter,
genome,
2000,
config.fraglen_bin_size,
config.fraglen_range,
) {
Ok((fraglen, x, y, _n)) => {
log!(config.logger, "Estimated mean fragment length: {}", fraglen);
Ok(FraglenEstimate {
fraglen: fraglen as usize,
x,
y,
})
}
Err(err) => Err(err),
};
if let Some(err) = err_opt {
return Err(Box::new(err));
}
r
}
pub fn bam_coverage(
filenames_treatment: &Vec<&str>,
filenames_control: &Vec<&str>,
fraglen_treatment: &Vec<Option<usize>>,
fraglen_control: &Vec<Option<usize>>,
options: Vec<OptionCoverage>,
) -> Result<(SimpleTrack, Vec<FraglenEstimate>, Vec<FraglenEstimate>), CoverageError> {
let mut config = CoverageConfig::default();
for option in options {
config.insert_option(option);
}
let mut fraglen_treatment = fraglen_treatment.clone();
let mut fraglen_control = fraglen_control.clone();
if fraglen_treatment.is_empty() {
fraglen_treatment = vec![None; filenames_treatment.len()];
}
if fraglen_control.is_empty() {
fraglen_control = vec![None; filenames_control.len()];
}
if fraglen_treatment.len() != filenames_treatment.len() {
let e = ArgumentError(
format!("Number of provided treatment fragment lengths `{}` does not match number of treatment files `{}`",
fraglen_treatment.len(), filenames_treatment.len())
);
return Err(CoverageError::new_empty(Box::new(e)));
}
if fraglen_control.len() != filenames_control.len() {
let e = ArgumentError(
format!("Number of provided control fragment lengths `{}` does not match number of control files `{}`",
fraglen_control.len(), filenames_control.len())
);
return Err(CoverageError::new_empty(Box::new(e)));
}
let mut genome: Genome = Genome::default();
for filename in filenames_treatment.iter().chain(filenames_control.iter()) {
let g = bam_import_genome(filename).map_err(|e| CoverageError::new_empty(e))?;
if genome.len() == 0 {
genome = g;
} else if genome != g {
let e =
ArgumentError("Treatment and control tracks have different genomes".to_string());
return Err(CoverageError::new_empty(Box::new(e)));
}
}
let mut treatment_fraglen_estimates =
vec![FraglenEstimate::default(); filenames_treatment.len()];
let mut control_fraglen_estimates = vec![FraglenEstimate::default(); filenames_control.len()];
if !config.estimate_fraglen {
for (i, fraglen) in fraglen_treatment.iter().enumerate() {
if let Some(k) = *fraglen {
treatment_fraglen_estimates[i].fraglen = k;
} else {
treatment_fraglen_estimates[i].fraglen = 0;
}
}
for (i, fraglen) in fraglen_control.iter().enumerate() {
if let Some(k) = *fraglen {
control_fraglen_estimates[i].fraglen = k;
} else {
control_fraglen_estimates[i].fraglen = 0;
}
}
}
else {
for (i, filename) in filenames_treatment.iter().enumerate() {
match fraglen_treatment[i] {
Some(k) => treatment_fraglen_estimates[i].fraglen = k,
None => {
let estimate = estimate_fraglen(&config, filename, &genome)
.map_err(|e| CoverageError::new_empty(e))?;
treatment_fraglen_estimates[i] = estimate;
}
}
}
for (i, filename) in filenames_control.iter().enumerate() {
match fraglen_control[i] {
Some(k) => control_fraglen_estimates[i].fraglen = k,
None => {
let estimate = estimate_fraglen(&config, filename, &genome)
.map_err(|e| CoverageError::new_empty(e))?;
control_fraglen_estimates[i] = estimate;
}
}
}
}
let fraglen_treatment_arg: Vec<usize> = treatment_fraglen_estimates
.iter()
.map(|x| x.fraglen as usize)
.collect();
let fraglen_control_arg: Vec<usize> = control_fraglen_estimates
.iter()
.map(|x| x.fraglen as usize)
.collect();
let track = SimpleTrack::coverage_from_bam(
config,
filenames_treatment,
filenames_control,
&fraglen_treatment_arg,
&fraglen_control_arg,
genome,
)
.map_err(|e| {
CoverageError::new(
e,
treatment_fraglen_estimates.clone(),
control_fraglen_estimates.clone(),
)
})?;
Ok((
track,
treatment_fraglen_estimates,
control_fraglen_estimates,
))
}