use crate::common::CalcConsensus;
use bio::io::fastq;
use bio::stats::probs::LogProb;
use derive_new::new;
use itertools::Itertools;
const ALLELES: &[u8] = b"ACGT";
#[derive(new)]
pub struct CalcNonOverlappingConsensus<'a> {
recs: &'a [fastq::Record],
seqids: &'a [usize],
uuid: &'a str,
verbose_read_names: bool,
}
impl<'a> CalcNonOverlappingConsensus<'a> {
pub fn calc_consensus(&self) -> (fastq::Record, LogProb) {
let seq_len = self.recs()[0].seq().len();
let mut consensus_seq: Vec<u8> = Vec::with_capacity(seq_len);
let mut consensus_qual: Vec<u8> = Vec::with_capacity(seq_len);
assert!(
Self::validate_read_lengths(self.recs()),
"Read length of FASTQ records {:?} differ. Cannot compute consensus sequence.",
self.seqids()
);
let mut consensus_lh = LogProb::ln_one();
for i in 0..seq_len {
let likelihoods = ALLELES
.iter()
.map(|a| Self::overall_allele_likelihood(self, a, i))
.collect_vec(); Self::build_consensus_sequence(
likelihoods,
&mut consensus_lh,
&mut consensus_seq,
&mut consensus_qual,
33.0,
);
}
let name = match self.verbose_read_names {
true => format!(
"{}_consensus-read-from:{}",
self.uuid(),
self.seqids().iter().map(|i| format!("{}", i)).join(",")
),
false => format!(
"{}_consensus-read-from:{}_reads",
self.uuid(),
self.seqids().len(),
),
};
(
fastq::Record::with_attrs(&name, None, &consensus_seq, &consensus_qual),
consensus_lh,
)
}
pub fn recs(&self) -> &[fastq::Record] {
self.recs
}
}
impl<'a> CalcConsensus<'a, fastq::Record> for CalcNonOverlappingConsensus<'a> {
fn overall_allele_likelihood(&self, allele: &u8, i: usize) -> LogProb {
let mut lh = LogProb::ln_one(); for rec in self.recs() {
lh += Self::allele_likelihood_in_rec(allele, rec.seq(), rec.qual(), i, 33);
}
lh
}
fn seqids(&self) -> &'a [usize] {
self.seqids
}
fn uuid(&self) -> &'a str {
self.uuid
}
}
#[derive(new)]
pub struct CalcOverlappingConsensus<'a> {
recs1: &'a [fastq::Record],
recs2: &'a [fastq::Record],
overlap: usize,
seqids: &'a [usize],
uuid: &'a str,
verbose_read_names: bool,
}
impl<'a> CalcOverlappingConsensus<'a> {
pub fn calc_consensus(&self) -> (fastq::Record, LogProb) {
let seq_len = self.recs1()[0].seq().len() + self.recs2()[0].seq().len() - self.overlap();
let mut consensus_seq: Vec<u8> = Vec::with_capacity(seq_len);
let mut consensus_qual: Vec<u8> = Vec::with_capacity(seq_len);
assert!(
Self::validate_read_lengths(self.recs1()),
"Read length of FASTQ forward records {:?} differ. Cannot compute consensus sequence.",
self.seqids()
);
assert!(
Self::validate_read_lengths(self.recs2()),
"Read length of FASTQ reverse records {:?} differ. Cannot compute consensus sequence.",
self.seqids()
);
let mut consensus_lh = LogProb::ln_one();
for i in 0..seq_len {
let likelihoods = ALLELES
.iter()
.map(|a| Self::overall_allele_likelihood(self, a, i))
.collect_vec(); Self::build_consensus_sequence(
likelihoods,
&mut consensus_lh,
&mut consensus_seq,
&mut consensus_qual,
33.0,
);
}
let name = match self.verbose_read_names {
true => format!(
"{}_consensus-read-from:{}",
self.uuid(),
self.seqids().iter().map(|i| format!("{}", i)).join(",")
),
false => format!(
"{}_consensus-read-from:{}_reads",
self.uuid(),
self.seqids().len(),
),
};
(
fastq::Record::with_attrs(&name, None, &consensus_seq, &consensus_qual),
consensus_lh,
)
}
fn recs1(&self) -> &[fastq::Record] {
self.recs1
}
fn recs2(&self) -> &[fastq::Record] {
self.recs2
}
fn overlap(&self) -> usize {
self.overlap
}
}
impl<'a> CalcConsensus<'a, fastq::Record> for CalcOverlappingConsensus<'a> {
fn overall_allele_likelihood(&self, allele: &u8, i: usize) -> LogProb {
let mut lh = LogProb::ln_one();
for (rec1, rec2) in self.recs1().iter().zip(self.recs2()) {
if i < rec1.seq().len() {
lh += Self::allele_likelihood_in_rec(allele, rec1.seq(), rec1.qual(), i, 33);
};
if i >= rec1.seq().len() - self.overlap() {
let rec2_i = i - (rec1.seq().len() - self.overlap());
let rec2_seq = bio::alphabets::dna::revcomp(rec2.seq());
let rec2_qual: Vec<u8> = rec2.qual().iter().rev().cloned().collect();
lh += Self::allele_likelihood_in_rec(allele, &rec2_seq, &rec2_qual, rec2_i, 33);
};
}
lh
}
fn seqids(&self) -> &'a [usize] {
self.seqids
}
fn uuid(&self) -> &'a str {
self.uuid
}
}