use super::{Library, Permuter};
use crate::Offset;
use fxread::{FastxRead, Record};
use hashbrown::HashMap;
#[derive(Debug, PartialEq)]
enum Position {
Plus,
Minus,
Centered,
Null,
}
pub struct Counter {
results: HashMap<Vec<u8>, usize>,
total_reads: usize,
matched_reads: usize,
}
impl Counter {
pub fn from_hashmap(map: HashMap<Vec<u8>, usize>) -> Self {
Self {
results: map,
total_reads: 0,
matched_reads: 0,
}
}
#[must_use]
pub fn new(
reader: Box<dyn FastxRead<Item = Record>>,
library: &Library,
permuter: &Option<Permuter>,
offset: Offset,
size: usize,
position_recursion: bool,
) -> Self {
let position = if position_recursion {
Position::Centered
} else {
Position::Null
};
let mut total_reads = 0;
let mut matched_reads = 0;
let results = Self::count(
reader,
library,
permuter,
offset,
size,
&position,
&mut total_reads,
&mut matched_reads,
);
Self {
results,
total_reads,
matched_reads,
}
}
#[must_use]
pub fn get_value(&self, token: &[u8]) -> &usize {
match self.results.get(token) {
Some(c) => c,
None => &0,
}
}
fn check_library<'a>(token: &[u8], library: &'a Library) -> Option<&'a Vec<u8>> {
library.contains(token)
}
fn check_permuter<'a>(token: &[u8], permuter: &'a Option<Permuter>) -> Option<&'a Vec<u8>> {
match permuter {
Some(p) => p.contains(token),
None => None,
}
}
fn assign<'a>(
record: &Record,
library: &'a Library,
permuter: &Option<Permuter>,
offset: Offset,
size: usize,
position: &Position,
) -> Option<&'a Vec<u8>> {
let token = match Self::apply_trim(record, offset, size, position) {
Some(t) => t,
None => return None,
};
let alias = match Self::check_library(&token, library) {
Some(s) => Some(s),
None => match Self::check_permuter(&token, permuter) {
Some(s) => library.alias(s),
None => None,
},
};
if alias.is_none() {
match position {
Position::Centered => {
Self::assign(record, library, permuter, offset, size, &Position::Plus)
}
Position::Plus => {
Self::assign(record, library, permuter, offset, size, &Position::Minus)
}
Position::Minus | Position::Null => alias,
}
}
else {
alias
}
}
#[inline]
fn apply_trim(
record: &Record,
offset: Offset,
size: usize,
position: &Position,
) -> Option<Vec<u8>> {
match offset {
Offset::Forward(index) => Self::trim_forward_sequence(record, index, size, position),
Offset::Reverse(index) => Self::trim_reverse_sequence(record, index, size, position),
}
}
#[inline]
fn bounds(
seq: &[u8],
offset: usize,
size: usize,
position: &Position,
) -> Option<(usize, usize)> {
let (min, max) = match position {
Position::Plus => (offset + 1, offset + 1 + size),
Position::Minus => {
let min = match offset.checked_sub(1) {
Some(m) => m,
None => return None,
};
(min, min + size)
}
_ => (offset, offset + size),
};
if max > seq.len() {
return None;
} else {
Some((min, max))
}
}
#[inline]
fn trim_forward_sequence(
record: &Record,
offset: usize,
size: usize,
position: &Position,
) -> Option<Vec<u8>> {
Self::bounds(record.seq(), offset, size, position)
.map(|(min, max)| record.seq()[min..max].to_vec())
}
#[inline]
fn trim_reverse_sequence(
record: &Record,
offset: usize,
size: usize,
position: &Position,
) -> Option<Vec<u8>> {
Self::bounds(record.seq(), offset, size, position)
.map(|(min, max)| record.seq_rev_comp()[min..max].to_vec())
}
fn count(
reader: Box<dyn FastxRead<Item = Record>>,
library: &Library,
permuter: &Option<Permuter>,
offset: Offset,
size: usize,
position: &Position,
total_reads: &mut usize,
matched_reads: &mut usize,
) -> HashMap<Vec<u8>, usize> {
reader
.into_iter()
.map(|x| {
*total_reads += 1;
x
})
.filter_map(|x| Self::assign(&x, library, permuter, offset, size, position))
.map(|x| {
*matched_reads += 1;
x
})
.fold(HashMap::new(), |mut accum, x| {
*accum.entry(x.clone()).or_insert(0) += 1;
accum
})
}
pub fn total_reads(&self) -> usize {
self.total_reads
}
pub fn matched_reads(&self) -> usize {
self.matched_reads
}
pub fn fraction_mapped(&self) -> f64 {
self.matched_reads as f64 / self.total_reads as f64
}
}
#[cfg(test)]
mod test {
use super::{Counter, Library, Permuter, Position};
use crate::Offset;
use fxread::{FastaReader, FastxRead, Record};
fn trim_reader(distance: bool) -> Box<dyn FastxRead<Item = Record>> {
let sequence: &'static [u8] = match distance {
false => b">seq.0\nACTG\n",
true => b">seq.0\nAGTG\n",
};
Box::new(FastaReader::new(sequence))
}
fn lib_reader() -> Box<dyn FastxRead<Item = Record>> {
let sequence: &'static [u8] = b">seq.0\nACTG\n";
Box::new(FastaReader::new(sequence))
}
fn library() -> Library {
Library::from_reader(lib_reader()).unwrap()
}
fn permuter() -> Permuter {
Permuter::new(library().keys())
}
#[test]
fn count_no_distance_no_permute() {
let trimmer = trim_reader(false);
let library = library();
let count = Counter::new(trimmer, &library, &None, Offset::Forward(0), 4, false);
assert_eq!(*count.get_value(b"seq.0"), 1);
}
#[test]
fn count_no_distance_with_permute() {
let trimmer = trim_reader(true);
let library = library();
let count = Counter::new(trimmer, &library, &None, Offset::Forward(0), 4, false);
assert_eq!(*count.get_value(b"seq.0"), 0);
}
#[test]
fn count_with_distance_no_permute() {
let trimmer = trim_reader(true);
let library = library();
let count = Counter::new(trimmer, &library, &None, Offset::Forward(0), 4, false);
assert_eq!(*count.get_value(b"seq.0"), 0);
}
#[test]
fn count_with_distance_with_permute() {
let trimmer = trim_reader(true);
let library = library();
let permuter = permuter();
let count = Counter::new(
trimmer,
&library,
&Some(permuter),
Offset::Forward(0),
4,
false,
);
assert_eq!(*count.get_value(b"seq.0"), 1);
}
#[test]
fn bounds_checking_standard() {
let seq = b"ACTGACTGACTG".as_slice();
let offset = 4;
let size = 4;
let position = Position::Null;
let (min, max) = Counter::bounds(seq, offset, size, &position).unwrap();
assert_eq!(min, 4);
assert_eq!(max, 8);
}
#[test]
fn bounds_checking_plus() {
let seq = b"ACTGACTGACTG".as_slice();
let offset = 4;
let size = 4;
let position = Position::Plus;
let (min, max) = Counter::bounds(seq, offset, size, &position).unwrap();
assert_eq!(min, 5);
assert_eq!(max, 9);
}
#[test]
fn bounds_checking_minus() {
let seq = b"ACTGACTGACTG".as_slice();
let offset = 4;
let size = 4;
let position = Position::Minus;
let (min, max) = Counter::bounds(seq, offset, size, &position).unwrap();
assert_eq!(min, 3);
assert_eq!(max, 7);
}
#[test]
fn bounds_checking_standard_clipped() {
let seq = b"ACTGACT".as_slice();
let offset = 4;
let size = 4;
let position = Position::Null;
let res = Counter::bounds(seq, offset, size, &position);
assert!(res.is_none());
}
#[test]
fn bounds_checking_plus_clipped() {
let seq = b"ACTGACT".as_slice();
let offset = 4;
let size = 4;
let position = Position::Plus;
let res = Counter::bounds(seq, offset, size, &position);
assert!(res.is_none());
}
#[test]
fn bounds_checking_minus_clipped() {
let seq = b"ACTGAC".as_slice();
let offset = 4;
let size = 4;
let position = Position::Minus;
let res = Counter::bounds(seq, offset, size, &position);
assert!(res.is_none());
}
}