use std::io::{self, Read, Write};
use std::ops::{Range, RangeBounds};
use byteorder::WriteBytesExt;
use super::{resize, write_iterator};
fn nt_to_raw(nt: u8) -> Result<u8, String> {
match nt {
b'=' => Ok(0),
b'A' | b'a' => Ok(1),
b'C' | b'c' => Ok(2),
b'M' | b'm' => Ok(3),
b'G' | b'g' => Ok(4),
b'R' | b'r' => Ok(5),
b'S' | b's' => Ok(6),
b'V' | b'v' => Ok(7),
b'T' | b't' => Ok(8),
b'W' | b'w' => Ok(9),
b'Y' | b'y' => Ok(10),
b'H' | b'h' => Ok(11),
b'K' | b'k' => Ok(12),
b'D' | b'd' => Ok(13),
b'B' | b'b' => Ok(14),
b'N' | b'n' => Ok(15),
_ => Err(format!("Nucleotide not expected: {}", nt as char)),
}
}
#[derive(Clone)]
pub struct Sequence {
raw: Vec<u8>,
len: usize,
}
impl Sequence {
pub fn new() -> Self {
Sequence {
raw: Vec::new(),
len: 0,
}
}
pub fn from_text<I: IntoIterator<Item = u8>>(sequence: I) -> Result<Self, String> {
let mut seq = Sequence::new();
seq.extend_from_text(sequence)?;
Ok(seq)
}
pub fn clear(&mut self) {
self.raw.clear();
self.len = 0;
}
pub fn shrink_to_fit(&mut self) {
self.raw.shrink_to_fit();
}
pub fn push(&mut self, nt: u8) -> Result<(), String> {
if self.len % 2 == 0 {
self.raw.push(nt_to_raw(nt)? << 4);
} else {
self.raw[self.len / 2] |= nt_to_raw(nt)?;
}
self.len += 1;
Ok(())
}
pub fn extend_from_text<I: IntoIterator<Item = u8>>(&mut self, nucleotides: I)
-> Result<(), String> {
for nt in nucleotides.into_iter() {
self.push(nt)?;
};
Ok(())
}
pub fn fill_from<R: Read>(&mut self, stream: &mut R, new_len: usize)
-> io::Result<()> {
let short_len = (new_len + 1) / 2;
resize(&mut self.raw, short_len);
stream.read_exact(&mut self.raw)?;
self.len = new_len;
Ok(())
}
pub fn raw(&self) -> &[u8] {
&self.raw
}
pub fn len(&self) -> usize {
self.len
}
pub fn available(&self) -> bool {
self.len > 0
}
pub fn to_vec(&self) -> Vec<u8> {
(0..self.len).map(|i| self.at(i)).collect()
}
pub fn to_vec_acgtn_only(&self) -> Vec<u8> {
(0..self.len).map(|i| self.at_acgtn_only(i)).collect()
}
pub fn at(&self, index: usize) -> u8 {
assert!(index < self.len, "Index out of range ({} >= {})", index, self.len);
let nt = if index % 2 == 0 {
self.raw[index / 2] >> 4
} else {
self.raw[index / 2] & 0x0f
};
b"=ACMGRSVTWYHKDBN"[nt as usize]
}
pub fn at_acgtn_only(&self, index: usize) -> u8 {
assert!(index < self.len, "Index out of range ({} >= {})", index, self.len);
let nt = if index % 2 == 0 {
self.raw[index / 2] >> 4
} else {
self.raw[index / 2] & 0x0f
};
b"NACNGNNNTNNNNNNN"[nt as usize]
}
pub fn compl_at(&self, index: usize) -> u8 {
assert!(index < self.len, "Index out of range ({} >= {})", index, self.len);
let nt = if index % 2 == 0 {
self.raw[index / 2] >> 4
} else {
self.raw[index / 2] & 0x0f
};
b"=TGKCYSBAWRDMHVN"[nt as usize]
}
pub fn compl_at_acgtn_only(&self, index: usize) -> u8 {
assert!(index < self.len, "Index out of range ({} >= {})", index, self.len);
let nt = if index % 2 == 0 {
self.raw[index / 2] >> 4
} else {
self.raw[index / 2] & 0x0f
};
b"NTGNCNNNANNNNNNN"[nt as usize]
}
pub fn subseq<'a, R: RangeBounds<usize>>(&'a self, range: R) -> SubseqIter<'a> {
use std::ops::Bound::*;
let start = match range.start_bound() {
Included(&n) => n,
Excluded(&n) => n + 1,
Unbounded => 0,
};
let end = match range.end_bound() {
Included(&n) => n + 1,
Excluded(&n) => n,
Unbounded => self.len,
};
assert!(start <= end);
assert!(end <= self.len);
SubseqIter {
parent: self,
indices: start..end,
}
}
pub fn subseq_acgtn_only<R: RangeBounds<usize>>(&self, range: R) -> SubseqIterAcgtn {
use std::ops::Bound::*;
let start = match range.start_bound() {
Included(&n) => n,
Excluded(&n) => n + 1,
Unbounded => 0,
};
let end = match range.end_bound() {
Included(&n) => n + 1,
Excluded(&n) => n,
Unbounded => self.len,
};
assert!(start <= end);
assert!(end <= self.len);
SubseqIterAcgtn {
parent: self,
indices: start..end,
}
}
pub fn rev_compl<R: RangeBounds<usize>>(&self, range: R) -> RevComplIter {
use std::ops::Bound::*;
let start = match range.start_bound() {
Included(&n) => n,
Excluded(&n) => n + 1,
Unbounded => 0,
};
let end = match range.end_bound() {
Included(&n) => n + 1,
Excluded(&n) => n,
Unbounded => self.len,
};
assert!(start <= end);
assert!(end <= self.len);
RevComplIter {
parent: self,
indices: (start..end).rev(),
}
}
pub fn rev_compl_acgtn_only<R: RangeBounds<usize>>(&self, range: R) -> RevComplIterAcgtn {
use std::ops::Bound::*;
let start = match range.start_bound() {
Included(&n) => n,
Excluded(&n) => n + 1,
Unbounded => 0,
};
let end = match range.end_bound() {
Included(&n) => n + 1,
Excluded(&n) => n,
Unbounded => self.len,
};
assert!(start <= end);
assert!(end <= self.len);
RevComplIterAcgtn {
parent: self,
indices: (start..end).rev(),
}
}
pub fn write_readable<W: Write>(&self, f: &mut W) -> io::Result<()> {
if self.len == 0 {
return f.write_u8(b'*');
}
write_iterator(f, (0..self.len).map(|i| self.at(i)))
}
}
macro_rules! subseq_iter {
($name:ident, $ind_ty:ty, $fun:ident) => {
#[derive(Clone)]
pub struct $name<'a> {
parent: &'a Sequence,
indices: $ind_ty,
}
impl<'a> std::iter::Iterator for $name<'a> {
type Item = u8;
fn next(&mut self) -> Option<u8> {
self.indices.next().map(|i| self.parent.$fun(i))
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.indices.size_hint()
}
}
impl<'a> std::iter::DoubleEndedIterator for $name<'a> {
fn next_back(&mut self) -> Option<Self::Item> {
self.indices.next_back().map(|i| self.parent.$fun(i))
}
}
impl<'a> std::iter::ExactSizeIterator for $name<'a> {}
impl<'a> std::iter::FusedIterator for $name<'a> {}
}
}
subseq_iter!(SubseqIter, Range<usize>, at);
subseq_iter!(SubseqIterAcgtn, Range<usize>, at_acgtn_only);
subseq_iter!(RevComplIter, std::iter::Rev<Range<usize>>, compl_at);
subseq_iter!(RevComplIterAcgtn, std::iter::Rev<Range<usize>>, compl_at_acgtn_only);
#[derive(Clone)]
pub struct Qualities {
raw: Vec<u8>
}
impl Qualities {
pub fn new() -> Self {
Qualities {
raw: Vec::new()
}
}
pub fn from_raw<I: IntoIterator<Item = u8>>(raw: I) -> Self {
let mut qualities = Qualities::new();
qualities.extend_from_raw(raw);
qualities
}
pub fn fill_from<R: Read>(&mut self, stream: &mut R, len: usize) -> io::Result<()> {
resize(&mut self.raw, len);
stream.read_exact(&mut self.raw)?;
Ok(())
}
pub fn raw(&self) -> &[u8] {
&self.raw
}
pub fn len(&self) -> usize {
if self.available() {
self.raw.len()
} else {
0
}
}
pub fn available(&self) -> bool {
!self.raw.is_empty() && self.raw[0] != 0xff
}
pub fn to_readable(&self) -> Vec<u8> {
self.raw.iter().map(|qual| qual + 33).collect()
}
pub fn write_readable<W: Write>(&self, f: &mut W) -> io::Result<()> {
if !self.available() {
return f.write_u8(b'*');
}
write_iterator(f, self.raw.iter().map(|qual| qual + 33))
}
pub fn clear(&mut self) {
self.raw.clear();
}
pub fn extend_from_raw<I: IntoIterator<Item = u8>>(&mut self, qualities: I) {
self.raw.extend(qualities);
}
pub fn shrink_to_fit(&mut self) {
self.raw.shrink_to_fit();
}
}