#[derive(Debug, Clone, Copy)]
pub struct Seq<'a> {
pub id: &'a [u8],
pub desc: &'a [u8],
pub seq: &'a [u8],
pub qual: Option<&'a [u8]>,
}
impl<'a> Seq<'a> {
pub fn is_fastq(&self) -> bool {
self.qual.is_some()
}
pub fn len(&self) -> usize {
self.seq.len()
}
pub fn is_empty(&self) -> bool {
self.seq.is_empty()
}
pub fn rc(&self) -> Vec<u8> {
let mut result = Vec::with_capacity(self.seq.len());
for &base in self.seq.iter().rev() {
result.push(RC_TABLE[base as usize]);
}
result
}
pub fn rc_unsafe(&self) -> Vec<u8> {
let len = self.seq.len();
let mut result = Vec::with_capacity(len);
let rc_ptr = RC_TABLE.as_ptr();
for (slot, &base) in result
.spare_capacity_mut() .iter_mut()
.zip(self.seq.iter().rev())
{
slot.write(unsafe { *rc_ptr.add(base as usize) });
}
unsafe { result.set_len(len) }; result
}
pub fn count_base(&self, base: u8) -> usize {
let mut count = 0;
for &b in self.seq {
count += (b == base) as usize;
}
count
}
pub fn count_base_fn<F: Fn(&u8) -> bool>(&self, f: F) -> usize {
let mut count = 0;
for b in self.seq {
count += f(b) as usize;
}
count
}
pub fn count_bases(&self, bases: &[u8]) -> usize {
let mut mask = [0u64; 4];
for &base in bases {
let index = (base >> 6) as usize;
let bit = 1u64 << (base & 63);
mask[index] |= bit;
}
let mut count = 0;
for &b in self.seq {
let index = (b >> 6) as usize;
let bit = 1u64 << (b & 63);
count += ((mask[index] & bit) != 0) as usize;
}
count
}
pub fn gc_content(&self) -> f32 {
if self.seq.is_empty() {
return 0.0;
}
let mut gc = 0usize;
for &b in self.seq {
gc += matches!(b, b'G' | b'C' | b'g' | b'c') as usize;
}
gc as f32 / self.seq.len() as f32
}
}
const RC_TABLE: [u8; 256] = make_rc_table();
const fn make_rc_table() -> [u8; 256] {
let mut table = [b'N'; 256];
table[b'A' as usize] = b'T';
table[b'C' as usize] = b'G';
table[b'G' as usize] = b'C';
table[b'T' as usize] = b'A';
table[b'N' as usize] = b'N';
table[b'a' as usize] = b't';
table[b'c' as usize] = b'g';
table[b'g' as usize] = b'c';
table[b't' as usize] = b'a';
table[b'n' as usize] = b'n';
table[b'M' as usize] = b'K'; table[b'R' as usize] = b'Y'; table[b'W' as usize] = b'W'; table[b'S' as usize] = b'S'; table[b'Y' as usize] = b'R'; table[b'K' as usize] = b'M';
table[b'V' as usize] = b'B'; table[b'H' as usize] = b'D'; table[b'D' as usize] = b'H'; table[b'B' as usize] = b'V';
table[b'm' as usize] = b'k';
table[b'r' as usize] = b'y';
table[b'w' as usize] = b'w';
table[b's' as usize] = b's';
table[b'y' as usize] = b'r';
table[b'k' as usize] = b'm';
table[b'v' as usize] = b'v';
table[b'h' as usize] = b'h';
table[b'd' as usize] = b'd';
table[b'b' as usize] = b'b';
table[b'.' as usize] = b'.';
table[b'-' as usize] = b'-';
table[b' ' as usize] = b' ';
table
}
#[cfg(test)]
mod tests {
use super::*;
fn a_seq(seq: &'_ [u8]) -> Seq<'_> {
Seq {
id: b"",
desc: b"",
seq,
qual: None,
}
}
#[test]
fn test_rc() {
let seq = b"ACGTNacgtn";
let expected = b"nacgtNACGT";
assert_eq!(a_seq(seq).rc(), expected);
let seq_odd = b"ACGTGa";
let expected_odd = b"tCACGT";
assert_eq!(a_seq(seq_odd).rc(), expected_odd);
let seq_iupac = b"MRSWKY";
let expected_iupac = b"RMWSYK";
assert_eq!(a_seq(seq_iupac).rc(), expected_iupac);
let seq_empty = b"";
let expected_empty = b"";
assert_eq!(a_seq(seq_empty).rc(), expected_empty);
let seq_gaps = b"a-c";
let expected_gaps = b"g-t";
assert_eq!(a_seq(seq_gaps).rc(), expected_gaps);
let seq_unknown = b"N?";
let expected_unknown = b"NN";
assert_eq!(a_seq(seq_unknown).rc(), expected_unknown);
}
#[test]
fn test_base_counting() {
let seq = b"ACGTNacgtn";
assert_eq!(a_seq(seq).count_base(b'a'), 1);
let seq = b"ACGTNacgtn";
assert_eq!(a_seq(seq).count_bases(b"Aa"), 2);
let seq = b"ACGTNacgtn";
assert_eq!(a_seq(seq).count_base_fn(|&b| b == b'a' || b == b'A'), 2);
let seq = b"ACGTNacgtn";
assert_eq!(a_seq(seq).gc_content(), 0.4);
let seq = b"";
assert_eq!(a_seq(seq).gc_content(), 0.0);
}
}