use std::io;
use noodles::sam::alignment::record::Sequence;
use super::num::{write_u8, write_u32_le};
/// Appends the number of bases in a sequence to `dst` as a little-endian `u32`.
///
/// # Errors
///
/// Returns an [`io::ErrorKind::InvalidInput`] error when `base_count` does not fit in a `u32`;
/// nothing is written to `dst` in that case.
pub(super) fn write_length(dst: &mut Vec<u8>, base_count: usize) -> io::Result<()> {
    match u32::try_from(base_count) {
        Ok(length) => {
            write_u32_le(dst, length);
            Ok(())
        }
        Err(e) => Err(io::Error::new(io::ErrorKind::InvalidInput, e)),
    }
}
/// Packs `sequence` into `dst`, two bases per byte (first base in the high nibble).
///
/// An empty sequence writes nothing and succeeds regardless of `read_length`. When the number
/// of bases is odd, the low nibble of the final byte is the code for `=`, i.e. `0`.
///
/// # Errors
///
/// Returns an [`io::ErrorKind::InvalidInput`] error when `read_length` is nonzero and differs
/// from the length of a non-empty `sequence`.
// NOTE(review): `sequence` is taken by value, presumably because callers hand in cheap
// reference-like `Sequence` implementors (the tests pass `&SequenceBuf`) — confirm.
#[allow(clippy::needless_pass_by_value)]
pub(super) fn write_sequence<S>(
    dst: &mut Vec<u8>,
    read_length: usize,
    sequence: S,
) -> io::Result<()>
where
    S: Sequence,
{
    // Stand-in for the missing second base of a trailing, unpaired base.
    const EQ: u8 = b'=';

    if sequence.is_empty() {
        return Ok(());
    }

    let lengths_disagree = read_length > 0 && sequence.len() != read_length;

    if lengths_disagree {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "read length-sequence length mismatch",
        ));
    }

    let mut remaining = sequence.iter();

    loop {
        // Each output byte consumes up to two bases from the iterator.
        let Some(high) = remaining.next() else {
            break;
        };
        let low = remaining.next().unwrap_or(EQ);

        let high_nibble = encode_base(high) << 4;
        let low_nibble = encode_base(low);
        write_u8(dst, high_nibble | low_nibble);
    }

    Ok(())
}
/// Packs the bases in a byte slice into `dst`, two bases per byte (first base in the high
/// nibble).
///
/// An empty slice writes nothing and succeeds regardless of `read_length`. When the number of
/// bases is odd, the low nibble of the final byte is `0`.
///
/// # Errors
///
/// Returns an [`io::ErrorKind::InvalidInput`] error when `read_length` is nonzero and differs
/// from the length of a non-empty `bases`.
#[inline]
pub(super) fn write_sequence_from_slice(
    dst: &mut Vec<u8>,
    read_length: usize,
    bases: &[u8],
) -> io::Result<()> {
    // Number of bases handled per outer-loop iteration (8 output bytes).
    const CHUNK_SIZE: usize = 16;

    // Combines two raw bases into one packed byte.
    #[inline]
    fn pack(high: u8, low: u8) -> u8 {
        (CODES[high as usize] << 4) | CODES[low as usize]
    }

    if bases.is_empty() {
        return Ok(());
    }

    let lengths_disagree = read_length > 0 && bases.len() != read_length;

    if lengths_disagree {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "read length-sequence length mismatch",
        ));
    }

    // One output byte per pair of bases, rounding up for a trailing single base.
    dst.reserve(bases.len().div_ceil(2));

    // Fixed-size blocks give the inner loop a constant trip count of eight pairs.
    let mut blocks = bases.chunks_exact(CHUNK_SIZE);

    for block in &mut blocks {
        for pair in block.chunks_exact(2) {
            dst.push(pack(pair[0], pair[1]));
        }
    }

    // Fewer than `CHUNK_SIZE` bases are left: whole pairs first, then at most one single base.
    let mut tail_pairs = blocks.remainder().chunks_exact(2);

    for pair in &mut tail_pairs {
        dst.push(pack(pair[0], pair[1]));
    }

    if let [unpaired] = tail_pairs.remainder() {
        dst.push(CODES[*unpaired as usize] << 4);
    }

    Ok(())
}
/// Lookup table from every possible byte value to its 4-bit base code, computed at compile
/// time by `build_codes`.
const CODES: [u8; 256] = build_codes();

/// Returns the 4-bit code for the base `n` by looking it up in [`CODES`].
#[inline]
fn encode_base(n: u8) -> u8 {
    let index = usize::from(n);
    CODES[index]
}
/// Builds the byte-to-code lookup table.
///
/// Each symbol of `=ACMGRSVTWYHKDBN` maps to its position in that string (`0..=15`), in both
/// upper and lower case. Every other byte maps to `15`, the same code as `N`.
// The position is always below 16, so narrowing it to `u8` cannot truncate.
#[allow(clippy::cast_possible_truncation)]
const fn build_codes() -> [u8; 256] {
    // Symbols listed in code order: the index of a symbol is its code.
    const SYMBOLS: [u8; 16] = *b"=ACMGRSVTWYHKDBN";
    // Code assigned to any byte that is not one of the symbols.
    const FALLBACK: u8 = 0x0f;

    let mut table = [FALLBACK; 256];
    let mut position = 0;

    // `for` loops are not available in `const fn`, hence the manual counter.
    while position < SYMBOLS.len() {
        let upper = SYMBOLS[position];
        let lower = upper.to_ascii_lowercase();

        table[upper as usize] = position as u8;
        table[lower as usize] = position as u8;

        position += 1;
    }

    table
}
#[cfg(test)]
mod tests {
    use noodles::sam::alignment::record_buf::Sequence as SequenceBuf;

    use super::*;

    /// A small base count is written as four little-endian bytes.
    #[test]
    fn test_write_length() -> io::Result<()> {
        let mut buf = Vec::new();
        write_length(&mut buf, 8)?;
        assert_eq!(buf, [0x08, 0x00, 0x00, 0x00]);
        Ok(())
    }

    /// A base count that does not fit in a `u32` is rejected.
    // On 16-bit targets `usize::MAX` fits in a `u32`, so the case cannot be produced there.
    #[cfg(not(target_pointer_width = "16"))]
    #[test]
    fn test_write_length_with_out_of_range_base_count() {
        let mut buf = Vec::new();

        assert!(matches!(
            write_length(&mut buf, usize::MAX),
            Err(e) if e.kind() == io::ErrorKind::InvalidInput
        ));
    }

    /// Covers empty, odd-length and even-length sequences, plus the length checks.
    #[test]
    fn test_write_sequence() -> Result<(), Box<dyn std::error::Error>> {
        fn t(buf: &mut Vec<u8>, sequence: &SequenceBuf, expected: &[u8]) -> io::Result<()> {
            buf.clear();
            write_sequence(buf, sequence.len(), sequence)?;
            assert_eq!(buf, expected);
            Ok(())
        }

        let mut buf = Vec::new();

        t(&mut buf, &SequenceBuf::default(), &[])?;
        t(&mut buf, &SequenceBuf::from(b"ACG"), &[0x12, 0x40])?;
        t(&mut buf, &SequenceBuf::from(b"ACGT"), &[0x12, 0x48])?;

        // An empty sequence is accepted even when a nonzero read length is given.
        buf.clear();
        write_sequence(&mut buf, 2, &SequenceBuf::default())?;
        assert!(buf.is_empty());

        // A non-empty sequence must match a nonzero read length.
        buf.clear();
        let sequence = SequenceBuf::from(b"A");
        assert!(matches!(
            write_sequence(&mut buf, 2, &sequence),
            Err(e) if e.kind() == io::ErrorKind::InvalidInput,
        ));

        Ok(())
    }

    /// Every symbol maps to its position in both cases; unknown bytes map to 15.
    #[test]
    fn test_encode_base() {
        const BASES: [u8; 16] = *b"=ACMGRSVTWYHKDBN";

        for (i, b) in (0..).zip(BASES) {
            assert_eq!(encode_base(b), i);
            assert_eq!(encode_base(b.to_ascii_lowercase()), i);
        }

        assert_eq!(encode_base(b'X'), 15);
        assert_eq!(encode_base(b'x'), 15);
    }

    /// Short inputs for the slice-based writer, including parity with `write_sequence`.
    #[test]
    fn test_write_sequence_from_slice() {
        let mut buf = Vec::new();

        write_sequence_from_slice(&mut buf, 0, b"").expect("writing sequence should succeed");
        assert!(buf.is_empty());

        // An empty slice is accepted even when a nonzero read length is given.
        buf.clear();
        write_sequence_from_slice(&mut buf, 2, b"").expect("writing sequence should succeed");
        assert!(buf.is_empty());

        buf.clear();
        write_sequence_from_slice(&mut buf, 4, b"ACGT").expect("writing sequence should succeed");
        assert_eq!(buf, [0x12, 0x48]);

        buf.clear();
        write_sequence_from_slice(&mut buf, 3, b"ACG").expect("writing sequence should succeed");
        assert_eq!(buf, [0x12, 0x40]);

        buf.clear();
        write_sequence_from_slice(&mut buf, 1, b"A").expect("writing sequence should succeed");
        assert_eq!(buf, [0x10]);

        // Lower-case bases share the upper-case codes; unknown bytes encode as 15.
        buf.clear();
        write_sequence_from_slice(&mut buf, 4, b"acgX").expect("writing sequence should succeed");
        assert_eq!(buf, [0x12, 0x4f]);

        buf.clear();
        assert!(write_sequence_from_slice(&mut buf, 2, b"A").is_err());

        buf.clear();
        write_sequence_from_slice(&mut buf, 4, b"ACGT").expect("writing sequence should succeed");
        let mut buf2 = Vec::new();
        write_sequence(&mut buf2, 4, &SequenceBuf::from(b"ACGT"))
            .expect("writing sequence should succeed");
        assert_eq!(buf, buf2);
    }

    /// Exercises the 16-base chunk loop, which the short inputs above never reach.
    #[test]
    fn test_write_sequence_from_slice_with_full_chunk() {
        // 19 bases: one full 16-base chunk, one leftover pair and one trailing base.
        let bases = b"ACGTACGTACGTACGTACG";

        let mut buf = Vec::new();
        write_sequence_from_slice(&mut buf, bases.len(), bases)
            .expect("writing sequence should succeed");
        assert_eq!(
            buf,
            [0x12, 0x48, 0x12, 0x48, 0x12, 0x48, 0x12, 0x48, 0x12, 0x40]
        );

        // The generic writer must produce the same bytes for the same input.
        let mut expected = Vec::new();
        write_sequence(&mut expected, bases.len(), &SequenceBuf::from(bases))
            .expect("writing sequence should succeed");
        assert_eq!(buf, expected);
    }
}