use seqair_types::{BamFlags, Pos0};
/// Computes the position of the last reference base covered by a raw BAM record.
///
/// The header is decoded with [`parse_header`], then the packed CIGAR words stored
/// between `var_start` and `cigar_end` are scanned. Each word is a little-endian
/// `u32` whose low 4 bits hold the op type and whose upper 28 bits hold the op
/// length. The lengths of all ops of type `CIGAR_M`, `CIGAR_D`, `CIGAR_N`,
/// `CIGAR_EQ` or `CIGAR_X` are summed.
///
/// # Returns
///
/// * `Some(pos)` — the record's own start position when the summed length is zero.
/// * `Some(pos + len - 1)` — the inclusive end position otherwise.
/// * `None` — when the header fails to parse or any position arithmetic overflows.
pub fn compute_end_pos_from_raw(raw: &[u8]) -> Option<Pos0> {
    use super::cigar::{CIGAR_D, CIGAR_EQ, CIGAR_M, CIGAR_N, CIGAR_X};
    use seqair_types::Offset;

    let header = parse_header(raw).ok()?;
    debug_assert!(
        header.cigar_end <= raw.len(),
        "cigar overrun: {} > {}",
        header.cigar_end,
        raw.len()
    );
    #[allow(clippy::indexing_slicing, reason = "cigar_end ≤ raw.len() checked by parse_header")]
    let packed_ops = &raw[header.var_start..header.cigar_end];

    // Decode each 4-byte word, keep only the ops counted towards the reference
    // span, and accumulate their lengths with overflow checking.
    let ref_span = packed_ops
        .chunks_exact(4)
        .map(|word| {
            let bytes: [u8; 4] = word.try_into().expect("chunks_exact(4) yields 4 bytes");
            u32::from_le_bytes(bytes)
        })
        .filter(|&packed| {
            matches!((packed & 0xF) as u8, CIGAR_M | CIGAR_D | CIGAR_N | CIGAR_EQ | CIGAR_X)
        })
        .try_fold(0i64, |span, packed| span.checked_add(i64::from(packed >> 4)))?;

    if ref_span == 0 {
        return Some(header.pos);
    }

    // The end position is inclusive, hence the `- 1`.
    let last_offset = ref_span.checked_sub(1)?;
    header.pos.checked_add_offset(Offset::new(last_offset))
}
/// Copies the two bytes at `buf[offset]` and `buf[offset + 1]` into an array.
///
/// The caller is expected to have verified that both bytes exist; debug builds
/// assert this, and an out-of-range access panics in every build.
#[allow(
    clippy::indexing_slicing,
    reason = "offset + 1 < raw.len() ensured by caller's length check"
)]
pub(crate) fn read2(buf: &[u8], offset: usize) -> [u8; 2] {
    debug_assert!(
        offset.saturating_add(1) < buf.len(),
        "read2 out of bounds: offset={offset}, len={}",
        buf.len()
    );
    let mut pair = [0u8; 2];
    pair.copy_from_slice(&buf[offset..][..2]);
    pair
}
/// Copies the four bytes starting at `buf[offset]` into an array.
///
/// The caller is expected to have verified that all four bytes exist; debug builds
/// assert this, and an out-of-range access panics in every build.
#[allow(
    clippy::indexing_slicing,
    reason = "offset + 3 < raw.len() ensured by caller's length check"
)]
pub(crate) fn read4(buf: &[u8], offset: usize) -> [u8; 4] {
    debug_assert!(
        offset.saturating_add(3) < buf.len(),
        "read4 out of bounds: offset={offset}, len={}",
        buf.len()
    );
    let mut quad = [0u8; 4];
    quad.copy_from_slice(&buf[offset..][..4]);
    quad
}
/// Fixed-length fields of a raw BAM record as decoded by [`parse_header`], together
/// with the byte offsets it derives for the variable-length sections that follow.
///
/// All multi-byte integers are read as little-endian.
#[derive(Debug)]
pub(crate) struct ParsedHeader {
    /// `i32` read from bytes 0..4 of the record.
    pub tid: i32,
    /// Position read from bytes 4..8, after validation through `Pos0::try_from`.
    pub pos: Pos0,
    /// Byte 9 of the record.
    pub mapq: u8,
    /// `u16` read from bytes 14..16, wrapped in `BamFlags`.
    pub flags: BamFlags,
    /// `u16` read from bytes 12..14; each CIGAR op occupies 4 bytes in the record.
    pub n_cigar_ops: u16,
    /// `u32` read from bytes 16..20.
    pub seq_len: u32,
    /// `i32` read from bytes 20..24.
    pub next_ref_id: i32,
    /// `i32` read from bytes 24..28 (kept raw, not validated as a `Pos0`).
    pub next_pos: i32,
    /// `i32` read from bytes 28..32.
    pub template_len: i32,
    /// Offset where the CIGAR section begins: `32 + name length` (name length is byte 8).
    pub var_start: usize,
    /// End of the CIGAR section: `var_start + 4 * n_cigar_ops`.
    pub cigar_end: usize,
    /// End of the section after the CIGAR: `cigar_end + ceil(seq_len / 2)`.
    pub seq_end: usize,
    /// `seq_end + seq_len`; `parse_header` rejects records shorter than this.
    pub qual_end: usize,
}
/// Decodes the 32-byte fixed prefix of a raw BAM record and derives the byte offsets
/// of the variable-length sections that follow it.
///
/// # Errors
///
/// * [`DecodeError::TooShort`] — `raw` has fewer than 32 bytes, or is shorter than
///   the computed `qual_end` offset.
/// * [`DecodeError::InvalidPosition`] — the `i32` at bytes 4..8 is rejected by
///   `Pos0::try_from`.
/// * [`DecodeError::OffsetOverflow`] — summing the section lengths overflows `usize`.
pub(crate) fn parse_header(raw: &[u8]) -> Result<ParsedHeader, DecodeError> {
    const FIXED_LEN: usize = 32;

    let total_len = raw.len();
    if total_len < FIXED_LEN {
        return Err(DecodeError::TooShort { len: total_len });
    }
    debug_assert!(raw.len() >= 32, "raw record too short for fixed fields: {}", raw.len());

    // Little-endian readers over the fixed prefix.
    let i32_at = |offset: usize| i32::from_le_bytes(read4(raw, offset));
    let u16_at = |offset: usize| u16::from_le_bytes(read2(raw, offset));

    let tid = i32_at(0);
    let raw_pos = i32_at(4);
    let pos = match Pos0::try_from(raw_pos) {
        Ok(valid) => valid,
        Err(_) => return Err(DecodeError::InvalidPosition { value: raw_pos }),
    };

    #[allow(clippy::indexing_slicing, reason = "raw.len() >= 32 checked above")]
    let (name_len, mapq) = (usize::from(raw[8]), raw[9]);

    let n_cigar_ops = u16_at(12);
    let flags = BamFlags::from(u16_at(14));
    let seq_len = u32::from_le_bytes(read4(raw, 16));
    let next_ref_id = i32_at(20);
    let next_pos = i32_at(24);
    let template_len = i32_at(28);

    // Section sizes: 4 bytes per CIGAR op, two bases per byte, then `seq_len` more bytes.
    let cigar_len = usize::from(n_cigar_ops) * 4;
    let base_count = seq_len as usize;
    let packed_seq_len = base_count.div_ceil(2);

    // Each section boundary is the previous boundary plus the section's size.
    let advance = |from: usize, by: usize| -> Result<usize, DecodeError> {
        from.checked_add(by).ok_or(DecodeError::OffsetOverflow)
    };
    let var_start = advance(FIXED_LEN, name_len)?;
    let cigar_end = advance(var_start, cigar_len)?;
    let seq_end = advance(cigar_end, packed_seq_len)?;
    let qual_end = advance(seq_end, base_count)?;

    if qual_end > total_len {
        return Err(DecodeError::TooShort { len: total_len });
    }

    Ok(ParsedHeader {
        tid,
        pos,
        mapq,
        flags,
        n_cigar_ops,
        seq_len,
        next_ref_id,
        next_pos,
        template_len,
        var_start,
        cigar_end,
        seq_end,
        qual_end,
    })
}
/// Input values for [`encode_fixed_header`], which serialises them as eight
/// little-endian 32-bit words (32 bytes in total), in the order noted per field.
pub(crate) struct FixedHeaderFields {
    /// Word 1.
    pub ref_id: i32,
    /// Word 2.
    pub pos: i32,
    /// Upper 16 bits of word 3.
    pub bin: u16,
    /// Bits 8..16 of word 3.
    pub mapq: u8,
    /// Low 8 bits of word 3.
    pub l_read_name: u8,
    /// Upper 16 bits of word 4.
    pub flags: u16,
    /// Low 16 bits of word 4.
    pub n_cigar_op: u16,
    /// Word 5.
    pub l_seq: i32,
    /// Word 6.
    pub next_ref_id: i32,
    /// Word 7.
    pub next_pos: i32,
    /// Word 8.
    pub template_len: i32,
}
/// Appends the 32-byte fixed header described by `f` to `buf`.
///
/// The output is eight little-endian 32-bit words. Two of them are packed:
/// `bin << 16 | mapq << 8 | l_read_name` and `flags << 16 | n_cigar_op`.
/// Existing contents of `buf` are left untouched.
pub(crate) fn encode_fixed_header(buf: &mut Vec<u8>, f: &FixedHeaderFields) {
    let bin_mq_nl = u32::from(f.l_read_name) | (u32::from(f.mapq) << 8) | (u32::from(f.bin) << 16);
    let flag_nc = u32::from(f.n_cigar_op) | (u32::from(f.flags) << 16);

    // One entry per output word, in wire order.
    let words: [[u8; 4]; 8] = [
        f.ref_id.to_le_bytes(),
        f.pos.to_le_bytes(),
        bin_mq_nl.to_le_bytes(),
        flag_nc.to_le_bytes(),
        f.l_seq.to_le_bytes(),
        f.next_ref_id.to_le_bytes(),
        f.next_pos.to_le_bytes(),
        f.template_len.to_le_bytes(),
    ];
    for word in &words {
        buf.extend_from_slice(word);
    }
}
/// Errors reported while decoding or validating BAM record data.
///
/// Marked `#[non_exhaustive]`, so matches outside this crate need a wildcard arm.
#[non_exhaustive]
#[derive(Debug, thiserror::Error)]
pub enum DecodeError {
    /// The record has fewer bytes than required; `len` is the length that was seen.
    /// `parse_header` returns this when the record is under 32 bytes or shorter than
    /// the computed end of its variable-length sections.
    #[error("BAM record too short: {len} bytes")]
    TooShort { len: usize },
    /// A checked addition overflowed while `parse_header` computed section offsets.
    #[error("arithmetic overflow computing BAM record field offsets")]
    OffsetOverflow,
    /// A slab offset did not fit in `u32`.
    /// NOTE(review): not raised in this part of the file — confirm at the raising site.
    #[error("slab offset exceeds u32::MAX")]
    SlabOverflow,
    /// The raw position `value` was rejected by `Pos0::try_from` in `parse_header`.
    #[error("invalid BAM position value {value}: negative positions are reserved")]
    InvalidPosition { value: i32 },
    /// The query length implied by the CIGAR differs from the sequence length.
    /// NOTE(review): not raised in this part of the file — confirm at the raising site.
    #[error("CIGAR query length {cigar_query_len} does not match seq_len {seq_len}")]
    CigarQueryLenMismatch { cigar_query_len: u32, seq_len: u32 },
    /// The number of CIGAR ops does not fit in a `u16`.
    /// NOTE(review): not raised in this part of the file — confirm at the raising site.
    #[error("CIGAR op count {count} exceeds u16::MAX (BAM n_cigar_op limit)")]
    CigarOpCountOverflow { count: usize },
    /// The quality array length differs from the sequence length.
    /// NOTE(review): not raised in this part of the file — confirm at the raising site.
    #[error("qual length {qual_len} does not match seq length {seq_len}")]
    QualLenMismatch { qual_len: usize, seq_len: usize },
}
#[cfg(test)]
#[allow(clippy::arithmetic_side_effects, reason = "test arithmetic on known small values")]
mod tests {
    use super::*;

    /// A single 50-base match starting at position 100 ends at position 149.
    #[test]
    fn test_compute_end_pos() {
        use super::super::cigar::{CigarOp, CigarOpType, compute_end_pos};

        let start = Pos0::new(100).unwrap();
        let cigar = [CigarOp::new(CigarOpType::Match, 50)];
        let end = compute_end_pos(start, &cigar);
        assert_eq!(end, Some(Pos0::new(149).unwrap()));
    }

    /// A 32-byte record that declares the largest possible name, CIGAR and sequence
    /// sizes must be rejected instead of producing out-of-range offsets.
    #[test]
    fn parse_header_rejects_overflow_in_offset_calc() {
        let mut raw = [0u8; 32];
        // Bytes 0..4 and 4..8: zero-valued i32 fields.
        raw[0..4].copy_from_slice(&0i32.to_le_bytes());
        raw[4..8].copy_from_slice(&0i32.to_le_bytes());
        // Byte 8: maximum name length; byte 9: zero.
        raw[8] = 255;
        raw[9] = 0;
        // Bytes 12..14: maximum CIGAR op count; bytes 14..16: zero flags.
        raw[12..14].copy_from_slice(&u16::MAX.to_le_bytes());
        raw[14..16].copy_from_slice(&0u16.to_le_bytes());
        // Bytes 16..20: maximum sequence length.
        raw[16..20].copy_from_slice(&u32::MAX.to_le_bytes());

        let result = parse_header(&raw);
        assert!(result.is_err());

        let err = result.unwrap_err();
        let is_expected_kind =
            matches!(err, DecodeError::OffsetOverflow | DecodeError::TooShort { .. });
        assert!(is_expected_kind, "expected OffsetOverflow or TooShort, got {err:?}");
    }
}