use std::{
fs::File,
io,
io::{BufRead, ErrorKind, Read},
path::Path,
};
use bgzip::{BGZFError, BGZFReader, index::BGZFIndex, read::IndexedBGZFReader};
use flate2::read::{GzDecoder, MultiGzDecoder};
#[macro_export]
macro_rules! parse_le {
($bytes:expr, $t:ty, $range:expr) => {{ <$t>::from_le_bytes($bytes[$range].try_into().unwrap()) }};
}
const MAGIC: [u8; 4] = [b'B', b'A', b'M', 1];
#[derive(Debug)]
struct RefSeq {
pub name: String,
pub l_ref: u32,
}
impl RefSeq {
pub fn from_buf(buf: &[u8]) -> io::Result<Self> {
let l_name = parse_le!(buf, u32, 0..4);
let name_end = 4 + l_name as usize;
Ok(Self {
name: String::from_utf8(buf[4..name_end - 1].to_vec()).unwrap(),
l_ref: parse_le!(buf, u32, name_end..name_end + 4),
})
}
pub fn to_buf(&self) -> Vec<u8> {
Vec::new()
}
}
#[derive(Debug)]
struct AuxData {
pub tag: [u8; 2],
pub val_type: u8,
pub value: Vec<u8>, }
#[derive(Debug)]
struct Alignment {
pub block_size: u32,
pub ref_id: i32,
pub pos: i32,
pub mapq: u8,
pub bin: u16,
pub n_cigar_op: u16,
pub flag: u16,
pub next_ref_id: i32,
pub next_pos: i32,
pub tlen: i32,
pub read_name: String,
pub cigar: Vec<u32>,
pub seq: Vec<u8>, pub qual: String, pub aux_data: Vec<AuxData>,
}
impl Alignment {
pub fn from_buf(buf: &[u8]) -> io::Result<Self> {
if buf.len() < 36 {
return Err(io::Error::new(
ErrorKind::InvalidData,
"Buffer is too small to contain a valid Alignment record",
));
}
let block_size = parse_le!(buf, u32, 0..4);
println!("Block size: {:?}", block_size);
let ref_id = parse_le!(buf, i32, 4..8);
let pos = parse_le!(buf, i32, 8..12);
let l_read_name = buf[12] as usize;
let mapq = buf[13];
let bin = parse_le!(buf, u16, 14..16);
let n_cigar_op = parse_le!(buf, u16, 16..18);
let flag = parse_le!(buf, u16, 18..20);
let l_seq = parse_le!(buf, u32, 20..24) as usize;
let next_ref_id = parse_le!(buf, i32, 24..28);
let next_pos = parse_le!(buf, i32, 28..32);
let tlen = parse_le!(buf, i32, 32..36);
let mut i = 36;
println!("Len read name: {:?}", l_read_name);
let read_name = String::from_utf8(buf[i..i + l_read_name - 1].to_vec())
.map_err(|e| io::Error::new(ErrorKind::InvalidData, e))?;
i += l_read_name;
let cigar_len = n_cigar_op as usize;
let mut cigar = Vec::new();
for _ in 0..cigar_len {
let cigar_op = parse_le!(buf, u32, i..i + 4);
cigar.push(cigar_op);
i += 4;
}
let seq_len = (l_seq + 1) / 2; let seq = buf[i..i + seq_len].to_vec();
i += seq_len;
let qual = buf[i..i + l_seq].to_vec();
i += l_seq;
let aux_data = Vec::new();
Ok(Self {
block_size,
ref_id,
pos,
mapq,
bin,
n_cigar_op,
flag,
next_ref_id,
next_pos,
tlen,
read_name,
cigar,
seq,
qual: String::new(), aux_data,
})
}
pub fn to_buf_(&self) -> Vec<u8> {
Vec::new()
}
}
#[derive(Debug)]
pub struct AlignmentMap {
pub l_text: u32,
pub text: String,
pub n_ref: u32,
pub refs: Vec<RefSeq>,
pub alignments: Vec<Alignment>,
}
impl AlignmentMap {
pub fn from_buf(buf: &[u8]) -> io::Result<Self> {
if buf.len() < 12 {
return Err(io::Error::new(
ErrorKind::InvalidData,
"Buffer is too small to contain a valid header",
));
}
let l_text = parse_le!(buf, u32, 4..8);
let text_end = 8 + l_text as usize;
let n_ref = parse_le!(buf, u32, text_end..text_end + 4);
let refs_start = text_end + 4;
let mut i = refs_start;
let mut refs = Vec::new();
for _ in 0..n_ref {
let ref_seq = RefSeq::from_buf(buf[i..].try_into().unwrap())?;
i += 9 + ref_seq.name.len();
refs.push(ref_seq);
}
let mut alignments = Vec::new();
while i + 1 < buf.len() {
let alignment = Alignment::from_buf(buf[i..].try_into().unwrap())?;
println!("\n\n Alignment: {:?}", alignment);
i += 4 + alignment.block_size as usize;
alignments.push(alignment);
}
if buf[0..4] != MAGIC {
return Err(io::Error::new(
ErrorKind::InvalidData,
"Incorrect BAM magic.".to_string(),
));
}
Ok(Self {
l_text,
text: String::from_utf8(buf[8..text_end].to_vec()).unwrap(),
n_ref,
refs,
alignments,
})
}
pub fn to_buf(&self) -> Vec<u8> {
Vec::new()
}
}
pub fn import(path: &Path) -> io::Result<AlignmentMap> {
let file = File::open(path)?;
let mut decoder = GzDecoder::new(file);
println!("Decoding file...");
let mut buf = Vec::new();
decoder.read_to_end(&mut buf)?;
println!("Decode complete. Reading...");
let header = AlignmentMap::from_buf(&buf);
header
}
fn reg_to_bin(beg: i32, end: i32) -> i32 {
let end = end - 1; if beg >> 14 == end >> 14 {
return ((1 << 15) - 1) / 7 + (beg >> 14);
}
if beg >> 17 == end >> 17 {
return ((1 << 12) - 1) / 7 + (beg >> 17);
}
if beg >> 20 == end >> 20 {
return ((1 << 9) - 1) / 7 + (beg >> 20);
}
if beg >> 23 == end >> 23 {
return ((1 << 6) - 1) / 7 + (beg >> 23);
}
if beg >> 26 == end >> 26 {
return ((1 << 3) - 1) / 7 + (beg >> 26);
}
0
}
fn reg_to_bins(beg: i32, end: i32, list: &mut Vec<u16>) -> usize {
let end = end - 1; list.push(0);
for k in (1 + (beg >> 26))..=(1 + (end >> 26)) {
list.push(k as u16);
}
for k in (9 + (beg >> 23))..=(9 + (end >> 23)) {
list.push(k as u16);
}
for k in (73 + (beg >> 20))..=(73 + (end >> 20)) {
list.push(k as u16);
}
for k in (585 + (beg >> 17))..=(585 + (end >> 17)) {
list.push(k as u16);
}
for k in (4681 + (beg >> 14))..=(4681 + (end >> 14)) {
list.push(k as u16);
}
list.len()
}