use crate::ska_dict::bit_encoding::is_ambiguous;
/// Incrementally builds one alignment row against a multi-contig reference
/// from mapped split k-mers.
///
/// The output buffer starts out as all gap characters (`b'-'`). Flanking
/// bases are copied from the reference as k-mers are written, and the middle
/// bases are applied in one pass when the writer is finalised.
#[derive(Clone)]
pub struct AlnWriter<'a> {
    // ---- Borrowed inputs and configuration ----
    /// Reference contigs, one byte vector per contig.
    ref_seq: &'a Vec<Vec<u8>>,
    /// Indices into `seq_out` that are overwritten with `b'N'` at finalise
    /// time, unless the position is still a gap.
    repeat_regions: &'a Vec<usize>,
    /// When `true`, middle bases for which `is_ambiguous` returns `true` are
    /// recorded as `b'N'` instead of the base itself.
    mask_ambig: bool,
    /// Number of flanking bases on each side of the middle base,
    /// computed as `(k - 1) / 2`.
    half_split_len: usize,

    // ---- Cursor state (positions are relative to the current contig) ----
    /// Index of the contig currently being written.
    curr_chrom: usize,
    /// Offset of the current contig's first base inside `seq_out`.
    chrom_offset: usize,
    /// A k-mer mapped below this position only updates `last_mapped`; one
    /// mapped at or beyond it has its left flank copied from the reference.
    next_pos: usize,
    /// Middle-base position of the most recently seen k-mer.
    last_mapped: usize,
    /// Last position for which flank bases have been written; `0` is treated
    /// as "nothing written yet".
    last_written: usize,

    // ---- Output ----
    /// Alignment row being assembled; as long as all contigs concatenated.
    seq_out: Vec<u8>,
    /// Deferred `(base, index into seq_out)` pairs for the middle bases,
    /// applied in `finalise`.
    _middle_out: Vec<(u8, usize)>,
    /// Set once `finalise` has run so that the work is only done once.
    finalised: bool,
}
impl<'a> AlnWriter<'a> {
    /// Creates a writer whose output is pre-filled with gap characters
    /// (`b'-'`), one per reference base across all contigs.
    ///
    /// * `ref_seq` - reference contigs, one byte vector per contig.
    /// * `k` - split k-mer length; the flank length on each side of the
    ///   middle base is `(k - 1) / 2`. A `k` of zero is treated as a flank
    ///   length of zero rather than underflowing.
    /// * `repeat_regions` - indices into the concatenated output that are
    ///   masked with `b'N'` when the writer is finalised.
    /// * `mask_ambig` - if `true`, ambiguous middle bases are written as
    ///   `b'N'`.
    pub fn new(
        ref_seq: &'a Vec<Vec<u8>>,
        k: usize,
        repeat_regions: &'a Vec<usize>,
        mask_ambig: bool,
    ) -> Self {
        let total_size: usize = ref_seq.iter().map(Vec::len).sum();
        // `saturating_sub` avoids an arithmetic underflow for `k == 0`.
        let half_split_len = k.saturating_sub(1) / 2;
        Self {
            next_pos: half_split_len,
            curr_chrom: 0,
            last_mapped: 0,
            last_written: 0,
            chrom_offset: 0,
            ref_seq,
            seq_out: vec![b'-'; total_size],
            half_split_len,
            finalised: false,
            repeat_regions,
            mask_ambig,
            _middle_out: Vec::new(),
        }
    }

    /// Length of the output sequence (sum of all contig lengths).
    pub fn total_size(&self) -> usize {
        self.seq_out.len()
    }

    /// Copies the reference bases that follow the last written position and
    /// belong to the right-hand flank of the most recently mapped k-mer,
    /// i.e. positions `last_written + 1 ..= last_mapped + half_split_len`,
    /// stopping before `maximum` (exclusive, contig-relative).
    fn fill_fwd_bases(&mut self, maximum: usize) {
        // `last_written == 0` doubles as "nothing written on this contig yet".
        if self.last_written == 0 {
            return;
        }
        let last_match_overhang =
            (self.last_mapped + self.half_split_len).saturating_sub(self.last_written);
        let start = self.last_written + 1;
        let end = usize::min(start + last_match_overhang, maximum);
        if end > start {
            self.seq_out[(start + self.chrom_offset)..(end + self.chrom_offset)]
                .copy_from_slice(&self.ref_seq[self.curr_chrom][start..end]);
            self.last_written = end;
        }
    }

    /// Completes the current contig (writing any outstanding right-hand
    /// flank) and advances the cursor to the start of the next contig.
    fn fill_contig(&mut self) {
        let chrom_length = self.ref_seq[self.curr_chrom].len();
        self.fill_fwd_bases(chrom_length);
        self.chrom_offset += chrom_length;
        self.curr_chrom += 1;
        self.next_pos = self.half_split_len;
        // These positions are contig-relative; carrying them over would let
        // a flank clipped at the end of this contig spill into the next one.
        self.last_mapped = 0;
        self.last_written = 0;
    }

    /// Records a split k-mer whose middle base `base` maps to position
    /// `mapped_pos` of contig `mapped_chrom`.
    ///
    /// The code assumes calls arrive in increasing contig/position order:
    /// the writer only ever moves forward through the reference.
    ///
    /// # Panics
    ///
    /// Panics if `mapped_chrom` is not a valid contig index or if the
    /// k-mer's left flank falls outside the contig.
    pub fn write_split_kmer(&mut self, mapped_pos: usize, mapped_chrom: usize, base: u8) {
        // Move forward to the contig this k-mer maps to.
        while mapped_chrom > self.curr_chrom {
            self.fill_contig();
        }

        // Middle bases are deferred to `finalise`, which applies them after
        // all flank copies, so they are never overwritten by reference bases.
        let middle_base = if is_ambiguous(base) && self.mask_ambig {
            b'N'
        } else {
            base
        };
        self._middle_out
            .push((middle_base, mapped_pos + self.chrom_offset));

        if mapped_pos < self.next_pos {
            // Overlaps the region already handled: only remember how far the
            // right-hand flank now extends.
            self.last_mapped = mapped_pos;
        } else {
            // Write the outstanding right-hand flank of the previous match,
            // stopping where this k-mer's left flank begins.
            if mapped_pos > self.next_pos {
                self.fill_fwd_bases(mapped_pos - self.half_split_len);
            }

            // Left flank of this k-mer, copied from the reference.
            let start = mapped_pos - self.half_split_len;
            let end = mapped_pos;
            self.seq_out[(start + self.chrom_offset)..(end + self.chrom_offset)]
                .copy_from_slice(&self.ref_seq[self.curr_chrom][start..end]);

            self.next_pos = mapped_pos + self.half_split_len + 1;
            self.last_mapped = mapped_pos;
            self.last_written = mapped_pos;
        }
    }

    /// Finishes the alignment: completes all remaining contigs, applies the
    /// deferred middle bases, then masks every non-gap repeat position with
    /// `b'N'`. Calling it again is a no-op.
    ///
    /// # Panics
    ///
    /// Panics if a recorded middle-base position or a repeat index lies
    /// outside the output sequence.
    pub fn finalise(&mut self) {
        if self.finalised {
            return;
        }
        while self.curr_chrom < self.ref_seq.len() {
            self.fill_contig();
        }
        for (middle_base, middle_pos) in &self._middle_out {
            self.seq_out[*middle_pos] = *middle_base;
        }
        for repeat_idx in self.repeat_regions {
            // Positions that were never covered stay as gaps.
            if self.seq_out[*repeat_idx] != b'-' {
                self.seq_out[*repeat_idx] = b'N';
            }
        }
        self.finalised = true;
    }

    /// Finalises the writer if necessary and returns the alignment row.
    ///
    /// The returned slice borrows the writer only for as long as the slice
    /// is in use, so the writer can be queried again afterwards.
    pub fn get_seq(&mut self) -> &[u8] {
        self.finalise();
        self.seq_out.as_slice()
    }
}