/// Two-way lookup table between nucleotide symbols and their numeric codes.
///
/// * Indices `0..16` hold the ASCII symbol for each code, in the order
///   `ACGTNRYSWKMBDHVU` (code -> symbol).
/// * Indices `65..=95` and `97..=127` hold the code of the upper-/lower-case
///   symbol at that ASCII position (symbol -> code); positions in those ranges
///   that are not one of the sixteen symbols hold `30`.
/// * Every other index, including `64` (`@`) and `96` (`` ` ``), holds `b' '`.
const CNV_NUM: [u8; 128] = {
    // `SYMBOLS[code]` is the ASCII letter that stands for `code`.
    const SYMBOLS: [u8; 16] = *b"ACGTNRYSWKMBDHVU";
    // Value stored at positions of the two letter columns that carry no symbol.
    const FILLER: u8 = 30;

    let mut table = [b' '; 128];

    // Fill both letter columns with the filler first; offset 0 (`@` and '`')
    // is deliberately skipped so those two entries stay `b' '`.
    let mut offset = 1;
    while offset < 32 {
        table[64 + offset] = FILLER;
        table[96 + offset] = FILLER;
        offset += 1;
    }

    // Then record every known symbol in both directions.
    let mut code = 0;
    while code < SYMBOLS.len() {
        let upper = SYMBOLS[code] as usize;
        table[code] = SYMBOLS[code];
        table[upper] = code as u8;
        // The lower-case letter sits 32 positions after its upper-case form.
        table[upper + 32] = code as u8;
        code += 1;
    }

    table
};
pub fn preprocess_raw_contig(contig: &mut Vec<u8>) {
let len = contig.len();
let mut in_pos = 0usize;
let mut out_pos = 0usize;
match len % 4 {
3 => {
let c = contig[in_pos];
in_pos += 1;
if c >> 6 != 0 {
contig[out_pos] = CNV_NUM[c as usize];
out_pos += 1;
}
let c = contig[in_pos];
in_pos += 1;
if c >> 6 != 0 {
contig[out_pos] = CNV_NUM[c as usize];
out_pos += 1;
}
let c = contig[in_pos];
in_pos += 1;
if c >> 6 != 0 {
contig[out_pos] = CNV_NUM[c as usize];
out_pos += 1;
}
}
2 => {
let c = contig[in_pos];
in_pos += 1;
if c >> 6 != 0 {
contig[out_pos] = CNV_NUM[c as usize];
out_pos += 1;
}
let c = contig[in_pos];
in_pos += 1;
if c >> 6 != 0 {
contig[out_pos] = CNV_NUM[c as usize];
out_pos += 1;
}
}
1 => {
let c = contig[in_pos];
in_pos += 1;
if c >> 6 != 0 {
contig[out_pos] = CNV_NUM[c as usize];
out_pos += 1;
}
}
_ => {} }
while in_pos < len {
let c = contig[in_pos];
in_pos += 1;
if c >> 6 != 0 {
contig[out_pos] = CNV_NUM[c as usize];
out_pos += 1;
}
let c = contig[in_pos];
in_pos += 1;
if c >> 6 != 0 {
contig[out_pos] = CNV_NUM[c as usize];
out_pos += 1;
}
let c = contig[in_pos];
in_pos += 1;
if c >> 6 != 0 {
contig[out_pos] = CNV_NUM[c as usize];
out_pos += 1;
}
let c = contig[in_pos];
in_pos += 1;
if c >> 6 != 0 {
contig[out_pos] = CNV_NUM[c as usize];
out_pos += 1;
}
}
contig.truncate(out_pos);
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `preprocess_raw_contig` on a copy of `raw` and returns the result.
    fn encoded(raw: &[u8]) -> Vec<u8> {
        let mut buffer = raw.to_vec();
        preprocess_raw_contig(&mut buffer);
        buffer
    }

    /// Plain upper-case bases map to their codes one-to-one.
    #[test]
    fn test_preprocess_raw_contig_acgt() {
        let expected: Vec<u8> = vec![0, 1, 2, 3];
        assert_eq!(encoded(b"ACGT"), expected);
    }

    /// Spaces between bases are removed from the output.
    #[test]
    fn test_preprocess_raw_contig_with_spaces() {
        let expected: Vec<u8> = vec![0, 1, 2, 3];
        assert_eq!(encoded(b"A C G T"), expected);
    }

    /// A line break inside the sequence is removed and `N` is encoded as 4.
    #[test]
    fn test_preprocess_raw_contig_mixed() {
        let expected: Vec<u8> = vec![0, 1, 2, 3, 4, 0, 3, 2, 1];
        assert_eq!(encoded(b"ACGTN\nATGC"), expected);
    }
}