use crate::haplotype::Haplotype;
#[must_use]
fn complement(base: u8) -> u8 {
match base.to_ascii_uppercase() {
b'A' => b'T',
b'T' => b'A',
b'C' => b'G',
b'G' => b'C',
_ => b'N',
}
}
pub fn reverse_complement(seq: &mut [u8]) {
seq.reverse();
for base in seq.iter_mut() {
*base = complement(*base);
}
}
#[derive(Debug)]
pub struct Fragment {
pub bases: Vec<u8>,
pub ref_positions: Vec<u32>,
pub ref_start: u32,
pub is_forward: bool,
pub haplotype_index: usize,
}
#[must_use]
pub fn extract_fragment(
haplotype: &Haplotype,
reference: &[u8],
ref_start: u32,
fragment_len: usize,
is_forward: bool,
) -> Fragment {
let (bases, ref_positions) = haplotype.extract_fragment(reference, ref_start, fragment_len);
Fragment {
bases,
ref_positions,
ref_start,
is_forward,
haplotype_index: haplotype.allele_index(),
}
}
#[must_use]
pub fn extract_read_bases(
fragment_bases: &[u8],
read_length: usize,
adapter: &[u8],
is_r2: bool,
) -> Vec<u8> {
let frag_len = fragment_bases.len();
if is_r2 {
let mut bases = Vec::with_capacity(read_length);
let take = frag_len.min(read_length);
let start = frag_len.saturating_sub(take);
bases.extend_from_slice(&fragment_bases[start..]);
reverse_complement(&mut bases);
append_adapter_and_pad(&mut bases, read_length, adapter);
bases
} else {
let mut bases = Vec::with_capacity(read_length);
let take = frag_len.min(read_length);
bases.extend_from_slice(&fragment_bases[..take]);
append_adapter_and_pad(&mut bases, read_length, adapter);
bases
}
}
fn append_adapter_and_pad(bases: &mut Vec<u8>, target_len: usize, adapter: &[u8]) {
if bases.len() < target_len {
let need = target_len - bases.len();
let adapter_take = need.min(adapter.len());
bases.extend_from_slice(&adapter[..adapter_take]);
while bases.len() < target_len {
bases.push(b'N');
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_reverse_complement() {
let mut seq = b"ACGT".to_vec();
reverse_complement(&mut seq);
assert_eq!(&seq, b"ACGT");
let mut seq2 = b"AACG".to_vec();
reverse_complement(&mut seq2);
assert_eq!(&seq2, b"CGTT");
}
#[test]
fn test_reverse_complement_with_n() {
let mut seq = b"ANGC".to_vec();
reverse_complement(&mut seq);
assert_eq!(&seq, b"GCNT");
}
#[test]
fn test_extract_r1_full_fragment() {
let fragment = b"ACGTACGTAC";
let bases = extract_read_bases(fragment, 10, b"ADAPTER", false);
assert_eq!(&bases, b"ACGTACGTAC");
}
#[test]
fn test_extract_r1_fragment_longer_than_read() {
let fragment = b"ACGTACGTAC";
let bases = extract_read_bases(fragment, 5, b"ADAPTER", false);
assert_eq!(&bases, b"ACGTA");
}
#[test]
fn test_extract_r1_short_fragment_with_adapter() {
let fragment = b"ACG";
let adapter = b"TTTTTT";
let bases = extract_read_bases(fragment, 8, adapter, false);
assert_eq!(&bases, b"ACGTTTTT");
}
#[test]
fn test_extract_r2_full_fragment() {
let fragment = b"ACGTACGTAC";
let bases = extract_read_bases(fragment, 10, b"ADAPTER", true);
assert_eq!(&bases, b"GTACGTACGT");
}
#[test]
fn test_extract_r2_fragment_longer_than_read() {
let fragment = b"ACGTACGTAC";
let bases = extract_read_bases(fragment, 5, b"ADAPTER", true);
assert_eq!(&bases, b"GTACG");
}
#[test]
fn test_extract_r2_short_fragment_with_adapter() {
let fragment = b"ACG";
let adapter = b"TTTTTT";
let bases = extract_read_bases(fragment, 8, adapter, true);
assert_eq!(&bases, b"CGTTTTTT"); assert_eq!(bases.len(), 8);
}
#[test]
fn test_extract_empty_fragment_all_adapter() {
let fragment = b"";
let adapter = b"AGATCGG";
let bases = extract_read_bases(fragment, 5, b"AGATCGG", false);
assert_eq!(&bases, b"AGATC");
let bases_r2 = extract_read_bases(fragment, 5, adapter, true);
assert_eq!(&bases_r2, b"AGATC");
}
}