1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
use rand::Rng;
use crate::{error::WriteError, Result};
/// Policy for handling invalid nucleotide sequences.
///
/// Selects what [`Policy::handle`] does with a sequence containing bytes
/// other than the uppercase bases `A`, `C`, `G` and `T`.
///
/// The default policy is [`Policy::IgnoreSequence`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub enum Policy {
    /// Skip the sequence: `handle` leaves the output buffer empty and
    /// returns `Ok(false)`.
    #[default]
    IgnoreSequence,
    /// Abort: `handle` returns an error instead of converting the sequence.
    BreakOnInvalid,
    /// Replace every byte other than `A`/`C`/`G`/`T` with one of those four
    /// bases drawn from the supplied random number generator.
    RandomDraw,
    /// Replace every byte other than `A`/`C`/`G`/`T` with `A`.
    SetToA,
    /// Replace every byte other than `A`/`C`/`G`/`T` with `C`.
    SetToC,
    /// Replace every byte other than `A`/`C`/`G`/`T` with `G`.
    SetToG,
    /// Replace every byte other than `A`/`C`/`G`/`T` with `T`.
    SetToT,
}
impl Policy {
    /// Appends `sequence` to `ibuf`, substituting `val` for every byte that
    /// is not an uppercase `A`, `C`, `G` or `T`.
    ///
    /// `ibuf` is not cleared here; bytes are appended after its current
    /// contents.
    fn fill_with_known(sequence: &[u8], val: u8, ibuf: &mut Vec<u8>) {
        // `extend` reserves once from the slice iterator's exact size hint
        // instead of growing the buffer push by push.
        ibuf.extend(sequence.iter().map(|&n| match n {
            b'A' | b'C' | b'G' | b'T' => n,
            _ => val,
        }));
    }

    /// Appends `sequence` to `ibuf`, substituting a base drawn from `rng`
    /// for every byte that is not an uppercase `A`, `C`, `G` or `T`.
    ///
    /// One draw is made per replaced byte, in sequence order. `ibuf` is not
    /// cleared here; bytes are appended after its current contents.
    fn fill_with_random<R: Rng>(sequence: &[u8], rng: &mut R, ibuf: &mut Vec<u8>) {
        ibuf.extend(sequence.iter().map(|&n| match n {
            b'A' | b'C' | b'G' | b'T' => n,
            _ => match rng.gen_range(0..4) {
                0 => b'A',
                1 => b'C',
                2 => b'G',
                3 => b'T',
                // `gen_range(0..4)` only yields 0, 1, 2 or 3.
                _ => unreachable!(),
            },
        }));
    }

    /// Convert the sequence according to the N-policy.
    ///
    /// First clears `ibuf` so that it is empty, regardless of the policy.
    /// For the replacing policies, `ibuf` then receives a copy of `sequence`
    /// in which every byte other than uppercase `A`/`C`/`G`/`T` has been
    /// replaced.
    ///
    /// Returns `Ok(true)` when `ibuf` holds a converted sequence that should
    /// be processed further, and `Ok(false)` when the sequence should be
    /// skipped ([`Policy::IgnoreSequence`]).
    ///
    /// # Arguments
    /// * `sequence` - The sequence to be converted
    /// * `ibuf` - The buffer to store the converted sequence
    /// * `rng` - The random number generator (only used by
    ///   [`Policy::RandomDraw`])
    ///
    /// # Errors
    /// With [`Policy::BreakOnInvalid`] this always returns
    /// `WriteError::InvalidNucleotideSequence` carrying the sequence as text.
    /// Bytes that are not valid UTF-8 are rendered as U+FFFD in that text.
    pub fn handle<R: Rng>(&self, sequence: &[u8], ibuf: &mut Vec<u8>, rng: &mut R) -> Result<bool> {
        // Clear first so no stale data survives, even on the skip/error paths.
        ibuf.clear();

        let replacement = match *self {
            Self::IgnoreSequence => return Ok(false),
            Self::BreakOnInvalid => {
                // Lossy decoding keeps the reported error the documented one
                // even when the offending bytes are not valid UTF-8.
                let seq_str = String::from_utf8_lossy(sequence).into_owned();
                return Err(WriteError::InvalidNucleotideSequence(seq_str).into());
            }
            Self::RandomDraw => {
                Self::fill_with_random(sequence, rng, ibuf);
                return Ok(true);
            }
            Self::SetToA => b'A',
            Self::SetToC => b'C',
            Self::SetToG => b'G',
            Self::SetToT => b'T',
        };

        Self::fill_with_known(sequence, replacement, ibuf);
        Ok(true)
    }
}