orphos_core/sequence/
encoded.rs1use crate::{
2 sequence::{
3 create_reverse_complement_sequence, encode_sequence, encode_sequence_simd_wide_packed,
4 },
5 types::Mask,
6};
7
8#[derive(Debug)]
9pub struct EncodedSequence {
10 pub forward_sequence: Vec<u8>,
11 pub reverse_complement_sequence: Vec<u8>,
12 pub unknown_sequence: Vec<u8>,
13 pub masks: Vec<Mask>,
14 pub gc_content: f64,
15 pub sequence_length: usize,
16}
17
18impl EncodedSequence {
19 pub fn with_masking(sequence: &[u8]) -> Self {
20 let nucleotide_length = sequence.len();
21 let mut forward_sequence: Vec<u8> = vec![0; (nucleotide_length * 2).div_ceil(8)];
22 let mut unknown_sequence: Vec<u8> = vec![0; nucleotide_length.div_ceil(8)];
23
24 let mut masks = vec![];
25
26 let gc_content = encode_sequence(
27 sequence,
28 &mut forward_sequence,
29 &mut unknown_sequence,
30 &mut masks,
31 true,
32 )
33 .unwrap();
34 let reverse_complement_sequence = create_reverse_complement_sequence(
35 &forward_sequence,
36 &unknown_sequence,
37 nucleotide_length,
38 );
39
40 Self {
41 forward_sequence,
42 reverse_complement_sequence,
43 unknown_sequence,
44 masks,
45 gc_content,
46 sequence_length: nucleotide_length,
47 }
48 }
49
50 pub fn without_masking(sequence: &[u8]) -> Self {
51 let nucleotide_length = sequence.len();
52 let mut forward_sequence: Vec<u8> = vec![0; (nucleotide_length * 2).div_ceil(8)];
53 let mut unknown_sequence: Vec<u8> = vec![0; nucleotide_length.div_ceil(8)];
54 let masks = vec![];
55
56 let gc_content = encode_sequence_simd_wide_packed(
58 sequence,
59 &mut forward_sequence,
60 &mut unknown_sequence,
61 )
62 .unwrap();
63 let reverse_complement_sequence = create_reverse_complement_sequence(
64 &forward_sequence,
65 &unknown_sequence,
66 nucleotide_length,
67 );
68
69 Self {
70 forward_sequence,
71 reverse_complement_sequence,
72 unknown_sequence,
73 masks,
74 gc_content,
75 sequence_length: nucleotide_length,
76 }
77 }
78}