orphos_core/sequence/
encoded.rs

1use crate::{
2    sequence::{
3        create_reverse_complement_sequence, encode_sequence, encode_sequence_simd_wide_packed,
4    },
5    types::Mask,
6};
7
8#[derive(Debug)]
9pub struct EncodedSequence {
10    pub forward_sequence: Vec<u8>,
11    pub reverse_complement_sequence: Vec<u8>,
12    pub unknown_sequence: Vec<u8>,
13    pub masks: Vec<Mask>,
14    pub gc_content: f64,
15    pub sequence_length: usize,
16}
17
18impl EncodedSequence {
19    pub fn with_masking(sequence: &[u8]) -> Self {
20        let nucleotide_length = sequence.len();
21        let mut forward_sequence: Vec<u8> = vec![0; (nucleotide_length * 2).div_ceil(8)];
22        let mut unknown_sequence: Vec<u8> = vec![0; nucleotide_length.div_ceil(8)];
23
24        let mut masks = vec![];
25
26        let gc_content = encode_sequence(
27            sequence,
28            &mut forward_sequence,
29            &mut unknown_sequence,
30            &mut masks,
31            true,
32        )
33        .unwrap();
34        let reverse_complement_sequence = create_reverse_complement_sequence(
35            &forward_sequence,
36            &unknown_sequence,
37            nucleotide_length,
38        );
39
40        Self {
41            forward_sequence,
42            reverse_complement_sequence,
43            unknown_sequence,
44            masks,
45            gc_content,
46            sequence_length: nucleotide_length,
47        }
48    }
49
50    pub fn without_masking(sequence: &[u8]) -> Self {
51        let nucleotide_length = sequence.len();
52        let mut forward_sequence: Vec<u8> = vec![0; (nucleotide_length * 2).div_ceil(8)];
53        let mut unknown_sequence: Vec<u8> = vec![0; nucleotide_length.div_ceil(8)];
54        let masks = vec![];
55
56        // let gc_content = encode_sequence(sequence, &mut forward_sequence, &mut unknown_sequence, &mut masks, false).unwrap();
57        let gc_content = encode_sequence_simd_wide_packed(
58            sequence,
59            &mut forward_sequence,
60            &mut unknown_sequence,
61        )
62        .unwrap();
63        let reverse_complement_sequence = create_reverse_complement_sequence(
64            &forward_sequence,
65            &unknown_sequence,
66            nucleotide_length,
67        );
68
69        Self {
70            forward_sequence,
71            reverse_complement_sequence,
72            unknown_sequence,
73            masks,
74            gc_content,
75            sequence_length: nucleotide_length,
76        }
77    }
78}