1use crate::haplotype::Haplotype;
8
9#[must_use]
12fn complement(base: u8) -> u8 {
13 match base.to_ascii_uppercase() {
14 b'A' => b'T',
15 b'T' => b'A',
16 b'C' => b'G',
17 b'G' => b'C',
18 _ => b'N',
19 }
20}
21
22pub fn reverse_complement(seq: &mut [u8]) {
24 seq.reverse();
25 for base in seq.iter_mut() {
26 *base = complement(*base);
27 }
28}
29
30#[derive(Debug)]
35pub struct Fragment {
36 pub bases: Vec<u8>,
38 pub ref_positions: Vec<u32>,
40 pub ref_start: u32,
42 pub is_forward: bool,
44 pub haplotype_index: usize,
46}
47
48#[must_use]
60pub fn extract_fragment(
61 haplotype: &Haplotype,
62 reference: &[u8],
63 ref_start: u32,
64 fragment_len: usize,
65 is_forward: bool,
66) -> Fragment {
67 let (bases, ref_positions) = haplotype.extract_fragment(reference, ref_start, fragment_len);
68
69 Fragment {
70 bases,
71 ref_positions,
72 ref_start,
73 is_forward,
74 haplotype_index: haplotype.allele_index(),
75 }
76}
77
78#[must_use]
94pub fn extract_read_bases(
95 fragment_bases: &[u8],
96 read_length: usize,
97 adapter: &[u8],
98 is_r2: bool,
99) -> Vec<u8> {
100 let frag_len = fragment_bases.len();
101
102 if is_r2 {
103 let mut bases = Vec::with_capacity(read_length);
106 let take = frag_len.min(read_length);
107 let start = frag_len.saturating_sub(take);
108 bases.extend_from_slice(&fragment_bases[start..]);
109 reverse_complement(&mut bases);
110 append_adapter_and_pad(&mut bases, read_length, adapter);
111 bases
112 } else {
113 let mut bases = Vec::with_capacity(read_length);
115 let take = frag_len.min(read_length);
116 bases.extend_from_slice(&fragment_bases[..take]);
117 append_adapter_and_pad(&mut bases, read_length, adapter);
118 bases
119 }
120}
121
122fn append_adapter_and_pad(bases: &mut Vec<u8>, target_len: usize, adapter: &[u8]) {
124 if bases.len() < target_len {
125 let need = target_len - bases.len();
126 let adapter_take = need.min(adapter.len());
127 bases.extend_from_slice(&adapter[..adapter_take]);
128 while bases.len() < target_len {
130 bases.push(b'N');
131 }
132 }
133}
134
135#[cfg(test)]
136mod tests {
137 use super::*;
138
139 #[test]
140 fn test_reverse_complement() {
141 let mut seq = b"ACGT".to_vec();
142 reverse_complement(&mut seq);
143 assert_eq!(&seq, b"ACGT"); let mut seq2 = b"AACG".to_vec();
146 reverse_complement(&mut seq2);
147 assert_eq!(&seq2, b"CGTT");
148 }
149
150 #[test]
151 fn test_reverse_complement_with_n() {
152 let mut seq = b"ANGC".to_vec();
153 reverse_complement(&mut seq);
154 assert_eq!(&seq, b"GCNT");
155 }
156
157 #[test]
158 fn test_extract_r1_full_fragment() {
159 let fragment = b"ACGTACGTAC";
160 let bases = extract_read_bases(fragment, 10, b"ADAPTER", false);
161 assert_eq!(&bases, b"ACGTACGTAC");
162 }
163
164 #[test]
165 fn test_extract_r1_fragment_longer_than_read() {
166 let fragment = b"ACGTACGTAC";
167 let bases = extract_read_bases(fragment, 5, b"ADAPTER", false);
168 assert_eq!(&bases, b"ACGTA");
169 }
170
171 #[test]
172 fn test_extract_r1_short_fragment_with_adapter() {
173 let fragment = b"ACG";
174 let adapter = b"TTTTTT";
175 let bases = extract_read_bases(fragment, 8, adapter, false);
176 assert_eq!(&bases, b"ACGTTTTT");
177 }
178
179 #[test]
180 fn test_extract_r2_full_fragment() {
181 let fragment = b"ACGTACGTAC";
182 let bases = extract_read_bases(fragment, 10, b"ADAPTER", true);
183 assert_eq!(&bases, b"GTACGTACGT");
185 }
186
187 #[test]
188 fn test_extract_r2_fragment_longer_than_read() {
189 let fragment = b"ACGTACGTAC";
190 let bases = extract_read_bases(fragment, 5, b"ADAPTER", true);
191 assert_eq!(&bases, b"GTACG");
193 }
194
195 #[test]
196 fn test_extract_r2_short_fragment_with_adapter() {
197 let fragment = b"ACG";
198 let adapter = b"TTTTTT";
199 let bases = extract_read_bases(fragment, 8, adapter, true);
200 assert_eq!(&bases, b"CGTTTTTT"); assert_eq!(bases.len(), 8);
203 }
204
205 #[test]
206 fn test_extract_empty_fragment_all_adapter() {
207 let fragment = b"";
208 let adapter = b"AGATCGG";
209 let bases = extract_read_bases(fragment, 5, b"AGATCGG", false);
210 assert_eq!(&bases, b"AGATC");
211
212 let bases_r2 = extract_read_bases(fragment, 5, adapter, true);
213 assert_eq!(&bases_r2, b"AGATC");
214 }
215}