1use crate::haplotype::Haplotype;
17
/// Returns the complementary nucleotide for `base`, keeping its ASCII case.
///
/// `A`/`T` and `C`/`G` are swapped (in either case). Every other byte maps to
/// `N`, or to `n` when the input byte is an ASCII lowercase letter.
#[must_use]
fn complement(base: u8) -> u8 {
    // Resolve the pairing case-insensitively; the result is uppercase here.
    let paired = match base {
        b'A' | b'a' => b'T',
        b'T' | b't' => b'A',
        b'C' | b'c' => b'G',
        b'G' | b'g' => b'C',
        _ => b'N',
    };

    // Re-apply the input's case so lowercase (soft-masked) input stays lowercase.
    if base.is_ascii_lowercase() {
        paired.to_ascii_lowercase()
    } else {
        paired
    }
}
32
33pub fn reverse_complement(seq: &mut [u8]) {
35 seq.reverse();
36 for base in seq.iter_mut() {
37 *base = complement(*base);
38 }
39}
40
/// A fragment produced by [`extract_fragment`], together with the metadata
/// that was supplied when it was extracted.
#[derive(Debug)]
pub struct Fragment {
    /// Fragment bases as returned by `Haplotype::extract_fragment`.
    pub bases: Vec<u8>,
    /// Positions returned alongside `bases` by `Haplotype::extract_fragment`.
    /// NOTE(review): presumably one reference coordinate per base — confirm
    /// against the `Haplotype` implementation.
    pub ref_positions: Vec<u32>,
    /// The `ref_start` value the fragment was requested at.
    pub ref_start: u32,
    /// Orientation flag passed through unchanged from the caller.
    /// NOTE(review): presumably `true` means forward strand — confirm.
    pub is_forward: bool,
    /// Value of `Haplotype::allele_index()` for the source haplotype.
    pub haplotype_index: usize,
}
58
59#[must_use]
71pub fn extract_fragment(
72 haplotype: &Haplotype,
73 reference: &[u8],
74 ref_start: u32,
75 fragment_len: usize,
76 is_forward: bool,
77) -> Fragment {
78 let (bases, ref_positions) = haplotype.extract_fragment(reference, ref_start, fragment_len);
79
80 Fragment {
81 bases,
82 ref_positions,
83 ref_start,
84 is_forward,
85 haplotype_index: haplotype.allele_index(),
86 }
87}
88
89#[must_use]
105pub fn extract_read_bases(
106 fragment_bases: &[u8],
107 read_length: usize,
108 adapter: &[u8],
109 is_r2: bool,
110) -> Vec<u8> {
111 let frag_len = fragment_bases.len();
112
113 if is_r2 {
114 let mut bases = Vec::with_capacity(read_length);
117 let take = frag_len.min(read_length);
118 let start = frag_len.saturating_sub(take);
119 bases.extend_from_slice(&fragment_bases[start..]);
120 reverse_complement(&mut bases);
121 append_adapter_and_pad(&mut bases, read_length, adapter);
122 bases
123 } else {
124 let mut bases = Vec::with_capacity(read_length);
126 let take = frag_len.min(read_length);
127 bases.extend_from_slice(&fragment_bases[..take]);
128 append_adapter_and_pad(&mut bases, read_length, adapter);
129 bases
130 }
131}
132
/// Returns the fraction of bytes in `bases` that are ASCII lowercase letters.
///
/// The denominator is the total number of bytes, so non-letter bytes count
/// towards the length but never towards the lowercase total. An empty slice
/// yields `0.0`.
#[must_use]
pub fn lowercase_fraction(bases: &[u8]) -> f64 {
    match bases.len() {
        // Avoid dividing by zero for an empty input.
        0 => 0.0,
        total => {
            let masked = bases.iter().filter(|b| b.is_ascii_lowercase()).count();
            masked as f64 / total as f64
        }
    }
}
151
/// Converts every ASCII lowercase letter in `bases` to uppercase, in place.
///
/// Bytes that are not ASCII lowercase letters are left untouched.
pub fn uppercase_in_place(bases: &mut [u8]) {
    bases.make_ascii_uppercase();
}
158
/// Grows `bases` to exactly `target_len` when it is shorter than that.
///
/// The missing bases are taken from the start of `adapter`; if the adapter
/// runs out first, the remainder is filled with `N`. A buffer that is already
/// at least `target_len` long is left unchanged (it is never truncated).
fn append_adapter_and_pad(bases: &mut Vec<u8>, target_len: usize, adapter: &[u8]) {
    let missing = target_len.saturating_sub(bases.len());
    if missing == 0 {
        // Already long enough; returning here also keeps `resize` from truncating.
        return;
    }

    bases.extend(adapter.iter().copied().take(missing));
    // Length is now at most `target_len`, so this can only pad.
    bases.resize(target_len, b'N');
}
171
#[cfg(test)]
mod tests {
    use super::*;

    /// Reverse-complements a copy of `input` and returns the result.
    fn revcomp_of(input: &[u8]) -> Vec<u8> {
        let mut owned = input.to_vec();
        reverse_complement(&mut owned);
        owned
    }

    #[test]
    fn test_reverse_complement() {
        // ACGT is its own reverse complement.
        assert_eq!(revcomp_of(b"ACGT"), b"ACGT");
        assert_eq!(revcomp_of(b"AACG"), b"CGTT");
    }

    #[test]
    fn test_reverse_complement_with_n() {
        assert_eq!(revcomp_of(b"ANGC"), b"GCNT");
    }

    #[test]
    fn test_reverse_complement_preserves_lowercase() {
        // Case travels with the position: aCgT -> reversed TgCa -> AcGt.
        assert_eq!(revcomp_of(b"aCgT"), b"AcGt");
    }

    #[test]
    fn test_extract_r1_full_fragment() {
        let read = extract_read_bases(b"ACGTACGTAC", 10, b"ADAPTER", false);
        assert_eq!(read, b"ACGTACGTAC");
    }

    #[test]
    fn test_extract_r1_fragment_longer_than_read() {
        let read = extract_read_bases(b"ACGTACGTAC", 5, b"ADAPTER", false);
        assert_eq!(read, b"ACGTA");
    }

    #[test]
    fn test_extract_r1_short_fragment_with_adapter() {
        // Three fragment bases followed by five adapter bases.
        let read = extract_read_bases(b"ACG", 8, b"TTTTTT", false);
        assert_eq!(read, b"ACGTTTTT");
    }

    #[test]
    fn test_extract_r2_full_fragment() {
        let read = extract_read_bases(b"ACGTACGTAC", 10, b"ADAPTER", true);
        assert_eq!(read, b"GTACGTACGT");
    }

    #[test]
    fn test_extract_r2_fragment_longer_than_read() {
        // Last five bases (CGTAC), reverse-complemented.
        let read = extract_read_bases(b"ACGTACGTAC", 5, b"ADAPTER", true);
        assert_eq!(read, b"GTACG");
    }

    #[test]
    fn test_extract_r2_short_fragment_with_adapter() {
        // revcomp(ACG) = CGT, then five adapter bases.
        let read = extract_read_bases(b"ACG", 8, b"TTTTTT", true);
        assert_eq!(read, b"CGTTTTTT");
        assert_eq!(read.len(), 8);
    }

    #[test]
    fn test_lowercase_fraction_empty() {
        assert!(lowercase_fraction(b"").abs() < 1e-12);
    }

    #[test]
    fn test_lowercase_fraction_all_upper() {
        assert!(lowercase_fraction(b"ACGTACGT").abs() < 1e-12);
    }

    #[test]
    fn test_lowercase_fraction_all_lower() {
        assert!((lowercase_fraction(b"acgtacgt") - 1.0).abs() < 1e-12);
    }

    #[test]
    fn test_lowercase_fraction_mixed() {
        // Three lowercase bytes out of ten.
        let f = lowercase_fraction(b"ACaGcTAtCA");
        assert!((f - 0.3).abs() < 1e-10, "expected 0.3, got {f}");
    }

    #[test]
    fn test_lowercase_fraction_ignores_non_letters() {
        // '-' and 'N' are not lowercase but still count towards the length.
        let f = lowercase_fraction(b"A-Na");
        assert!((f - 0.25).abs() < 1e-10, "expected 0.25, got {f}");
    }

    #[test]
    fn test_uppercase_in_place() {
        let mut seq = b"aCgTnN".to_vec();
        uppercase_in_place(&mut seq);
        assert_eq!(seq, b"ACGTNN");
    }

    #[test]
    fn test_uppercase_in_place_empty() {
        let mut seq: Vec<u8> = Vec::new();
        uppercase_in_place(&mut seq);
        assert!(seq.is_empty());
    }

    #[test]
    fn test_extract_empty_fragment_all_adapter() {
        let fragment = b"";
        let adapter = b"AGATCGG";

        // With no fragment bases both mates consist purely of adapter sequence.
        let r1 = extract_read_bases(fragment, 5, adapter, false);
        assert_eq!(r1, b"AGATC");

        let r2 = extract_read_bases(fragment, 5, adapter, true);
        assert_eq!(r2, b"AGATC");
    }
}