1use crate::bitmap;
51use crate::constants::MASK_SIZE;
52use crate::types::*;
53
54pub mod encoded;
55pub mod io;
56pub mod processing;
57
58use crate::bitmap::{set_bit, test_bit, toggle_bit};
59use rayon::prelude::*;
60
61pub use io::*;
62pub use processing::*;
63use wide::CmpEq;
64use wide::u8x32;
65
66#[must_use]
100pub const fn char_to_nuc(c: u8) -> u8 {
101 match c.to_ascii_uppercase() {
102 b'A' => 0,
103 b'C' => 1,
104 b'G' => 2,
105 b'T' | b'U' => 3,
106 _ => 4,
107 }
108}
109
110#[must_use]
112pub fn is_a(encoded_sequence: &[u8], n: usize) -> bool {
113 let bit_index = n * 2;
114 !(test_bit(encoded_sequence, bit_index) || test_bit(encoded_sequence, bit_index + 1))
115}
116
117#[must_use]
119pub fn is_c(encoded_sequence: &[u8], n: usize) -> bool {
120 let bit_index = n * 2;
121 !test_bit(encoded_sequence, bit_index) && test_bit(encoded_sequence, bit_index + 1)
122}
123
124#[must_use]
126pub fn is_g(encoded_sequence: &[u8], n: usize) -> bool {
127 let bit_index = n * 2;
128 test_bit(encoded_sequence, bit_index) && !test_bit(encoded_sequence, bit_index + 1)
129}
130
131#[must_use]
133pub fn is_t(encoded_sequence: &[u8], n: usize) -> bool {
134 let bit_index = n * 2;
135 test_bit(encoded_sequence, bit_index) && test_bit(encoded_sequence, bit_index + 1)
136}
137
138pub fn is_n(unknown_sequence: &[u8], n: usize) -> bool {
140 if n >= unknown_sequence.len() * 8 {
141 return false;
142 }
143 test_bit(unknown_sequence, n)
144}
145
146pub fn is_gc(encoded_sequence: &[u8], n: usize) -> bool {
148 let bit_index = n * 2;
149 test_bit(encoded_sequence, bit_index) != test_bit(encoded_sequence, bit_index + 1)
150}
151
152const fn uses_only_atg(trans_table: i32) -> bool {
154 matches!(trans_table, 6 | 10 | 14 | 15 | 16 | 22)
155}
156
157const fn gtg_not_start(trans_table: i32) -> bool {
159 matches!(trans_table, 1 | 3 | 12 | 22)
160}
161
162fn ttg_not_start(trans_table: i32) -> bool {
164 trans_table < 4 || trans_table == 9 || (21..25).contains(&trans_table)
165}
166
167pub fn is_start(encoded_sequence: &[u8], pos: usize, training: &Training) -> bool {
172 if is_atg(encoded_sequence, pos) {
174 return true;
175 }
176
177 if uses_only_atg(training.translation_table) {
179 return false;
180 }
181
182 if is_gtg(encoded_sequence, pos) && !gtg_not_start(training.translation_table) {
184 return true;
185 }
186
187 if is_ttg(encoded_sequence, pos) && !ttg_not_start(training.translation_table) {
189 return true;
190 }
191
192 false
193}
194
195pub fn is_atg(encoded_sequence: &[u8], pos: usize) -> bool {
197 is_a(encoded_sequence, pos)
198 && is_t(encoded_sequence, pos + 1)
199 && is_g(encoded_sequence, pos + 2)
200}
201
202pub fn is_gtg(encoded_sequence: &[u8], pos: usize) -> bool {
204 is_g(encoded_sequence, pos)
205 && is_t(encoded_sequence, pos + 1)
206 && is_g(encoded_sequence, pos + 2)
207}
208
209pub fn is_ttg(encoded_sequence: &[u8], pos: usize) -> bool {
211 is_t(encoded_sequence, pos)
212 && is_t(encoded_sequence, pos + 1)
213 && is_g(encoded_sequence, pos + 2)
214}
215
216const fn is_tag_stop(trans_table: i32) -> bool {
218 !matches!(trans_table, 6 | 15 | 16 | 22)
219}
220
221const fn is_tga_stop(trans_table: i32) -> bool {
223 !matches!(trans_table, 2..=5 | 9 | 10 | 13 | 14 | 21 | 25)
224}
225
226const fn is_taa_stop(trans_table: i32) -> bool {
228 !matches!(trans_table, 6 | 14)
229}
230
231#[inline]
236pub fn is_stop(encoded_sequence: &[u8], pos: usize, training: &Training) -> bool {
237 if is_t(encoded_sequence, pos) {
238 if is_a(encoded_sequence, pos + 1) {
239 if is_g(encoded_sequence, pos + 2) {
240 return is_tag_stop(training.translation_table);
241 }
242 if is_a(encoded_sequence, pos + 2) {
243 return is_taa_stop(training.translation_table);
244 }
245 } else if is_g(encoded_sequence, pos + 1) && is_a(encoded_sequence, pos + 2) {
246 return is_tga_stop(training.translation_table);
247 }
248 }
249
250 match training.translation_table {
252 2 => {
253 is_a(encoded_sequence, pos)
255 && is_g(encoded_sequence, pos + 1)
256 && (is_a(encoded_sequence, pos + 2) || is_g(encoded_sequence, pos + 2))
257 }
258 22 => {
259 is_t(encoded_sequence, pos)
261 && is_c(encoded_sequence, pos + 1)
262 && is_a(encoded_sequence, pos + 2)
263 }
264 23 => {
265 is_t(encoded_sequence, pos)
267 && is_t(encoded_sequence, pos + 1)
268 && is_a(encoded_sequence, pos + 2)
269 }
270 _ => false,
271 }
272}
273
274pub fn gc_content(encoded_sequence: &[u8], start: usize, end: usize) -> f64 {
279 if start > end {
280 return 0.0;
281 }
282
283 let (gc_count, total) = (start..=end)
284 .map(|i| {
285 if is_g(encoded_sequence, i) || is_c(encoded_sequence, i) {
286 (1, 1)
287 } else {
288 (0, 1)
289 }
290 })
291 .fold((0, 0), |(gc, tot), (g, t)| (gc + g, tot + t));
292
293 if total == 0 {
294 0.0
295 } else {
296 f64::from(gc_count) / f64::from(total)
297 }
298}
299
300pub const fn reverse_strand_reading_frame(forward_frame: usize, sequence_length: usize) -> usize {
305 let frame_modulus = if sequence_length.is_multiple_of(3) {
306 3
307 } else {
308 sequence_length % 3
309 };
310 (frame_modulus - 1 - forward_frame) % 3
311}
312
313pub const fn find_max_reading_frame(
317 frame_0_value: i32,
318 frame_1_value: i32,
319 frame_2_value: i32,
320) -> usize {
321 if frame_0_value > frame_1_value {
322 if frame_0_value > frame_2_value { 0 } else { 2 }
323 } else if frame_1_value > frame_2_value {
324 1
325 } else {
326 2
327 }
328}
329
330pub fn create_reverse_complement_sequence(
335 forward_sequence: &[u8],
336 unknown_sequence: &[u8],
337 nucleotide_length: usize,
338) -> Vec<u8> {
339 let mut reverse_complement_encoded_sequence = vec![0; forward_sequence.len()];
340 let sequence_length = nucleotide_length * 2;
341
342 for i in 0..sequence_length {
343 if !test_bit(forward_sequence, i) {
344 let target_pos = if i % 2 == 0 {
345 sequence_length - i - 2
346 } else {
347 sequence_length - i
348 };
349 if target_pos < sequence_length {
350 set_bit(&mut reverse_complement_encoded_sequence, target_pos);
351 }
352 }
353 }
354
355 for i in 0..nucleotide_length {
356 if test_bit(unknown_sequence, i) && sequence_length >= 2 + i * 2 {
357 toggle_bit(
358 &mut reverse_complement_encoded_sequence,
359 sequence_length - 1 - i * 2,
360 );
361 toggle_bit(
362 &mut reverse_complement_encoded_sequence,
363 sequence_length - 2 - i * 2,
364 );
365 }
366 }
367 reverse_complement_encoded_sequence
368}
369
370#[inline]
372pub fn min_of_two_integers(first_value: i32, second_value: i32) -> i32 {
373 first_value.min(second_value)
374}
375
376#[must_use]
381pub fn calculate_kmer_index(kmer_length: usize, encoded_sequence: &[u8], position: usize) -> usize {
382 let mut kmer_index = 0;
383 for i in 0..(2 * kmer_length) {
384 let bit_pos = position * 2 + i;
385 kmer_index |= usize::from(test_bit(encoded_sequence, bit_pos)) << i;
386 }
387 kmer_index
388}
389
390pub fn calculate_background_mer_frequencies(
395 length: usize,
396 encoded_sequence: &[u8],
397 reverse_complement_encoded_sequence: &[u8],
398 sequence_length: usize,
399 bg: &mut [f64],
400) {
401 let mut size = 1usize;
402
403 for _i in 1..=length {
404 size *= 4;
405 }
406
407 let chunk_size = std::cmp::max(
409 1000,
410 (sequence_length - length + 1) / rayon::current_num_threads(),
411 );
412 let total_counts: Vec<i32> = (0..(sequence_length - length + 1))
413 .into_par_iter()
414 .chunks(chunk_size)
415 .map(|chunk| {
416 let mut local_counts = vec![0i32; size];
417
418 for i in chunk {
419 let seq_idx = calculate_kmer_index(length, encoded_sequence, i);
420 if seq_idx < size {
421 local_counts[seq_idx] += 1;
422 }
423
424 let rseq_idx = calculate_kmer_index(length, reverse_complement_encoded_sequence, i);
425 if rseq_idx < size {
426 local_counts[rseq_idx] += 1;
427 }
428 }
429
430 local_counts
431 })
432 .reduce(
433 || vec![0i32; size],
434 |mut acc, local_counts| {
435 for (i, &count) in local_counts.iter().enumerate() {
436 acc[i] += count;
437 }
438 acc
439 },
440 );
441
442 let glob = (sequence_length - length + 1) * 2;
443
444 bg.par_iter_mut()
445 .enumerate()
446 .take(size)
447 .for_each(|(i, bg_val)| {
448 *bg_val = f64::from(total_counts[i]) / (glob as f64);
449 });
450}
451
452pub fn mer_text(len: usize, bit_index: usize) -> String {
457 use crate::constants::NUCLEOTIDE_LETTERS;
458
459 if len == 0 {
460 return "None".to_string();
461 }
462
463 let mut result = String::with_capacity(len);
464 let index = bit_index;
465
466 for i in 0..len {
467 let val = (index & (1 << (2 * i))) | (index & (1 << (2 * i + 1)));
469 let val = val >> (i * 2);
470 let base_idx = val & 0b11; if base_idx < 4 {
473 result.push(NUCLEOTIDE_LETTERS[base_idx]);
474 } else {
475 result.push('N'); }
477 }
478
479 result
480}
481
482pub fn encode_sequence(
488 sequence: &[u8],
489 encoded_sequence: &mut [u8],
490 unknown_sequence: &mut [u8],
491 masks: &mut Vec<Mask>,
492 do_mask: bool,
493) -> Result<f64, OrphosError> {
494 let mut gc_count = 0;
495 let mut total_count = 0;
496 let mut mask_start: Option<usize> = None;
497
498 for (i, &byte) in sequence.iter().enumerate() {
499 if i * 2 + 1 >= encoded_sequence.len() * 8 {
500 break;
501 }
502
503 if do_mask {
505 if let Some(start) = mask_start {
506 if byte != b'N' && byte != b'n' {
507 if i - start >= MASK_SIZE {
508 masks.push(Mask {
509 begin: start,
510 end: i - 1,
511 });
512 }
513 mask_start = None;
514 }
515 } else if byte == b'N' || byte == b'n' {
516 mask_start = Some(i);
517 }
518 }
519
520 let bctr = i * 2;
522 match byte.to_ascii_uppercase() {
523 b'A' => {
524 total_count += 1;
525 }
526 b'C' => {
527 bitmap::set_bit(encoded_sequence, bctr + 1);
528 gc_count += 1;
529 total_count += 1;
530 }
531 b'G' => {
532 bitmap::set_bit(encoded_sequence, bctr);
533 gc_count += 1;
534 total_count += 1;
535 }
536 b'T' | b'U' => {
537 bitmap::set_bit(encoded_sequence, bctr);
538 bitmap::set_bit(encoded_sequence, bctr + 1);
539 total_count += 1;
540 }
541 _ => {
542 bitmap::set_bit(encoded_sequence, bctr + 1);
543 bitmap::set_bit(unknown_sequence, i);
544 total_count += 1;
545 }
546 }
547 }
548
549 if do_mask
551 && let Some(start) = mask_start
552 && sequence.len() - start >= MASK_SIZE
553 {
554 masks.push(Mask {
555 begin: start,
556 end: sequence.len() - 1,
557 });
558 }
559
560 let gc_content = if total_count > 0 {
561 f64::from(gc_count) / f64::from(total_count)
562 } else {
563 0.0
564 };
565
566 Ok(gc_content)
567}
568
569pub fn encode_sequence_simd_wide(
574 sequence: &[u8],
575 encoded_sequence: &mut [u8],
576 unknown_sequence: &mut [u8],
577) -> Result<f64, OrphosError> {
578 let mut gc_count = 0u32;
579 let mut total_count = 0u32;
580
581 use crate::constants::CHUNK_SIZE;
583 let chunks = sequence.len() / CHUNK_SIZE;
584
585 let a_upper = u8x32::splat(b'A');
587 let c_upper = u8x32::splat(b'C');
588 let g_upper = u8x32::splat(b'G');
589 let t_upper = u8x32::splat(b'T');
590 let u_upper = u8x32::splat(b'U');
591 let a_lower = u8x32::splat(b'a');
594 let c_lower = u8x32::splat(b'c');
595 let g_lower = u8x32::splat(b'g');
596 let t_lower = u8x32::splat(b't');
597 let u_lower = u8x32::splat(b'u');
598 for chunk_idx in 0..chunks {
601 let chunk_start = chunk_idx * CHUNK_SIZE;
602
603 let input_slice = &sequence[chunk_start..chunk_start + CHUNK_SIZE];
605
606 let mut input_array = [0u8; 32];
608 input_array.copy_from_slice(input_slice);
609 let input = u8x32::from(input_array);
610
611 let is_a = input.cmp_eq(a_upper) | input.cmp_eq(a_lower);
613 let is_c = input.cmp_eq(c_upper) | input.cmp_eq(c_lower);
614 let is_g = input.cmp_eq(g_upper) | input.cmp_eq(g_lower);
615 let is_t = input.cmp_eq(t_upper)
616 | input.cmp_eq(t_lower)
617 | input.cmp_eq(u_upper)
618 | input.cmp_eq(u_lower);
619 let gc_mask = is_g | is_c;
622 let valid_mask = is_a | is_c | is_g | is_t;
623
624 gc_count += gc_mask.move_mask().count_ones();
626 total_count += valid_mask.move_mask().count_ones();
627
628 let is_a_array: [u8; 32] = is_a.into();
631 let is_c_array: [u8; 32] = is_c.into();
632 let is_g_array: [u8; 32] = is_g.into();
633 let is_t_array: [u8; 32] = is_t.into();
634 for i in 0..CHUNK_SIZE {
637 let pos = chunk_start + i;
638 if pos >= sequence.len() || pos * 2 + 1 >= encoded_sequence.len() * 8 {
639 break;
640 }
641
642 let bit_pos = pos * 2;
643
644 if is_a_array[i] != 0 {
646 } else if is_c_array[i] != 0 {
648 crate::bitmap::set_bit(encoded_sequence, bit_pos + 1);
650 } else if is_g_array[i] != 0 {
651 crate::bitmap::set_bit(encoded_sequence, bit_pos);
653 } else if is_t_array[i] != 0 {
654 crate::bitmap::set_bit(encoded_sequence, bit_pos);
656 crate::bitmap::set_bit(encoded_sequence, bit_pos + 1);
657 } else {
658 crate::bitmap::set_bit(encoded_sequence, bit_pos + 1);
659 crate::bitmap::set_bit(unknown_sequence, pos);
660 }
661 }
662 }
663
664 for (pos, byte) in sequence.iter().enumerate().skip(chunks * CHUNK_SIZE) {
666 if pos * 2 + 1 >= encoded_sequence.len() * 8 {
667 break;
668 }
669
670 let bit_pos = pos * 2;
672
673 match byte.to_ascii_uppercase() {
674 b'A' => {
675 total_count += 1;
676 }
677 b'C' => {
678 crate::bitmap::set_bit(encoded_sequence, bit_pos + 1);
679 gc_count += 1;
680 total_count += 1;
681 }
682 b'G' => {
683 crate::bitmap::set_bit(encoded_sequence, bit_pos);
684 gc_count += 1;
685 total_count += 1;
686 }
687 b'T' | b'U' => {
688 crate::bitmap::set_bit(encoded_sequence, bit_pos);
689 crate::bitmap::set_bit(encoded_sequence, bit_pos + 1);
690 total_count += 1;
691 }
692 _ => {
693 crate::bitmap::set_bit(encoded_sequence, bit_pos + 1);
694 crate::bitmap::set_bit(unknown_sequence, pos);
695 total_count += 1;
696 }
697 }
698 }
699
700 let gc_content = if total_count > 0 {
701 gc_count as f64 / total_count as f64
702 } else {
703 0.0
704 };
705
706 Ok(gc_content)
707}
708
709pub fn encode_sequence_simd_wide_packed(
711 sequence: &[u8],
712 encoded_sequence: &mut [u8],
713 unknown_sequence: &mut [u8],
714) -> Result<f64, OrphosError> {
715 let mut gc_count = 0u32;
716 let mut total_count = 0u32;
717
718 use crate::constants::CHUNK_SIZE;
720 let chunks = sequence.len() / CHUNK_SIZE;
721
722 for chunk_idx in 0..chunks {
723 let chunk_start = chunk_idx * CHUNK_SIZE;
724
725 let input_slice = &sequence[chunk_start..chunk_start + CHUNK_SIZE];
727 let mut input_array = [0u8; 32];
728 input_array.copy_from_slice(input_slice);
729 let input = u8x32::from(input_array);
730
731 let a_upper = u8x32::splat(b'A');
733 let c_upper = u8x32::splat(b'C');
734 let g_upper = u8x32::splat(b'G');
735 let t_upper = u8x32::splat(b'T');
736 let u_upper = u8x32::splat(b'U');
737
738 let a_lower = u8x32::splat(b'a');
739 let c_lower = u8x32::splat(b'c');
740 let g_lower = u8x32::splat(b'g');
741 let t_lower = u8x32::splat(b't');
742 let u_lower = u8x32::splat(b'u');
743
744 let is_a = input.cmp_eq(a_upper) | input.cmp_eq(a_lower);
745 let is_c = input.cmp_eq(c_upper) | input.cmp_eq(c_lower);
746 let is_g = input.cmp_eq(g_upper) | input.cmp_eq(g_lower);
747 let is_t = input.cmp_eq(t_upper)
748 | input.cmp_eq(t_lower)
749 | input.cmp_eq(u_upper)
750 | input.cmp_eq(u_lower);
751
752 let gc_mask = is_g | is_c;
753 let valid_mask = is_a | is_c | is_g | is_t;
754
755 gc_count += gc_mask.move_mask().count_ones();
756 total_count += valid_mask.move_mask().count_ones();
757
758 let is_c_mask: i32 = is_c.move_mask();
761 let is_g_mask: i32 = is_g.move_mask();
762 let is_t_mask: i32 = is_t.move_mask();
763 let unknown_mask: i32 = !valid_mask.move_mask();
764
765 for i in 0..CHUNK_SIZE {
767 let pos = chunk_start + i;
768 if pos >= sequence.len() || pos * 2 + 1 >= encoded_sequence.len() * 8 {
769 break;
770 }
771
772 let bit_pos = pos * 2;
773 let bit_flag = 1i32 << i;
774
775 if (is_c_mask & bit_flag) != 0 {
777 crate::bitmap::set_bit(encoded_sequence, bit_pos + 1);
779 } else if (is_g_mask & bit_flag) != 0 {
780 crate::bitmap::set_bit(encoded_sequence, bit_pos);
782 } else if (is_t_mask & bit_flag) != 0 {
783 crate::bitmap::set_bit(encoded_sequence, bit_pos);
785 crate::bitmap::set_bit(encoded_sequence, bit_pos + 1);
786 } else if (unknown_mask & bit_flag) != 0 {
787 crate::bitmap::set_bit(encoded_sequence, bit_pos + 1);
788 crate::bitmap::set_bit(unknown_sequence, pos);
789 }
790 }
792 }
793
794 for (pos, byte) in sequence.iter().enumerate().skip(chunks * CHUNK_SIZE) {
797 if pos * 2 + 1 >= encoded_sequence.len() * 8 {
798 break;
799 }
800
801 let bit_pos = pos * 2;
803
804 match byte.to_ascii_uppercase() {
805 b'A' => {
806 total_count += 1;
807 }
808 b'C' => {
809 crate::bitmap::set_bit(encoded_sequence, bit_pos + 1);
810 gc_count += 1;
811 total_count += 1;
812 }
813 b'G' => {
814 crate::bitmap::set_bit(encoded_sequence, bit_pos);
815 gc_count += 1;
816 total_count += 1;
817 }
818 b'T' | b'U' => {
819 crate::bitmap::set_bit(encoded_sequence, bit_pos);
820 crate::bitmap::set_bit(encoded_sequence, bit_pos + 1);
821 total_count += 1;
822 }
823 _ => {
824 crate::bitmap::set_bit(encoded_sequence, bit_pos + 1);
825 crate::bitmap::set_bit(unknown_sequence, pos);
826 total_count += 1;
827 }
828 }
829 }
830
831 let gc_content = if total_count > 0 {
832 gc_count as f64 / total_count as f64
833 } else {
834 0.0
835 };
836
837 Ok(gc_content)
838}
839
840#[cfg(test)]
841mod tests {
842 use super::*;
843 use crate::types::Training;
844
845 #[test]
846 fn test_char_to_nuc_valid_bases() {
847 assert_eq!(char_to_nuc(b'A'), 0);
848 assert_eq!(char_to_nuc(b'a'), 0);
849 assert_eq!(char_to_nuc(b'C'), 1);
850 assert_eq!(char_to_nuc(b'c'), 1);
851 assert_eq!(char_to_nuc(b'G'), 2);
852 assert_eq!(char_to_nuc(b'g'), 2);
853 assert_eq!(char_to_nuc(b'T'), 3);
854 assert_eq!(char_to_nuc(b't'), 3);
855 assert_eq!(char_to_nuc(b'U'), 3);
856 assert_eq!(char_to_nuc(b'u'), 3);
857 }
858
859 #[test]
860 fn test_char_to_nuc_invalid_bases() {
861 assert_eq!(char_to_nuc(b'N'), 4);
862 assert_eq!(char_to_nuc(b'n'), 4);
863 assert_eq!(char_to_nuc(b'X'), 4);
864 assert_eq!(char_to_nuc(b'-'), 4);
865 assert_eq!(char_to_nuc(b' '), 4);
866 }
867
868 #[test]
869 fn test_nucleotide_check_functions() {
870 let mut seq = vec![0u8; 10];
871
872 crate::bitmap::set_bit(&mut seq, 2); crate::bitmap::set_bit(&mut seq, 3);
876 crate::bitmap::set_bit(&mut seq, 5); crate::bitmap::set_bit(&mut seq, 6); assert!(is_a(&seq, 0));
880 assert!(!is_a(&seq, 1));
881 assert!(!is_a(&seq, 2));
882 assert!(!is_a(&seq, 3));
883
884 assert!(!is_t(&seq, 0));
885 assert!(is_t(&seq, 1));
886 assert!(!is_t(&seq, 2));
887 assert!(!is_t(&seq, 3));
888
889 assert!(!is_c(&seq, 0));
890 assert!(!is_c(&seq, 1));
891 assert!(is_c(&seq, 2));
892 assert!(!is_c(&seq, 3));
893
894 assert!(!is_g(&seq, 0));
895 assert!(!is_g(&seq, 1));
896 assert!(!is_g(&seq, 2));
897 assert!(is_g(&seq, 3));
898 }
899
900 #[test]
901 fn test_start_codon_functions() {
902 let mut seq = vec![0u8; 20];
903
904 crate::bitmap::set_bit(&mut seq, 2); crate::bitmap::set_bit(&mut seq, 3);
907 crate::bitmap::set_bit(&mut seq, 4); assert!(is_atg(&seq, 0));
910 assert!(!is_gtg(&seq, 0));
911 assert!(!is_ttg(&seq, 0));
912
913 crate::bitmap::set_bit(&mut seq, 6); crate::bitmap::set_bit(&mut seq, 8); crate::bitmap::set_bit(&mut seq, 9);
917 crate::bitmap::set_bit(&mut seq, 10); assert!(!is_atg(&seq, 3));
920 assert!(is_gtg(&seq, 3));
921 assert!(!is_ttg(&seq, 3));
922 }
923
924 #[test]
925 fn test_stop_codon_functions() {
926 let mut seq = vec![0u8; 20];
927 let training = Training::default();
928
929 crate::bitmap::set_bit(&mut seq, 0); crate::bitmap::set_bit(&mut seq, 1);
932 assert!(is_stop(&seq, 0, &training));
935
936 crate::bitmap::set_bit(&mut seq, 6); crate::bitmap::set_bit(&mut seq, 7);
939 crate::bitmap::set_bit(&mut seq, 10); assert!(is_stop(&seq, 3, &training));
942 }
943
944 #[test]
945 fn test_gc_content_calculation() {
946 let mut seq = vec![0u8; 20];
947
948 crate::bitmap::set_bit(&mut seq, 2); crate::bitmap::set_bit(&mut seq, 3);
951 crate::bitmap::set_bit(&mut seq, 5); crate::bitmap::set_bit(&mut seq, 6); let gc = gc_content(&seq, 0, 3);
955 assert!((gc - 0.5).abs() < 0.001); }
957
958 #[test]
959 fn test_reverse_strand_reading_frame() {
960 assert_eq!(reverse_strand_reading_frame(0, 9), 2); assert_eq!(reverse_strand_reading_frame(1, 9), 1); assert_eq!(reverse_strand_reading_frame(2, 9), 0); assert_eq!(reverse_strand_reading_frame(0, 10), 0); assert_eq!(reverse_strand_reading_frame(0, 8), 1); assert_eq!(reverse_strand_reading_frame(1, 8), 0); }
972
973 #[test]
974 fn test_find_max_reading_frame() {
975 assert_eq!(find_max_reading_frame(10, 5, 3), 0);
976 assert_eq!(find_max_reading_frame(5, 10, 3), 1);
977 assert_eq!(find_max_reading_frame(5, 3, 10), 2);
978 assert_eq!(find_max_reading_frame(5, 5, 3), 1); }
980
981 #[test]
982 fn test_min_of_two_integers() {
983 assert_eq!(min_of_two_integers(5, 3), 3);
984 assert_eq!(min_of_two_integers(3, 5), 3);
985 assert_eq!(min_of_two_integers(-1, 5), -1);
986 assert_eq!(min_of_two_integers(5, 5), 5);
987 }
988
989 #[test]
990 fn test_calculate_kmer_index() {
991 let mut seq = vec![0u8; 20];
992
993 crate::bitmap::set_bit(&mut seq, 1); let idx = calculate_kmer_index(2, &seq, 0);
997 assert_eq!(idx, 2); }
999
1000 #[test]
1001 fn test_mer_text() {
1002 assert_eq!(mer_text(0, 0), "None");
1003 assert_eq!(mer_text(2, 0), "AA");
1004 assert_eq!(mer_text(2, 1), "GA");
1005 assert_eq!(mer_text(2, 2), "CA");
1006 assert_eq!(mer_text(2, 3), "TA");
1007 }
1008
1009 #[test]
1010 fn test_encode_sequence_basic() {
1011 let sequence = b"ATCG";
1012 let mut encoded = vec![0u8; 10];
1013 let mut unknown_sequence = vec![0u8; 10];
1014 let mut masks = Vec::new();
1015
1016 let gc = encode_sequence(
1017 sequence,
1018 &mut encoded,
1019 &mut unknown_sequence,
1020 &mut masks,
1021 false,
1022 )
1023 .unwrap();
1024 assert!((gc - 0.5).abs() < 0.001); }
1026
1027 #[test]
1028 fn test_encode_sequence_with_n() {
1029 let sequence = b"ATNG";
1030 let mut encoded = vec![0u8; 10];
1031 let mut unknown_sequence = vec![0u8; 10];
1032 let mut masks = Vec::new();
1033
1034 let gc = encode_sequence(
1035 sequence,
1036 &mut encoded,
1037 &mut unknown_sequence,
1038 &mut masks,
1039 false,
1040 )
1041 .unwrap();
1042 assert!((gc - 0.25).abs() < 0.001); assert!(crate::bitmap::test_bit(&unknown_sequence, 2)); }
1045
1046 #[test]
1047 fn test_encode_sequence_masking() {
1048 let mut sequence = b"ATC".to_vec();
1050 sequence.extend(vec![b'N'; 52]); sequence.extend(b"GCG");
1052
1053 let mut encoded = vec![0u8; 60];
1054 let mut unknown_sequence = vec![0u8; 60];
1055 let mut masks = Vec::new();
1056
1057 let _gc = encode_sequence(
1058 &sequence,
1059 &mut encoded,
1060 &mut unknown_sequence,
1061 &mut masks,
1062 true,
1063 )
1064 .unwrap();
1065 assert!(!masks.is_empty()); assert_eq!(masks.len(), 1);
1067 assert_eq!(masks[0].begin, 3); assert_eq!(masks[0].end, 54); }
1070
1071 #[test]
1072 fn test_is_gc() {
1073 let mut seq = vec![0u8; 10];
1074
1075 assert!(!is_gc(&seq, 0));
1076
1077 crate::bitmap::set_bit(&mut seq, 1);
1078 assert!(is_gc(&seq, 0));
1079
1080 let mut seq2 = vec![0u8; 10];
1081 crate::bitmap::set_bit(&mut seq2, 0);
1082 assert!(is_gc(&seq2, 0));
1083
1084 let mut seq3 = vec![0u8; 10];
1085 crate::bitmap::set_bit(&mut seq3, 0);
1086 crate::bitmap::set_bit(&mut seq3, 1);
1087 assert!(!is_gc(&seq3, 0));
1088 }
1089
1090 #[test]
1091 fn test_is_n() {
1092 let mut unknown_sequence = vec![0u8; 10];
1093
1094 assert!(!is_n(&unknown_sequence, 0));
1095 assert!(!is_n(&unknown_sequence, 100));
1096
1097 crate::bitmap::set_bit(&mut unknown_sequence, 5);
1098 assert!(is_n(&unknown_sequence, 5));
1099 }
1100
1101 #[test]
1102 fn test_calculate_background_mer_frequencies() {
1103 let seq = vec![0u8; 20]; let rseq = vec![0u8; 20]; let mut bg = vec![0.0; 16]; calculate_background_mer_frequencies(2, &seq, &rseq, 10, &mut bg);
1108
1109 assert!(bg[0] > 0.5); }
1112
1113 #[test]
1114 fn test_rcom_seq() {
1115 let seq = vec![0u8; 10];
1116 let unknown_sequence = vec![0u8; 10];
1117
1118 let rseq = create_reverse_complement_sequence(&seq, &unknown_sequence, 2);
1122
1123 assert!(is_t(&rseq, 1));
1124 }
1125
1126 #[test]
1127 fn test_translation_table_functions() {
1128 assert!(uses_only_atg(6));
1129 assert!(uses_only_atg(10));
1130 assert!(!uses_only_atg(11));
1131
1132 assert!(gtg_not_start(1));
1133 assert!(gtg_not_start(22));
1134 assert!(!gtg_not_start(11));
1135
1136 assert!(ttg_not_start(1));
1137 assert!(ttg_not_start(9));
1138 assert!(!ttg_not_start(11));
1139 }
1140
1141 #[test]
1142 fn test_start_codon_with_training() {
1143 let mut training = Training {
1144 translation_table: 11,
1145 ..Training::default()
1146 };
1147
1148 let mut seq = vec![0u8; 20];
1149
1150 crate::bitmap::set_bit(&mut seq, 2); crate::bitmap::set_bit(&mut seq, 3);
1153 crate::bitmap::set_bit(&mut seq, 4); assert!(is_start(&seq, 0, &training));
1156
1157 training.translation_table = 6;
1159 assert!(is_start(&seq, 0, &training)); let mut seq2 = vec![0u8; 20];
1163 crate::bitmap::set_bit(&mut seq2, 0); crate::bitmap::set_bit(&mut seq2, 2); crate::bitmap::set_bit(&mut seq2, 3);
1166 crate::bitmap::set_bit(&mut seq2, 4); assert!(!is_start(&seq2, 0, &training)); }
1170
1171 #[test]
1172 fn test_stop_codon_special_tables() {
1173 let mut training = Training::default();
1174 let mut seq = vec![0u8; 20];
1175
1176 training.translation_table = 2;
1178 crate::bitmap::set_bit(&mut seq, 2); assert!(is_stop(&seq, 0, &training));
1182
1183 training.translation_table = 22;
1185 let mut seq2 = vec![0u8; 20];
1186 crate::bitmap::set_bit(&mut seq2, 0); crate::bitmap::set_bit(&mut seq2, 1);
1189 crate::bitmap::set_bit(&mut seq2, 3); assert!(is_stop(&seq2, 0, &training));
1192 }
1193}