1use cyanea_core::{CyaneaError, Result};
8
9fn base_index(b: u8) -> Option<usize> {
14 match b.to_ascii_uppercase() {
15 b'A' => Some(0),
16 b'C' => Some(1),
17 b'G' => Some(2),
18 b'T' | b'U' => Some(3),
19 _ => None,
20 }
21}
22
23fn codon_index(codon: &[u8]) -> Option<usize> {
25 if codon.len() != 3 {
26 return None;
27 }
28 let b1 = base_index(codon[0])?;
29 let b2 = base_index(codon[1])?;
30 let b3 = base_index(codon[2])?;
31 Some(b1 * 16 + b2 * 4 + b3)
32}
33
34fn index_to_codon(idx: usize) -> [u8; 3] {
36 const BASES: [u8; 4] = [b'A', b'C', b'G', b'T'];
37 [
38 BASES[idx >> 4],
39 BASES[(idx >> 2) & 3],
40 BASES[idx & 3],
41 ]
42}
43
44#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
52#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
53pub enum GeneticCodeId {
54 Standard = 1,
55 VertebrateMitochondrial = 2,
56 YeastMitochondrial = 3,
57 MycoplasmaSpiroplasma = 4,
58 InvertebrateMitochondrial = 5,
59 CiliateNuclear = 6,
60 BacterialPlastid = 11,
61}
62
63const TABLE1_AA: [u8; 64] = [
76 b'K', b'N', b'K', b'N', b'T', b'T', b'T', b'T', b'R', b'S', b'R', b'S',
77 b'I', b'I', b'M', b'I', b'Q', b'H', b'Q', b'H', b'P', b'P', b'P', b'P',
78 b'R', b'R', b'R', b'R', b'L', b'L', b'L', b'L', b'E', b'D', b'E', b'D',
79 b'A', b'A', b'A', b'A', b'G', b'G', b'G', b'G', b'V', b'V', b'V', b'V',
80 b'*', b'Y', b'*', b'Y', b'S', b'S', b'S', b'S', b'*', b'C', b'W', b'C',
81 b'L', b'F', b'L', b'F',
82];
83
84const TABLE1_STARTS: [bool; 64] = {
85 let mut s = [false; 64];
86 s[14] = true;
88 s
89};
90
91const TABLE2_AA: [u8; 64] = [
94 b'K', b'N', b'K', b'N', b'T', b'T', b'T', b'T', b'*', b'S', b'*', b'S',
95 b'M', b'I', b'M', b'I', b'Q', b'H', b'Q', b'H', b'P', b'P', b'P', b'P',
96 b'R', b'R', b'R', b'R', b'L', b'L', b'L', b'L', b'E', b'D', b'E', b'D',
97 b'A', b'A', b'A', b'A', b'G', b'G', b'G', b'G', b'V', b'V', b'V', b'V',
98 b'*', b'Y', b'*', b'Y', b'S', b'S', b'S', b'S', b'W', b'C', b'W', b'C',
99 b'L', b'F', b'L', b'F',
100];
101
102const TABLE2_STARTS: [bool; 64] = {
103 let mut s = [false; 64];
104 s[14] = true; s[12] = true; s[13] = true; s[15] = true; s[46] = true; s
110};
111
112const TABLE3_AA: [u8; 64] = [
115 b'K', b'N', b'K', b'N', b'T', b'T', b'T', b'T', b'R', b'S', b'R', b'S',
116 b'M', b'I', b'M', b'I', b'Q', b'H', b'Q', b'H', b'P', b'P', b'P', b'P',
117 b'R', b'R', b'R', b'R', b'T', b'L', b'T', b'L', b'E', b'D', b'E', b'D',
118 b'A', b'A', b'A', b'A', b'G', b'G', b'G', b'G', b'V', b'V', b'V', b'V',
119 b'*', b'Y', b'*', b'Y', b'S', b'S', b'S', b'S', b'W', b'C', b'W', b'C',
120 b'L', b'F', b'L', b'F',
121];
122
123const TABLE3_STARTS: [bool; 64] = {
124 let mut s = [false; 64];
125 s[14] = true; s[12] = true; s
128};
129
130const TABLE4_AA: [u8; 64] = [
133 b'K', b'N', b'K', b'N', b'T', b'T', b'T', b'T', b'R', b'S', b'R', b'S',
134 b'I', b'I', b'M', b'I', b'Q', b'H', b'Q', b'H', b'P', b'P', b'P', b'P',
135 b'R', b'R', b'R', b'R', b'L', b'L', b'L', b'L', b'E', b'D', b'E', b'D',
136 b'A', b'A', b'A', b'A', b'G', b'G', b'G', b'G', b'V', b'V', b'V', b'V',
137 b'*', b'Y', b'*', b'Y', b'S', b'S', b'S', b'S', b'W', b'C', b'W', b'C',
138 b'L', b'F', b'L', b'F',
139];
140
141const TABLE4_STARTS: [bool; 64] = {
142 let mut s = [false; 64];
143 s[14] = true; s[62] = true; s[46] = true; s
147};
148
149const TABLE5_AA: [u8; 64] = [
152 b'K', b'N', b'K', b'N', b'T', b'T', b'T', b'T', b'S', b'S', b'S', b'S',
153 b'M', b'I', b'M', b'I', b'Q', b'H', b'Q', b'H', b'P', b'P', b'P', b'P',
154 b'R', b'R', b'R', b'R', b'L', b'L', b'L', b'L', b'E', b'D', b'E', b'D',
155 b'A', b'A', b'A', b'A', b'G', b'G', b'G', b'G', b'V', b'V', b'V', b'V',
156 b'*', b'Y', b'*', b'Y', b'S', b'S', b'S', b'S', b'W', b'C', b'W', b'C',
157 b'L', b'F', b'L', b'F',
158];
159
160const TABLE5_STARTS: [bool; 64] = {
161 let mut s = [false; 64];
162 s[14] = true; s[15] = true; s[46] = true; s
166};
167
168const TABLE6_AA: [u8; 64] = [
171 b'K', b'N', b'K', b'N', b'T', b'T', b'T', b'T', b'R', b'S', b'R', b'S',
172 b'I', b'I', b'M', b'I', b'Q', b'H', b'Q', b'H', b'P', b'P', b'P', b'P',
173 b'R', b'R', b'R', b'R', b'L', b'L', b'L', b'L', b'E', b'D', b'E', b'D',
174 b'A', b'A', b'A', b'A', b'G', b'G', b'G', b'G', b'V', b'V', b'V', b'V',
175 b'Q', b'Y', b'Q', b'Y', b'S', b'S', b'S', b'S', b'*', b'C', b'W', b'C',
176 b'L', b'F', b'L', b'F',
177];
178
179const TABLE6_STARTS: [bool; 64] = {
180 let mut s = [false; 64];
181 s[14] = true; s
183};
184
185const TABLE11_AA: [u8; 64] = TABLE1_AA;
188
189const TABLE11_STARTS: [bool; 64] = {
190 let mut s = [false; 64];
191 s[14] = true; s[62] = true; s[46] = true; s[13] = true; s[15] = true; s[28] = true; s
198};
199
200#[derive(Debug, Clone)]
209pub struct GeneticCode {
210 id: GeneticCodeId,
211 name: &'static str,
212 table: [u8; 64],
213 starts: [bool; 64],
214}
215
216impl GeneticCode {
217 pub fn from_id(id: GeneticCodeId) -> Self {
219 match id {
220 GeneticCodeId::Standard => Self {
221 id,
222 name: "Standard",
223 table: TABLE1_AA,
224 starts: TABLE1_STARTS,
225 },
226 GeneticCodeId::VertebrateMitochondrial => Self {
227 id,
228 name: "Vertebrate Mitochondrial",
229 table: TABLE2_AA,
230 starts: TABLE2_STARTS,
231 },
232 GeneticCodeId::YeastMitochondrial => Self {
233 id,
234 name: "Yeast Mitochondrial",
235 table: TABLE3_AA,
236 starts: TABLE3_STARTS,
237 },
238 GeneticCodeId::MycoplasmaSpiroplasma => Self {
239 id,
240 name: "Mycoplasma/Spiroplasma",
241 table: TABLE4_AA,
242 starts: TABLE4_STARTS,
243 },
244 GeneticCodeId::InvertebrateMitochondrial => Self {
245 id,
246 name: "Invertebrate Mitochondrial",
247 table: TABLE5_AA,
248 starts: TABLE5_STARTS,
249 },
250 GeneticCodeId::CiliateNuclear => Self {
251 id,
252 name: "Ciliate Nuclear",
253 table: TABLE6_AA,
254 starts: TABLE6_STARTS,
255 },
256 GeneticCodeId::BacterialPlastid => Self {
257 id,
258 name: "Bacterial/Plant Plastid",
259 table: TABLE11_AA,
260 starts: TABLE11_STARTS,
261 },
262 }
263 }
264
265 pub fn standard() -> Self {
267 Self::from_id(GeneticCodeId::Standard)
268 }
269
270 pub fn id(&self) -> GeneticCodeId {
272 self.id
273 }
274
275 pub fn name(&self) -> &str {
277 self.name
278 }
279
280 pub fn translate_codon(&self, codon: &[u8]) -> Option<u8> {
285 let idx = codon_index(codon)?;
286 let aa = self.table[idx];
287 if aa == b'*' {
288 None
289 } else {
290 Some(aa)
291 }
292 }
293
294 pub fn translate_sequence(&self, seq: &[u8]) -> Vec<u8> {
298 let mut protein = Vec::with_capacity(seq.len() / 3);
299 for codon in seq.chunks_exact(3) {
300 match self.translate_codon(codon) {
301 Some(aa) => protein.push(aa),
302 None => break,
303 }
304 }
305 protein
306 }
307
308 pub fn translate_sequence_full(&self, seq: &[u8]) -> Vec<u8> {
313 let mut protein = Vec::with_capacity(seq.len() / 3);
314 for codon in seq.chunks_exact(3) {
315 if let Some(idx) = codon_index(codon) {
316 protein.push(self.table[idx]);
317 }
318 }
319 protein
320 }
321
322 pub fn is_start(&self, codon: &[u8]) -> bool {
324 codon_index(codon).map_or(false, |idx| self.starts[idx])
325 }
326
327 pub fn is_stop(&self, codon: &[u8]) -> bool {
329 codon_index(codon).map_or(false, |idx| self.table[idx] == b'*')
330 }
331
332 pub fn stop_codons(&self) -> Vec<[u8; 3]> {
334 (0..64)
335 .filter(|&i| self.table[i] == b'*')
336 .map(index_to_codon)
337 .collect()
338 }
339
340 pub fn start_codons(&self) -> Vec<[u8; 3]> {
342 (0..64)
343 .filter(|&i| self.starts[i])
344 .map(index_to_codon)
345 .collect()
346 }
347
348 fn aa_at(&self, idx: usize) -> u8 {
350 self.table[idx]
351 }
352}
353
354pub fn translate_codon(codon: &[u8]) -> Option<u8> {
363 GeneticCode::standard().translate_codon(codon)
364}
365
366pub fn translate_sequence(seq: &[u8]) -> Vec<u8> {
370 GeneticCode::standard().translate_sequence(seq)
371}
372
373#[derive(Debug, Clone)]
381pub struct CodonUsage {
382 counts: [u64; 64],
383 total: u64,
384}
385
386impl CodonUsage {
387 pub fn from_sequence(seq: &[u8]) -> Self {
392 let mut counts = [0u64; 64];
393 let mut total = 0u64;
394 for codon in seq.chunks_exact(3) {
395 if let Some(idx) = codon_index(codon) {
396 counts[idx] += 1;
397 total += 1;
398 }
399 }
400 CodonUsage { counts, total }
401 }
402
403 pub fn merge(&mut self, other: &CodonUsage) {
405 for i in 0..64 {
406 self.counts[i] += other.counts[i];
407 }
408 self.total += other.total;
409 }
410
411 pub fn count(&self, codon: &[u8]) -> u64 {
413 codon_index(codon).map_or(0, |idx| self.counts[idx])
414 }
415
416 pub fn frequency(&self, codon: &[u8]) -> f64 {
418 if self.total == 0 {
419 return 0.0;
420 }
421 self.count(codon) as f64 / self.total as f64
422 }
423
424 pub fn rscu(&self, codon: &[u8], code: &GeneticCode) -> f64 {
429 let idx = match codon_index(codon) {
430 Some(i) => i,
431 None => return 0.0,
432 };
433 let aa = code.aa_at(idx);
434 if aa == b'*' {
435 return 0.0;
436 }
437
438 let synonymous: Vec<usize> = (0..64)
440 .filter(|&i| code.aa_at(i) == aa)
441 .collect();
442 let n_syn = synonymous.len() as f64;
443 let total_syn: u64 = synonymous.iter().map(|&i| self.counts[i]).sum();
444
445 if total_syn == 0 {
446 return 0.0;
447 }
448
449 let observed = self.counts[idx] as f64;
450 let expected = total_syn as f64 / n_syn;
451 observed / expected
452 }
453
454 pub fn relative_adaptiveness(&self, code: &GeneticCode) -> [f64; 64] {
459 let mut w = [0.0f64; 64];
460
461 for aa in b"ACDEFGHIKLMNPQRSTVWY".iter() {
463 let synonymous: Vec<usize> = (0..64)
464 .filter(|&i| code.aa_at(i) == *aa)
465 .collect();
466 if synonymous.is_empty() {
467 continue;
468 }
469
470 let rscu_vals: Vec<f64> = synonymous
471 .iter()
472 .map(|&i| {
473 let total_syn: u64 = synonymous.iter().map(|&j| self.counts[j]).sum();
474 if total_syn == 0 {
475 return 0.0;
476 }
477 let n_syn = synonymous.len() as f64;
478 self.counts[i] as f64 / (total_syn as f64 / n_syn)
479 })
480 .collect();
481
482 let max_rscu = rscu_vals
483 .iter()
484 .cloned()
485 .fold(0.0f64, f64::max);
486
487 if max_rscu > 0.0 {
488 for (k, &idx) in synonymous.iter().enumerate() {
489 w[idx] = rscu_vals[k] / max_rscu;
490 }
491 }
492 }
493
494 w
495 }
496}
497
498pub fn codon_adaptation_index(
511 query: &[u8],
512 reference: &CodonUsage,
513 code: &GeneticCode,
514) -> Result<f64> {
515 let w = reference.relative_adaptiveness(code);
516 let mut log_sum = 0.0f64;
517 let mut n = 0u64;
518
519 for codon in query.chunks_exact(3) {
520 if let Some(idx) = codon_index(codon) {
521 let aa = code.aa_at(idx);
522 if aa == b'*' {
523 continue; }
525 let wi = w[idx];
526 if wi > 0.0 {
527 log_sum += wi.ln();
528 n += 1;
529 }
530 }
531 }
532
533 if n == 0 {
534 return Err(CyaneaError::InvalidInput(
535 "no valid sense codons in query".into(),
536 ));
537 }
538
539 Ok((log_sum / n as f64).exp())
540}
541
542#[derive(Debug, Clone, Copy, PartialEq, Eq)]
548pub enum SubstitutionClass {
549 Synonymous,
551 NonSynonymous,
553 StopInvolved,
555}
556
557pub fn classify_substitution(
562 codon1: &[u8],
563 codon2: &[u8],
564 code: &GeneticCode,
565) -> SubstitutionClass {
566 let idx1 = match codon_index(codon1) {
567 Some(i) => i,
568 None => return SubstitutionClass::StopInvolved,
569 };
570 let idx2 = match codon_index(codon2) {
571 Some(i) => i,
572 None => return SubstitutionClass::StopInvolved,
573 };
574
575 let aa1 = code.aa_at(idx1);
576 let aa2 = code.aa_at(idx2);
577
578 if aa1 == b'*' || aa2 == b'*' {
579 SubstitutionClass::StopInvolved
580 } else if aa1 == aa2 {
581 SubstitutionClass::Synonymous
582 } else {
583 SubstitutionClass::NonSynonymous
584 }
585}
586
587pub fn count_syn_nonsyn_sites(seq: &[u8], code: &GeneticCode) -> (f64, f64) {
593 let mut syn = 0.0f64;
594 let mut nonsyn = 0.0f64;
595 let bases: [u8; 4] = [b'A', b'C', b'G', b'T'];
596
597 for codon in seq.chunks_exact(3) {
598 let idx = match codon_index(codon) {
599 Some(i) => i,
600 None => continue,
601 };
602 let aa = code.aa_at(idx);
603 if aa == b'*' {
604 continue;
605 }
606
607 for pos in 0..3 {
609 let mut s = 0.0f64;
610 let mut n = 0.0f64;
611 let orig = codon[pos].to_ascii_uppercase();
612 let orig = if orig == b'U' { b'T' } else { orig };
613
614 for &base in &bases {
615 if base == orig {
616 continue;
617 }
618 let mut mutant = [codon[0], codon[1], codon[2]];
619 mutant[pos] = base;
620 let midx = match codon_index(&mutant) {
621 Some(i) => i,
622 None => continue,
623 };
624 let maa = code.aa_at(midx);
625 if maa == b'*' {
626 n += 1.0;
628 } else if maa == aa {
629 s += 1.0;
630 } else {
631 n += 1.0;
632 }
633 }
634
635 let total = s + n;
636 if total > 0.0 {
637 syn += s / total;
638 nonsyn += n / total;
639 }
640 }
641 }
642
643 (syn, nonsyn)
644}
645
646#[cfg(test)]
651mod tests {
652 use super::*;
653
654 #[test]
655 fn standard_all_64_codons() {
656 let code = GeneticCode::standard();
657 assert_eq!(code.translate_codon(b"AAA"), Some(b'K'));
659 assert_eq!(code.translate_codon(b"AAC"), Some(b'N'));
660 assert_eq!(code.translate_codon(b"AAG"), Some(b'K'));
661 assert_eq!(code.translate_codon(b"AAT"), Some(b'N'));
662 assert_eq!(code.translate_codon(b"ATG"), Some(b'M'));
664 assert_eq!(code.translate_codon(b"TAA"), None);
666 assert_eq!(code.translate_codon(b"TAG"), None);
667 assert_eq!(code.translate_codon(b"TGA"), None);
668 assert_eq!(code.translate_codon(b"TGG"), Some(b'W'));
670 assert_eq!(code.translate_codon(b"TTT"), Some(b'F'));
672 assert_eq!(code.translate_codon(b"TTC"), Some(b'F'));
673 assert_eq!(code.translate_codon(b"GGG"), Some(b'G'));
675 }
676
677 #[test]
678 fn backward_compat_translate_codon() {
679 assert_eq!(translate_codon(b"ATG"), Some(b'M'));
680 assert_eq!(translate_codon(b"TAA"), None);
681 assert_eq!(translate_codon(b"UGG"), Some(b'W'));
682 }
683
684 #[test]
685 fn backward_compat_translate_sequence() {
686 let protein = translate_sequence(b"AUGUUUUAA");
687 assert_eq!(protein, b"MF");
688 }
689
690 #[test]
691 fn translate_dna_codons() {
692 assert_eq!(translate_codon(b"ATG"), Some(b'M'));
693 assert_eq!(translate_codon(b"TAA"), None);
694 }
695
696 #[test]
697 fn translate_sequence_basic() {
698 let protein = translate_sequence(b"AUGUUUUAA");
699 assert_eq!(protein, b"MF");
700 }
701
702 #[test]
703 fn translate_sequence_incomplete_codon_ignored() {
704 let protein = translate_sequence(b"AUGUUUAU");
705 assert_eq!(protein, b"MF");
706 }
707
708 #[test]
709 fn translate_sequence_full_includes_stops() {
710 let code = GeneticCode::standard();
711 let protein = code.translate_sequence_full(b"ATGTAAGCG");
712 assert_eq!(protein, &[b'M', b'*', b'A']);
713 }
714
715 #[test]
716 fn rna_codons_work() {
717 let code = GeneticCode::standard();
718 assert_eq!(code.translate_codon(b"AUG"), Some(b'M'));
719 assert_eq!(code.translate_codon(b"UAA"), None);
720 }
721
722 #[test]
723 fn vertebrate_mito_differences() {
724 let code = GeneticCode::from_id(GeneticCodeId::VertebrateMitochondrial);
725 assert_eq!(code.translate_codon(b"TGA"), Some(b'W'));
727 assert_eq!(code.translate_codon(b"AGA"), None);
729 assert_eq!(code.translate_codon(b"AGG"), None);
731 assert_eq!(code.translate_codon(b"ATA"), Some(b'M'));
733 }
734
735 #[test]
736 fn yeast_mito_differences() {
737 let code = GeneticCode::from_id(GeneticCodeId::YeastMitochondrial);
738 assert_eq!(code.translate_codon(b"CTG"), Some(b'T'));
740 assert_eq!(code.translate_codon(b"CTA"), Some(b'T'));
742 assert_eq!(code.translate_codon(b"TGA"), Some(b'W'));
744 assert_eq!(code.translate_codon(b"ATA"), Some(b'M'));
746 }
747
748 #[test]
749 fn mycoplasma_differences() {
750 let code = GeneticCode::from_id(GeneticCodeId::MycoplasmaSpiroplasma);
751 assert_eq!(code.translate_codon(b"TGA"), Some(b'W'));
753 assert_eq!(code.translate_codon(b"TAA"), None);
755 assert_eq!(code.translate_codon(b"TAG"), None);
756 }
757
758 #[test]
759 fn invertebrate_mito_differences() {
760 let code = GeneticCode::from_id(GeneticCodeId::InvertebrateMitochondrial);
761 assert_eq!(code.translate_codon(b"AGA"), Some(b'S'));
763 assert_eq!(code.translate_codon(b"AGG"), Some(b'S'));
765 assert_eq!(code.translate_codon(b"TGA"), Some(b'W'));
767 }
768
769 #[test]
770 fn ciliate_differences() {
771 let code = GeneticCode::from_id(GeneticCodeId::CiliateNuclear);
772 assert_eq!(code.translate_codon(b"TAA"), Some(b'Q'));
774 assert_eq!(code.translate_codon(b"TAG"), Some(b'Q'));
776 assert_eq!(code.translate_codon(b"TGA"), None);
778 }
779
780 #[test]
781 fn bacterial_plastid_same_aas_different_starts() {
782 let code = GeneticCode::from_id(GeneticCodeId::BacterialPlastid);
783 assert_eq!(code.translate_codon(b"ATG"), Some(b'M'));
785 assert_eq!(code.translate_codon(b"TGA"), None);
786 assert!(code.is_start(b"GTG"));
788 assert!(code.is_start(b"TTG"));
789 assert!(code.is_start(b"ATG"));
790 }
791
792 #[test]
793 fn start_stop_codons() {
794 let code = GeneticCode::standard();
795 assert!(code.is_start(b"ATG"));
796 assert!(!code.is_start(b"GTG"));
797 assert!(code.is_stop(b"TAA"));
798 assert!(code.is_stop(b"TAG"));
799 assert!(code.is_stop(b"TGA"));
800 assert!(!code.is_stop(b"ATG"));
801
802 let stops = code.stop_codons();
803 assert_eq!(stops.len(), 3);
804 let starts = code.start_codons();
805 assert_eq!(starts.len(), 1);
806 }
807
808 #[test]
809 fn codon_usage_basic() {
810 let usage = CodonUsage::from_sequence(b"ATGATG");
812 assert_eq!(usage.count(b"ATG"), 2);
813 assert_eq!(usage.total, 2);
814 assert!((usage.frequency(b"ATG") - 1.0).abs() < 1e-10);
815 }
816
817 #[test]
818 fn codon_usage_merge() {
819 let mut u1 = CodonUsage::from_sequence(b"ATGATG");
820 let u2 = CodonUsage::from_sequence(b"ATGAAA");
821 u1.merge(&u2);
822 assert_eq!(u1.count(b"ATG"), 3);
823 assert_eq!(u1.count(b"AAA"), 1);
824 assert_eq!(u1.total, 4);
825 }
826
827 #[test]
828 fn rscu_single_codon_family() {
829 let code = GeneticCode::standard();
830 let usage = CodonUsage::from_sequence(b"ATGATGATG");
832 let rscu = usage.rscu(b"ATG", &code);
833 assert!((rscu - 1.0).abs() < 1e-10);
834 }
835
836 #[test]
837 fn rscu_two_fold_degenerate() {
838 let code = GeneticCode::standard();
839 let usage = CodonUsage::from_sequence(b"TTTTTTTTT");
841 let rscu_ttt = usage.rscu(b"TTT", &code);
842 assert!((rscu_ttt - 2.0).abs() < 1e-10);
843 let rscu_ttc = usage.rscu(b"TTC", &code);
844 assert!(rscu_ttc.abs() < 1e-10);
845 }
846
847 #[test]
848 fn cai_high_adaptation() {
849 let code = GeneticCode::standard();
850 let ref_seq = b"ATGTTTAAA";
852 let reference = CodonUsage::from_sequence(ref_seq);
853 let cai = codon_adaptation_index(ref_seq, &reference, &code).unwrap();
854 assert!((cai - 1.0).abs() < 0.01, "CAI={}", cai);
855 }
856
857 #[test]
858 fn cai_error_on_empty() {
859 let code = GeneticCode::standard();
860 let reference = CodonUsage::from_sequence(b"ATGATG");
861 let result = codon_adaptation_index(b"", &reference, &code);
862 assert!(result.is_err());
863 }
864
865 #[test]
866 fn classify_synonymous() {
867 let code = GeneticCode::standard();
868 assert_eq!(
870 classify_substitution(b"TTT", b"TTC", &code),
871 SubstitutionClass::Synonymous
872 );
873 }
874
875 #[test]
876 fn classify_nonsynonymous() {
877 let code = GeneticCode::standard();
878 assert_eq!(
880 classify_substitution(b"ATG", b"GTG", &code),
881 SubstitutionClass::NonSynonymous
882 );
883 }
884
885 #[test]
886 fn classify_stop_involved() {
887 let code = GeneticCode::standard();
888 assert_eq!(
889 classify_substitution(b"ATG", b"TAG", &code),
890 SubstitutionClass::StopInvolved
891 );
892 }
893
894 #[test]
895 fn count_sites_basic() {
896 let code = GeneticCode::standard();
897 let (s, n) = count_syn_nonsyn_sites(b"ATG", &code);
899 assert!(s < 0.5, "ATG syn sites should be ~0, got {}", s);
900 assert!(n > 2.5, "ATG nonsyn sites should be ~3, got {}", n);
901 }
902
903 #[test]
904 fn count_sites_four_fold() {
905 let code = GeneticCode::standard();
906 let (s, _n) = count_syn_nonsyn_sites(b"GCA", &code);
909 assert!(s > 0.5, "Ala codon should have >0.5 syn sites, got {}", s);
910 }
911}
912
913#[cfg(test)]
914mod proptests {
915 use super::*;
916 use proptest::prelude::*;
917
918 fn coding_seq(max_codons: usize) -> impl Strategy<Value = Vec<u8>> {
919 proptest::collection::vec(
920 prop_oneof![Just(b'A'), Just(b'C'), Just(b'G'), Just(b'T')],
921 3..=(max_codons * 3),
922 )
923 .prop_map(|v| {
924 let len = v.len() - (v.len() % 3);
925 v[..len].to_vec()
926 })
927 }
928
929 proptest! {
930 #[test]
931 fn cai_in_unit_interval(seq in coding_seq(20)) {
932 let code = GeneticCode::standard();
933 let reference = CodonUsage::from_sequence(&seq);
934 if let Ok(cai) = codon_adaptation_index(&seq, &reference, &code) {
935 prop_assert!(cai > 0.0 && cai <= 1.0 + 1e-10,
936 "CAI should be in (0,1], got {}", cai);
937 }
938 }
939 }
940}