1use cyanea_core::{CyaneaError, Result};
14
15fn aa_index(aa: u8) -> Option<usize> {
19 match aa {
20 b'A' => Some(0),
21 b'C' => Some(1),
22 b'D' => Some(2),
23 b'E' => Some(3),
24 b'F' => Some(4),
25 b'G' => Some(5),
26 b'H' => Some(6),
27 b'I' => Some(7),
28 b'K' => Some(8),
29 b'L' => Some(9),
30 b'M' => Some(10),
31 b'N' => Some(11),
32 b'P' => Some(12),
33 b'Q' => Some(13),
34 b'R' => Some(14),
35 b'S' => Some(15),
36 b'T' => Some(16),
37 b'V' => Some(17),
38 b'W' => Some(18),
39 b'Y' => Some(19),
40 _ => None,
41 }
42}
43
44fn normalize_protein(seq: &[u8]) -> Result<Vec<u8>> {
46 if seq.is_empty() {
47 return Err(CyaneaError::InvalidInput(
48 "empty protein sequence".to_string(),
49 ));
50 }
51 seq.iter()
52 .map(|&b| {
53 let upper = b.to_ascii_uppercase();
54 if aa_index(upper).is_some() {
55 Ok(upper)
56 } else {
57 Err(CyaneaError::InvalidInput(format!(
58 "invalid amino acid '{}' in protein sequence",
59 b as char
60 )))
61 }
62 })
63 .collect()
64}
65
66const KYTE_DOOLITTLE: [f64; 20] = [
70 1.8, 2.5, -3.5, -3.5, 2.8, -0.4, -3.2, 4.5, -3.9, 3.8, 1.9, -3.5, -1.6, -3.5, -4.5, -0.8, -0.7, 4.2, -0.9, -1.3, ];
91
92const HOPP_WOODS: [f64; 20] = [
94 -0.5, -1.0, 3.0, 3.0, -2.5, 0.0, -0.5, -1.8, 3.0, -1.8, -1.3, 0.2, 0.0, 0.2, 3.0, 0.3, -0.4, -1.5, -3.4, -2.3, ];
115
116const PKA_NTERM: f64 = 9.69;
119const PKA_CTERM: f64 = 2.34;
120const PKA_D: f64 = 3.65;
121const PKA_E: f64 = 4.25;
122const PKA_C: f64 = 8.18;
123const PKA_Y: f64 = 10.07;
124const PKA_H: f64 = 6.00;
125const PKA_K: f64 = 10.53;
126const PKA_R: f64 = 12.48;
127
128const EXT_TRP: f64 = 5500.0;
131const EXT_TYR: f64 = 1490.0;
132const EXT_CYSTINE: f64 = 125.0;
133
134const CHOU_FASMAN: [(f64, f64, f64); 20] = [
138 (1.42, 0.83, 0.66), (0.70, 1.19, 1.19), (1.01, 0.54, 1.46), (1.51, 0.37, 0.74), (1.13, 1.38, 0.60), (0.57, 0.75, 1.56), (1.00, 0.87, 0.95), (1.08, 1.60, 0.47), (1.16, 0.74, 1.01), (1.21, 1.30, 0.59), (1.45, 1.05, 0.60), (0.67, 0.89, 1.56), (0.57, 0.55, 1.52), (1.11, 1.10, 0.98), (0.98, 0.93, 0.95), (0.77, 0.75, 1.43), (0.83, 1.19, 0.96), (1.06, 1.70, 0.50), (1.08, 1.37, 0.96), (0.69, 1.47, 1.14), ];
159
160const GOR_INFO: [(f64, f64, f64); 20] = [
165 (0.36, -0.23, -0.13), (-0.20, 0.17, 0.03), (0.07, -0.42, 0.35), (0.42, -0.37, -0.05), (-0.09, 0.32, -0.23), (-0.43, -0.18, 0.61), (0.04, -0.09, 0.05), (-0.06, 0.42, -0.36), (0.13, -0.25, 0.12), (0.21, 0.22, -0.43), (0.36, 0.03, -0.39), (-0.29, -0.18, 0.47), (-0.42, -0.37, 0.79), (0.18, -0.10, -0.08), (-0.01, -0.15, 0.16), (-0.15, -0.07, 0.22), (-0.11, 0.16, -0.05), (-0.06, 0.52, -0.46), (-0.02, 0.27, -0.25), (-0.17, 0.31, -0.14), ];
186
187const GOR_HALF_WIDTH: usize = 8;
189
190const DISORDER_PROPENSITY: [f64; 20] = [
195 -0.26, -0.20, 0.70, 0.55, -0.60, 0.16, 0.06, -0.70, 0.60, -0.50, -0.14, 0.38, 0.55, 0.45, 0.30, 0.20, 0.12, -0.60, -0.55, -0.45, ];
216
217#[derive(Debug, Clone, Copy, PartialEq, Eq)]
221pub enum HydrophobicityScale {
222 KyteDoolittle,
224 HoppWoods,
226}
227
228#[derive(Debug, Clone)]
230pub struct AminoAcidComposition {
231 pub counts: [usize; 20],
233 pub fractions: [f64; 20],
235 pub length: usize,
237}
238
239#[derive(Debug, Clone)]
241pub struct ExtinctionCoefficient {
242 pub reduced: f64,
244 pub cystine: f64,
246 pub abs_01_reduced: f64,
248 pub abs_01_cystine: f64,
250}
251
252#[derive(Debug, Clone, Copy, PartialEq, Eq)]
254pub enum SecondaryStructure {
255 Helix,
256 Strand,
257 Coil,
258}
259
260#[derive(Debug, Clone)]
262pub struct SecondaryStructurePrediction {
263 pub states: Vec<SecondaryStructure>,
265 pub helix_scores: Vec<f64>,
267 pub strand_scores: Vec<f64>,
269 pub coil_scores: Vec<f64>,
271 pub helix_fraction: f64,
273 pub strand_fraction: f64,
275 pub coil_fraction: f64,
277}
278
279#[derive(Debug, Clone)]
281pub struct DisorderPrediction {
282 pub scores: Vec<f64>,
284 pub disordered: Vec<bool>,
286 pub disorder_fraction: f64,
288}
289
290pub fn amino_acid_composition(seq: &[u8]) -> Result<AminoAcidComposition> {
311 let norm = normalize_protein(seq)?;
312 let mut counts = [0usize; 20];
313 for &aa in &norm {
314 counts[aa_index(aa).unwrap()] += 1;
315 }
316 let len = norm.len() as f64;
317 let mut fractions = [0.0f64; 20];
318 for i in 0..20 {
319 fractions[i] = counts[i] as f64 / len;
320 }
321 Ok(AminoAcidComposition {
322 counts,
323 fractions,
324 length: norm.len(),
325 })
326}
327
328pub fn hydrophobicity_profile(
352 seq: &[u8],
353 window: usize,
354 scale: HydrophobicityScale,
355) -> Result<Vec<f64>> {
356 let norm = normalize_protein(seq)?;
357 if window == 0 || window % 2 == 0 {
358 return Err(CyaneaError::InvalidInput(
359 "window size must be odd and >= 1".to_string(),
360 ));
361 }
362 if window > norm.len() {
363 return Err(CyaneaError::InvalidInput(format!(
364 "window size {} exceeds sequence length {}",
365 window,
366 norm.len()
367 )));
368 }
369
370 let table = match scale {
371 HydrophobicityScale::KyteDoolittle => &KYTE_DOOLITTLE,
372 HydrophobicityScale::HoppWoods => &HOPP_WOODS,
373 };
374
375 let values: Vec<f64> = norm
376 .iter()
377 .map(|&aa| table[aa_index(aa).unwrap()])
378 .collect();
379
380 let n = norm.len();
381 let mut profile = Vec::with_capacity(n - window + 1);
382
383 let mut sum: f64 = values[..window].iter().sum();
385 profile.push(sum / window as f64);
386
387 for i in 1..=(n - window) {
389 sum += values[i + window - 1] - values[i - 1];
390 profile.push(sum / window as f64);
391 }
392
393 Ok(profile)
394}
395
396pub fn gravy(seq: &[u8]) -> Result<f64> {
410 let norm = normalize_protein(seq)?;
411 let sum: f64 = norm
412 .iter()
413 .map(|&aa| KYTE_DOOLITTLE[aa_index(aa).unwrap()])
414 .sum();
415 Ok(sum / norm.len() as f64)
416}
417
418fn net_charge(seq: &[u8], ph: f64) -> f64 {
422 let mut charge = 0.0;
423
424 charge += 1.0 / (1.0 + 10_f64.powf(ph - PKA_NTERM));
426 charge -= 1.0 / (1.0 + 10_f64.powf(PKA_CTERM - ph));
428
429 for &aa in seq {
430 match aa {
431 b'D' => charge -= 1.0 / (1.0 + 10_f64.powf(PKA_D - ph)),
432 b'E' => charge -= 1.0 / (1.0 + 10_f64.powf(PKA_E - ph)),
433 b'C' => charge -= 1.0 / (1.0 + 10_f64.powf(PKA_C - ph)),
434 b'Y' => charge -= 1.0 / (1.0 + 10_f64.powf(PKA_Y - ph)),
435 b'H' => charge += 1.0 / (1.0 + 10_f64.powf(ph - PKA_H)),
436 b'K' => charge += 1.0 / (1.0 + 10_f64.powf(ph - PKA_K)),
437 b'R' => charge += 1.0 / (1.0 + 10_f64.powf(ph - PKA_R)),
438 _ => {}
439 }
440 }
441 charge
442}
443
444pub fn isoelectric_point(seq: &[u8]) -> Result<f64> {
458 let norm = normalize_protein(seq)?;
459 let mut lo = 0.0_f64;
460 let mut hi = 14.0_f64;
461
462 for _ in 0..100 {
463 let mid = (lo + hi) / 2.0;
464 let charge = net_charge(&norm, mid);
465 if charge.abs() < 0.001 {
466 return Ok(mid);
467 }
468 if charge > 0.0 {
469 lo = mid;
470 } else {
471 hi = mid;
472 }
473 }
474 Ok((lo + hi) / 2.0)
475}
476
477pub fn extinction_coefficient(seq: &[u8]) -> Result<ExtinctionCoefficient> {
493 let norm = normalize_protein(seq)?;
494 let comp = amino_acid_composition(&norm)?;
495
496 let n_trp = comp.counts[aa_index(b'W').unwrap()] as f64;
497 let n_tyr = comp.counts[aa_index(b'Y').unwrap()] as f64;
498 let n_cys = comp.counts[aa_index(b'C').unwrap()] as f64;
499
500 let reduced = n_trp * EXT_TRP + n_tyr * EXT_TYR;
501 let n_cystine = (n_cys / 2.0).floor();
502 let cystine = reduced + n_cystine * EXT_CYSTINE;
503
504 let mw = molecular_weight_from_seq(&norm);
506
507 let abs_01_reduced = if mw > 0.0 { reduced / mw } else { 0.0 };
508 let abs_01_cystine = if mw > 0.0 { cystine / mw } else { 0.0 };
509
510 Ok(ExtinctionCoefficient {
511 reduced,
512 cystine,
513 abs_01_reduced,
514 abs_01_cystine,
515 })
516}
517
518fn molecular_weight_from_seq(seq: &[u8]) -> f64 {
520 const WEIGHTS: [f64; 20] = [
522 89.09, 121.16, 133.10, 147.13, 165.19, 75.03, 155.16, 131.17, 146.19, 131.17, 149.21, 132.12, 115.13, 146.15, 174.20, 105.09, 119.12, 117.15, 204.23, 181.19, ];
543 if seq.is_empty() {
544 return 0.0;
545 }
546 let sum: f64 = seq
547 .iter()
548 .map(|&aa| WEIGHTS[aa_index(aa).unwrap()])
549 .sum();
550 sum - (seq.len() as f64 - 1.0) * 18.015
551}
552
553pub fn chou_fasman(seq: &[u8]) -> Result<SecondaryStructurePrediction> {
577 let norm = normalize_protein(seq)?;
578 let n = norm.len();
579 if n < 6 {
580 return Err(CyaneaError::InvalidInput(
581 "sequence must be at least 6 residues for Chou-Fasman prediction".to_string(),
582 ));
583 }
584
585 let props: Vec<(f64, f64, f64)> = norm
586 .iter()
587 .map(|&aa| CHOU_FASMAN[aa_index(aa).unwrap()])
588 .collect();
589
590 let mut assignment = vec![0u8; n];
592
593 let mut helix_regions: Vec<(usize, usize)> = Vec::new();
595 for i in 0..=n.saturating_sub(6) {
596 let count = (0..6).filter(|&j| props[i + j].0 >= 1.03).count();
597 if count >= 4 {
598 helix_regions.push((i, i + 5));
599 }
600 }
601
602 let helix_regions = merge_regions(helix_regions);
604
605 let helix_regions: Vec<(usize, usize)> = helix_regions
607 .into_iter()
608 .map(|(start, end)| extend_region(&props, start, end, n, |p| p.0, 1.00))
609 .collect();
610
611 let mut strand_regions: Vec<(usize, usize)> = Vec::new();
613 for i in 0..=n.saturating_sub(5) {
614 let count = (0..5).filter(|&j| props[i + j].1 >= 1.05).count();
615 if count >= 3 {
616 strand_regions.push((i, i + 4));
617 }
618 }
619
620 let strand_regions = merge_regions(strand_regions);
621 let strand_regions: Vec<(usize, usize)> = strand_regions
622 .into_iter()
623 .map(|(start, end)| extend_region(&props, start, end, n, |p| p.1, 1.00))
624 .collect();
625
626 for &(start, end) in &helix_regions {
628 for i in start..=end {
629 assignment[i] = 1;
630 }
631 }
632
633 for &(start, end) in &strand_regions {
635 for i in start..=end {
636 if assignment[i] == 1 {
637 let alpha_avg: f64 = props[i].0;
639 let beta_avg: f64 = props[i].1;
640 if beta_avg > alpha_avg {
641 assignment[i] = 2;
642 }
643 } else {
645 assignment[i] = 2;
646 }
647 }
648 }
649
650 let mut helix_scores = Vec::with_capacity(n);
652 let mut strand_scores = Vec::with_capacity(n);
653 let mut coil_scores = Vec::with_capacity(n);
654 let mut states = Vec::with_capacity(n);
655
656 for i in 0..n {
657 let (pa, pb, pt) = props[i];
658 helix_scores.push(pa);
659 strand_scores.push(pb);
660 coil_scores.push(pt);
661 states.push(match assignment[i] {
662 1 => SecondaryStructure::Helix,
663 2 => SecondaryStructure::Strand,
664 _ => SecondaryStructure::Coil,
665 });
666 }
667
668 let helix_count = states.iter().filter(|&&s| s == SecondaryStructure::Helix).count();
669 let strand_count = states.iter().filter(|&&s| s == SecondaryStructure::Strand).count();
670 let coil_count = n - helix_count - strand_count;
671
672 Ok(SecondaryStructurePrediction {
673 states,
674 helix_scores,
675 strand_scores,
676 coil_scores,
677 helix_fraction: helix_count as f64 / n as f64,
678 strand_fraction: strand_count as f64 / n as f64,
679 coil_fraction: coil_count as f64 / n as f64,
680 })
681}
682
683fn merge_regions(mut regions: Vec<(usize, usize)>) -> Vec<(usize, usize)> {
685 if regions.is_empty() {
686 return regions;
687 }
688 regions.sort_by_key(|r| r.0);
689 let mut merged = vec![regions[0]];
690 for &(start, end) in ®ions[1..] {
691 let last = merged.last_mut().unwrap();
692 if start <= last.1 + 1 {
693 last.1 = last.1.max(end);
694 } else {
695 merged.push((start, end));
696 }
697 }
698 merged
699}
700
701fn extend_region(
703 props: &[(f64, f64, f64)],
704 start: usize,
705 end: usize,
706 n: usize,
707 accessor: fn(&(f64, f64, f64)) -> f64,
708 threshold: f64,
709) -> (usize, usize) {
710 let mut s = start;
711 let mut e = end;
712
713 while s >= 4 {
715 let avg: f64 = (0..4).map(|j| accessor(&props[s - 4 + j])).sum::<f64>() / 4.0;
716 if avg >= threshold {
717 s -= 1;
718 } else {
719 break;
720 }
721 }
722
723 while e + 4 < n {
725 let avg: f64 = (0..4).map(|j| accessor(&props[e + 1 + j])).sum::<f64>() / 4.0;
726 if avg >= threshold {
727 e += 1;
728 } else {
729 break;
730 }
731 }
732
733 (s, e)
734}
735
736pub fn gor(seq: &[u8]) -> Result<SecondaryStructurePrediction> {
758 let norm = normalize_protein(seq)?;
759 let n = norm.len();
760
761 let indices: Vec<usize> = norm
762 .iter()
763 .map(|&aa| aa_index(aa).unwrap())
764 .collect();
765
766 let mut helix_scores = vec![0.0f64; n];
767 let mut strand_scores = vec![0.0f64; n];
768 let mut coil_scores = vec![0.0f64; n];
769
770 for i in 0..n {
772 let mut h = 0.0;
773 let mut e = 0.0;
774 let mut c = 0.0;
775 let mut weight_sum = 0.0;
776
777 let w_start = if i >= GOR_HALF_WIDTH { i - GOR_HALF_WIDTH } else { 0 };
778 let w_end = (i + GOR_HALF_WIDTH).min(n - 1);
779
780 for j in w_start..=w_end {
781 let dist = if j >= i { j - i } else { i - j };
782 let weight = 1.0 - (dist as f64 / (GOR_HALF_WIDTH as f64 + 1.0));
783 let (ih, ie, ic) = GOR_INFO[indices[j]];
784 h += ih * weight;
785 e += ie * weight;
786 c += ic * weight;
787 weight_sum += weight;
788 }
789
790 if weight_sum > 0.0 {
791 helix_scores[i] = h / weight_sum;
792 strand_scores[i] = e / weight_sum;
793 coil_scores[i] = c / weight_sum;
794 }
795 }
796
797 let mut states: Vec<SecondaryStructure> = (0..n)
799 .map(|i| {
800 let h = helix_scores[i];
801 let e = strand_scores[i];
802 let c = coil_scores[i];
803 if h >= e && h >= c {
804 SecondaryStructure::Helix
805 } else if e >= h && e >= c {
806 SecondaryStructure::Strand
807 } else {
808 SecondaryStructure::Coil
809 }
810 })
811 .collect();
812
813 if n >= 3 {
815 for i in 1..n - 1 {
816 let prev = states[i - 1];
817 let next = states[i + 1];
818 if prev == next && states[i] != prev {
819 states[i] = prev;
820 }
821 }
822 }
823
824 let helix_count = states.iter().filter(|&&s| s == SecondaryStructure::Helix).count();
825 let strand_count = states.iter().filter(|&&s| s == SecondaryStructure::Strand).count();
826 let coil_count = n - helix_count - strand_count;
827
828 Ok(SecondaryStructurePrediction {
829 states,
830 helix_scores,
831 strand_scores,
832 coil_scores,
833 helix_fraction: helix_count as f64 / n as f64,
834 strand_fraction: strand_count as f64 / n as f64,
835 coil_fraction: coil_count as f64 / n as f64,
836 })
837}
838
839pub fn predict_disorder(seq: &[u8], window: usize) -> Result<DisorderPrediction> {
862 let norm = normalize_protein(seq)?;
863 let n = norm.len();
864
865 if window == 0 || window % 2 == 0 {
866 return Err(CyaneaError::InvalidInput(
867 "window size must be odd and >= 1".to_string(),
868 ));
869 }
870 if window > n {
871 return Err(CyaneaError::InvalidInput(format!(
872 "window size {} exceeds sequence length {}",
873 window, n
874 )));
875 }
876
877 let raw_props: Vec<f64> = norm
878 .iter()
879 .map(|&aa| DISORDER_PROPENSITY[aa_index(aa).unwrap()])
880 .collect();
881
882 let half = window / 2;
883 let mut scores = Vec::with_capacity(n);
884
885 for i in 0..n {
886 let w_start = if i >= half { i - half } else { 0 };
887 let w_end = (i + half).min(n - 1);
888 let avg: f64 = raw_props[w_start..=w_end].iter().sum::<f64>()
889 / (w_end - w_start + 1) as f64;
890
891 let score = 1.0 / (1.0 + (-5.0 * avg).exp());
893 scores.push(score);
894 }
895
896 let disordered: Vec<bool> = scores.iter().map(|&s| s > 0.5).collect();
897 let n_disordered = disordered.iter().filter(|&&d| d).count();
898
899 Ok(DisorderPrediction {
900 scores,
901 disordered,
902 disorder_fraction: n_disordered as f64 / n as f64,
903 })
904}
905
906#[cfg(test)]
909mod tests {
910 use super::*;
911
912 #[test]
915 fn normalize_protein_uppercase() {
916 let r = normalize_protein(b"ACDEFGHIKLMNPQRSTVWY").unwrap();
917 assert_eq!(r, b"ACDEFGHIKLMNPQRSTVWY");
918 }
919
920 #[test]
921 fn normalize_protein_lowercase() {
922 let r = normalize_protein(b"acdefghiklmnpqrstvwy").unwrap();
923 assert_eq!(r, b"ACDEFGHIKLMNPQRSTVWY");
924 }
925
926 #[test]
927 fn normalize_protein_invalid() {
928 assert!(normalize_protein(b"ABCDE").is_err()); }
930
931 #[test]
934 fn composition_all_alanine() {
935 let comp = amino_acid_composition(b"AAAAAAAAAA").unwrap();
936 assert_eq!(comp.counts[0], 10); assert!((comp.fractions[0] - 1.0).abs() < 1e-10);
938 assert_eq!(comp.length, 10);
939 for i in 1..20 {
941 assert_eq!(comp.counts[i], 0);
942 }
943 }
944
945 #[test]
946 fn composition_each_aa_once() {
947 let comp = amino_acid_composition(b"ACDEFGHIKLMNPQRSTVWY").unwrap();
948 assert_eq!(comp.length, 20);
949 for i in 0..20 {
950 assert_eq!(comp.counts[i], 1);
951 assert!((comp.fractions[i] - 0.05).abs() < 1e-10);
952 }
953 }
954
955 #[test]
956 fn composition_empty_error() {
957 assert!(amino_acid_composition(b"").is_err());
958 }
959
960 #[test]
963 fn hydro_poly_i_kd() {
964 let profile =
966 hydrophobicity_profile(b"IIIIIIIII", 3, HydrophobicityScale::KyteDoolittle).unwrap();
967 for &v in &profile {
968 assert!((v - 4.5).abs() < 1e-10);
969 }
970 }
971
972 #[test]
973 fn hydro_poly_r_kd() {
974 let profile =
976 hydrophobicity_profile(b"RRRRRRRRR", 3, HydrophobicityScale::KyteDoolittle).unwrap();
977 for &v in &profile {
978 assert!((v - (-4.5)).abs() < 1e-10);
979 }
980 }
981
982 #[test]
983 fn hydro_hopp_woods_sign() {
984 let profile =
986 hydrophobicity_profile(b"DDDDDDDDD", 3, HydrophobicityScale::HoppWoods).unwrap();
987 for &v in &profile {
988 assert!(v > 0.0);
989 }
990 }
991
992 #[test]
993 fn hydro_even_window_error() {
994 assert!(
995 hydrophobicity_profile(b"AAAAAA", 4, HydrophobicityScale::KyteDoolittle).is_err()
996 );
997 }
998
999 #[test]
1000 fn hydro_window_1_raw() {
1001 let profile =
1003 hydrophobicity_profile(b"AIV", 1, HydrophobicityScale::KyteDoolittle).unwrap();
1004 assert_eq!(profile.len(), 3);
1005 assert!((profile[0] - 1.8).abs() < 1e-10); assert!((profile[1] - 4.5).abs() < 1e-10); assert!((profile[2] - 4.2).abs() < 1e-10); }
1009
1010 #[test]
1013 fn gravy_poly_i() {
1014 let g = gravy(b"IIIII").unwrap();
1015 assert!((g - 4.5).abs() < 1e-10);
1016 }
1017
1018 #[test]
1019 fn gravy_mixed() {
1020 let g = gravy(b"AR").unwrap();
1022 assert!((g - (-1.35)).abs() < 1e-10);
1023 }
1024
1025 #[test]
1026 fn gravy_empty_error() {
1027 assert!(gravy(b"").is_err());
1028 }
1029
1030 #[test]
1033 fn pi_poly_d_acidic() {
1034 let pi = isoelectric_point(b"DDDDD").unwrap();
1035 assert!(pi < 3.5, "poly-D pI should be < 3.5, got {}", pi);
1036 }
1037
1038 #[test]
1039 fn pi_poly_k_basic() {
1040 let pi = isoelectric_point(b"KKKKK").unwrap();
1041 assert!(pi > 10.0, "poly-K pI should be > 10.0, got {}", pi);
1042 }
1043
1044 #[test]
1045 fn pi_poly_g_neutral() {
1046 let pi = isoelectric_point(b"GGGGG").unwrap();
1047 assert!(pi > 5.0 && pi < 7.0, "poly-G pI should be ~6.0, got {}", pi);
1049 }
1050
1051 #[test]
1052 fn pi_single_residue() {
1053 let pi = isoelectric_point(b"A").unwrap();
1055 assert!(pi > 0.0 && pi < 14.0);
1056 }
1057
1058 #[test]
1059 fn pi_known_protein() {
1060 let pi = isoelectric_point(b"FVNQHLCGSHLVEALYLVCGERGFFYTPKT").unwrap();
1063 assert!(
1064 pi > 5.5 && pi < 8.5,
1065 "insulin B chain pI should be ~6.9, got {}",
1066 pi
1067 );
1068 }
1069
1070 #[test]
1073 fn ext_trp_only() {
1074 let ec = extinction_coefficient(b"W").unwrap();
1075 assert!((ec.reduced - 5500.0).abs() < 1e-10);
1076 assert!((ec.cystine - 5500.0).abs() < 1e-10); }
1078
1079 #[test]
1080 fn ext_tyr_only() {
1081 let ec = extinction_coefficient(b"Y").unwrap();
1082 assert!((ec.reduced - 1490.0).abs() < 1e-10);
1083 }
1084
1085 #[test]
1086 fn ext_cystine_contribution() {
1087 let ec = extinction_coefficient(b"CC").unwrap();
1089 assert!((ec.reduced - 0.0).abs() < 1e-10); assert!((ec.cystine - 125.0).abs() < 1e-10); }
1092
1093 #[test]
1094 fn ext_no_absorbers() {
1095 let ec = extinction_coefficient(b"AAAAAA").unwrap();
1097 assert!((ec.reduced - 0.0).abs() < 1e-10);
1098 assert!((ec.cystine - 0.0).abs() < 1e-10);
1099 }
1100
1101 #[test]
1104 fn cf_poly_a_helix() {
1105 let pred = chou_fasman(b"AAAAAAAAAAAAAAAAAAA").unwrap();
1107 assert!(
1108 pred.helix_fraction > 0.5,
1109 "poly-A should be mostly helix, got helix_fraction={}",
1110 pred.helix_fraction
1111 );
1112 }
1113
1114 #[test]
1115 fn cf_poly_v_strand() {
1116 let pred = chou_fasman(b"VVVVVVVVVVVVVVVVVVV").unwrap();
1118 assert!(
1119 pred.strand_fraction > 0.5,
1120 "poly-V should be mostly strand, got strand_fraction={}",
1121 pred.strand_fraction
1122 );
1123 }
1124
1125 #[test]
1126 fn cf_poly_g_coil() {
1127 let pred = chou_fasman(b"GGGGGGGGGGGGGGGGGGG").unwrap();
1129 assert!(
1130 pred.coil_fraction > 0.5,
1131 "poly-G should be mostly coil, got coil_fraction={}",
1132 pred.coil_fraction
1133 );
1134 }
1135
1136 #[test]
1137 fn cf_short_error() {
1138 assert!(chou_fasman(b"AAAA").is_err());
1139 }
1140
1141 #[test]
1144 fn gor_helix_rich() {
1145 let pred = gor(b"EEEEEEEEEEEEEEEEEEEE").unwrap();
1147 assert!(
1148 pred.helix_fraction > pred.strand_fraction,
1149 "E-rich should be mostly helix: H={}, E={}",
1150 pred.helix_fraction,
1151 pred.strand_fraction
1152 );
1153 }
1154
1155 #[test]
1156 fn gor_strand_rich() {
1157 let pred = gor(b"VVVVVVVVVVVVVVVVVVVV").unwrap();
1159 assert!(
1160 pred.strand_fraction > pred.helix_fraction,
1161 "V-rich should be mostly strand: E={}, H={}",
1162 pred.strand_fraction,
1163 pred.helix_fraction
1164 );
1165 }
1166
1167 #[test]
1168 fn gor_output_length() {
1169 let pred = gor(b"ACDEFGHIKLMNPQRSTVWY").unwrap();
1170 assert_eq!(pred.states.len(), 20);
1171 assert_eq!(pred.helix_scores.len(), 20);
1172 assert_eq!(pred.strand_scores.len(), 20);
1173 assert_eq!(pred.coil_scores.len(), 20);
1174 }
1175
1176 #[test]
1177 fn gor_fractions_sum() {
1178 let pred = gor(b"ACDEFGHIKLMNPQRSTVWY").unwrap();
1179 let sum = pred.helix_fraction + pred.strand_fraction + pred.coil_fraction;
1180 assert!(
1181 (sum - 1.0).abs() < 1e-10,
1182 "fractions should sum to 1.0, got {}",
1183 sum
1184 );
1185 }
1186
1187 #[test]
1190 fn disorder_charged_stretch() {
1191 let pred = predict_disorder(b"KKKDDDEEEKKKDDDEEEK", 7).unwrap();
1193 assert!(
1194 pred.disorder_fraction > 0.5,
1195 "charged stretch should be mostly disordered, got {}",
1196 pred.disorder_fraction
1197 );
1198 }
1199
1200 #[test]
1201 fn disorder_hydrophobic_stretch() {
1202 let pred = predict_disorder(b"IVLFWIVLFWIVLFWIVLFW", 7).unwrap();
1204 assert!(
1205 pred.disorder_fraction < 0.5,
1206 "hydrophobic stretch should be mostly ordered, got {}",
1207 pred.disorder_fraction
1208 );
1209 }
1210
1211 #[test]
1212 fn disorder_scores_bounded() {
1213 let pred = predict_disorder(b"ACDEFGHIKLMNPQRSTVWY", 5).unwrap();
1214 for &s in &pred.scores {
1215 assert!(s >= 0.0 && s <= 1.0, "score {} out of [0,1]", s);
1216 }
1217 }
1218
1219 #[test]
1220 fn disorder_window_zero_error() {
1221 assert!(predict_disorder(b"AAAA", 0).is_err());
1222 }
1223}