1use super::functions::*;
5
6#[allow(dead_code)]
8#[derive(Debug, Clone)]
9pub struct AlignmentScorer {
10 pub match_score: i32,
11 pub mismatch_penalty: i32,
12 pub gap_open: i32,
13 pub gap_extend: i32,
14}
15#[allow(dead_code)]
16impl AlignmentScorer {
17 pub fn default_scorer() -> Self {
19 Self {
20 match_score: 2,
21 mismatch_penalty: -1,
22 gap_open: -4,
23 gap_extend: -1,
24 }
25 }
26 pub fn blosum62_approx() -> Self {
28 Self {
29 match_score: 4,
30 mismatch_penalty: -1,
31 gap_open: -11,
32 gap_extend: -1,
33 }
34 }
35 pub fn score_pair(&self, a: char, b: char) -> i32 {
37 if a == b {
38 self.match_score
39 } else {
40 self.mismatch_penalty
41 }
42 }
43 pub fn smith_waterman_description(&self) -> String {
45 format!(
46 "Smith-Waterman: match={}, mismatch={}, gap_open={}, gap_ext={}",
47 self.match_score, self.mismatch_penalty, self.gap_open, self.gap_extend
48 )
49 }
50}
51#[derive(Debug, Clone, Copy, PartialEq)]
53pub enum LatticeMove {
54 Up,
55 Down,
56 Left,
57 Right,
58}
59#[allow(dead_code)]
61#[derive(Debug, Clone)]
62pub struct ProteinStructure {
63 pub pdb_id: String,
64 pub sequence: String,
65 pub secondary: Vec<SecondaryStructure>,
66 pub resolution_angstrom: f64,
67}
68#[allow(dead_code)]
69impl ProteinStructure {
70 pub fn new(pdb_id: &str, seq: &str, ss: Vec<SecondaryStructure>, res: f64) -> Self {
72 Self {
73 pdb_id: pdb_id.to_string(),
74 sequence: seq.to_string(),
75 secondary: ss,
76 resolution_angstrom: res,
77 }
78 }
79 pub fn helix_fraction(&self) -> f64 {
81 if self.secondary.is_empty() {
82 return 0.0;
83 }
84 let n_helix = self
85 .secondary
86 .iter()
87 .filter(|s| **s == SecondaryStructure::AlphaHelix)
88 .count();
89 n_helix as f64 / self.secondary.len() as f64
90 }
91 pub fn is_high_resolution(&self) -> bool {
93 self.resolution_angstrom < 2.0
94 }
95}
96#[derive(Debug, Clone, PartialEq)]
98pub struct BasePair {
99 pub i: usize,
100 pub j: usize,
101}
102#[derive(Debug, Clone)]
104pub struct PhyloBranch {
105 pub from: String,
106 pub to: String,
107 pub length: f64,
108}
109#[derive(Debug, Clone, PartialEq)]
111pub struct BlastHit {
112 pub query_pos: usize,
113 pub subject_pos: usize,
114 pub word: String,
115 pub score: i32,
116}
117#[derive(Debug, Clone)]
120pub struct RNAMFEFolder {
121 pub unpaired_penalty: f64,
123 pub pair_bonus: f64,
125}
126impl RNAMFEFolder {
127 pub fn new() -> Self {
129 RNAMFEFolder {
130 unpaired_penalty: 0.0,
131 pair_bonus: -1.0,
132 }
133 }
134 pub fn with_params(pair_bonus: f64, unpaired_penalty: f64) -> Self {
136 RNAMFEFolder {
137 unpaired_penalty,
138 pair_bonus,
139 }
140 }
141 pub fn fold(&self, sequence: &str) -> (f64, Vec<BasePair>) {
143 let (n_pairs, pairs) = nussinov(sequence);
144 let n_unpaired = sequence.len().saturating_sub(2 * pairs.len());
145 let mfe = n_pairs as f64 * self.pair_bonus + n_unpaired as f64 * self.unpaired_penalty;
146 (mfe, pairs)
147 }
148 pub fn mfe(&self, sequence: &str) -> f64 {
150 self.fold(sequence).0
151 }
152}
153#[derive(Debug, Clone, PartialEq)]
155pub struct Alignment {
156 pub score: i32,
157 pub aligned_a: String,
158 pub aligned_b: String,
159}
160#[derive(Debug, Clone, PartialEq, Eq, Hash)]
162pub struct DeBruijnNode(pub String);
163#[derive(Debug, Clone)]
165pub struct DistMatrix {
166 pub labels: Vec<String>,
167 pub data: Vec<Vec<f64>>,
168}
169impl DistMatrix {
170 pub fn new(labels: Vec<String>, data: Vec<Vec<f64>>) -> Self {
172 DistMatrix { labels, data }
173 }
174 pub fn n(&self) -> usize {
176 self.labels.len()
177 }
178 pub fn get(&self, i: usize, j: usize) -> f64 {
180 self.data[i][j]
181 }
182}
183#[derive(Debug, Clone)]
185pub struct DeBruijnGraph {
186 pub k: usize,
187 pub edges: Vec<DeBruijnEdge>,
188}
189impl DeBruijnGraph {
190 pub fn build(reads: &[&str], k: usize) -> Self {
192 let mut edges = Vec::new();
193 for read in reads {
194 let chars: Vec<char> = read.chars().collect();
195 if chars.len() < k {
196 continue;
197 }
198 for i in 0..=(chars.len() - k) {
199 let kmer: String = chars[i..i + k].iter().collect();
200 let from = DeBruijnNode(chars[i..i + k - 1].iter().collect());
201 let to = DeBruijnNode(chars[i + 1..i + k].iter().collect());
202 edges.push(DeBruijnEdge {
203 from,
204 to,
205 label: kmer,
206 });
207 }
208 }
209 DeBruijnGraph { k, edges }
210 }
211 pub fn in_degree(&self, node: &DeBruijnNode) -> usize {
213 self.edges.iter().filter(|e| &e.to == node).count()
214 }
215 pub fn out_degree(&self, node: &DeBruijnNode) -> usize {
217 self.edges.iter().filter(|e| &e.from == node).count()
218 }
219 pub fn greedy_assemble(&self) -> String {
223 if self.edges.is_empty() {
224 return String::new();
225 }
226 let mut used = vec![false; self.edges.len()];
227 let start = self.edges[0].from.clone();
228 let mut path = vec![start.clone()];
229 let mut current = start;
230 loop {
231 let next_edge = self
232 .edges
233 .iter()
234 .enumerate()
235 .find(|(idx, e)| !used[*idx] && e.from == current);
236 match next_edge {
237 Some((idx, edge)) => {
238 used[idx] = true;
239 current = edge.to.clone();
240 path.push(current.clone());
241 }
242 None => break,
243 }
244 }
245 if path.is_empty() {
246 return String::new();
247 }
248 let mut result = path[0].0.clone();
249 for node in &path[1..] {
250 if let Some(last_char) = node.0.chars().last() {
251 result.push(last_char);
252 }
253 }
254 result
255 }
256}
257#[allow(dead_code)]
259#[derive(Debug, Clone)]
260pub struct GeneExpressionMatrix {
261 pub genes: Vec<String>,
262 pub samples: Vec<String>,
263 pub data: Vec<Vec<f64>>,
264}
265#[allow(dead_code)]
266impl GeneExpressionMatrix {
267 pub fn new(genes: Vec<String>, samples: Vec<String>, data: Vec<Vec<f64>>) -> Self {
269 Self {
270 genes,
271 samples,
272 data,
273 }
274 }
275 pub fn mean_expression(&self, gene_idx: usize) -> f64 {
277 if gene_idx >= self.data.len() || self.data[gene_idx].is_empty() {
278 return 0.0;
279 }
280 let row = &self.data[gene_idx];
281 row.iter().sum::<f64>() / row.len() as f64
282 }
283 pub fn num_genes(&self) -> usize {
285 self.genes.len()
286 }
287 pub fn num_samples(&self) -> usize {
289 self.samples.len()
290 }
291}
292#[derive(Debug, Clone)]
294pub struct NeedlemanWunschGlobal {
295 pub gap_penalty: i32,
296 pub match_score: i32,
297 pub mismatch_score: i32,
298}
299impl NeedlemanWunschGlobal {
300 pub fn new(match_score: i32, mismatch_score: i32, gap_penalty: i32) -> Self {
302 NeedlemanWunschGlobal {
303 gap_penalty,
304 match_score,
305 mismatch_score,
306 }
307 }
308 pub fn align(&self, a: &str, b: &str) -> Alignment {
310 let a_chars: Vec<char> = a.chars().collect();
311 let b_chars: Vec<char> = b.chars().collect();
312 let m = a_chars.len();
313 let n = b_chars.len();
314 let ms = self.match_score;
315 let mm = self.mismatch_score;
316 let gap = self.gap_penalty;
317 let mut dp = vec![vec![0i32; n + 1]; m + 1];
318 for i in 0..=m {
319 dp[i][0] = i as i32 * gap;
320 }
321 for j in 0..=n {
322 dp[0][j] = j as i32 * gap;
323 }
324 for i in 1..=m {
325 for j in 1..=n {
326 let subst = if a_chars[i - 1] == b_chars[j - 1] {
327 ms
328 } else {
329 mm
330 };
331 let mat = dp[i - 1][j - 1] + subst;
332 let del = dp[i - 1][j] + gap;
333 let ins = dp[i][j - 1] + gap;
334 dp[i][j] = mat.max(del).max(ins);
335 }
336 }
337 let mut aligned_a = String::new();
338 let mut aligned_b = String::new();
339 let mut i = m;
340 let mut j = n;
341 while i > 0 || j > 0 {
342 if i > 0 && j > 0 {
343 let subst = if a_chars[i - 1] == b_chars[j - 1] {
344 ms
345 } else {
346 mm
347 };
348 if dp[i][j] == dp[i - 1][j - 1] + subst {
349 aligned_a.push(a_chars[i - 1]);
350 aligned_b.push(b_chars[j - 1]);
351 i -= 1;
352 j -= 1;
353 continue;
354 }
355 }
356 if i > 0 && dp[i][j] == dp[i - 1][j] + gap {
357 aligned_a.push(a_chars[i - 1]);
358 aligned_b.push('-');
359 i -= 1;
360 } else {
361 aligned_a.push('-');
362 aligned_b.push(b_chars[j - 1]);
363 j -= 1;
364 }
365 }
366 let aligned_a: String = aligned_a.chars().rev().collect();
367 let aligned_b: String = aligned_b.chars().rev().collect();
368 Alignment {
369 score: dp[m][n],
370 aligned_a,
371 aligned_b,
372 }
373 }
374 pub fn identity(&self, a: &str, b: &str) -> f64 {
376 let aln = self.align(a, b);
377 let len = aln.aligned_a.len().max(1);
378 let matches = aln
379 .aligned_a
380 .chars()
381 .zip(aln.aligned_b.chars())
382 .filter(|(ca, cb)| ca == cb && *ca != '-')
383 .count();
384 matches as f64 / len as f64
385 }
386}
387#[allow(dead_code)]
389#[derive(Debug, Clone, PartialEq)]
390pub enum SecondaryStructure {
391 AlphaHelix,
392 BetaStrand,
393 Loop,
394 Turn,
395}
396#[allow(dead_code)]
397impl SecondaryStructure {
398 pub fn code(&self) -> char {
400 match self {
401 Self::AlphaHelix => 'H',
402 Self::BetaStrand => 'E',
403 Self::Loop => 'C',
404 Self::Turn => 'T',
405 }
406 }
407}
408#[derive(Debug, Clone, PartialEq)]
410pub struct DeBruijnEdge {
411 pub from: DeBruijnNode,
412 pub to: DeBruijnNode,
413 pub label: String,
414}
415#[derive(Debug, Clone)]
417pub struct DeBruijnAssembler {
418 pub k: usize,
419}
420impl DeBruijnAssembler {
421 pub fn new(k: usize) -> Self {
423 DeBruijnAssembler { k }
424 }
425 pub fn assemble(&self, reads: &[&str]) -> Vec<String> {
427 let graph = DeBruijnGraph::build(reads, self.k);
428 let contig = graph.greedy_assemble();
429 if contig.is_empty() {
430 vec![]
431 } else {
432 vec![contig]
433 }
434 }
435 pub fn count_kmers(&self, reads: &[&str]) -> usize {
437 let graph = DeBruijnGraph::build(reads, self.k);
438 let mut labels: std::collections::HashSet<String> = std::collections::HashSet::new();
439 for edge in &graph.edges {
440 labels.insert(edge.label.clone());
441 }
442 labels.len()
443 }
444}
445#[derive(Debug, Clone)]
447pub struct SmithWatermanAligner {
448 pub gap_penalty: i32,
449 pub match_score: i32,
450 pub mismatch_score: i32,
451}
452impl SmithWatermanAligner {
453 pub fn new(match_score: i32, mismatch_score: i32, gap_penalty: i32) -> Self {
455 SmithWatermanAligner {
456 gap_penalty,
457 match_score,
458 mismatch_score,
459 }
460 }
461 pub fn align(&self, a: &str, b: &str) -> Alignment {
463 let a_chars: Vec<char> = a.chars().collect();
464 let b_chars: Vec<char> = b.chars().collect();
465 let m = a_chars.len();
466 let n = b_chars.len();
467 let ms = self.match_score;
468 let mm = self.mismatch_score;
469 let gap = self.gap_penalty;
470 let mut dp = vec![vec![0i32; n + 1]; m + 1];
471 let mut best_score = 0i32;
472 let mut best_i = 0usize;
473 let mut best_j = 0usize;
474 for i in 1..=m {
475 for j in 1..=n {
476 let subst = if a_chars[i - 1] == b_chars[j - 1] {
477 ms
478 } else {
479 mm
480 };
481 let mat = dp[i - 1][j - 1] + subst;
482 let del = dp[i - 1][j] + gap;
483 let ins = dp[i][j - 1] + gap;
484 dp[i][j] = 0i32.max(mat).max(del).max(ins);
485 if dp[i][j] > best_score {
486 best_score = dp[i][j];
487 best_i = i;
488 best_j = j;
489 }
490 }
491 }
492 let mut aligned_a = String::new();
493 let mut aligned_b = String::new();
494 let mut i = best_i;
495 let mut j = best_j;
496 while i > 0 && j > 0 && dp[i][j] > 0 {
497 let subst = if a_chars[i - 1] == b_chars[j - 1] {
498 ms
499 } else {
500 mm
501 };
502 if dp[i][j] == dp[i - 1][j - 1] + subst {
503 aligned_a.push(a_chars[i - 1]);
504 aligned_b.push(b_chars[j - 1]);
505 i -= 1;
506 j -= 1;
507 } else if dp[i][j] == dp[i - 1][j] + gap {
508 aligned_a.push(a_chars[i - 1]);
509 aligned_b.push('-');
510 i -= 1;
511 } else {
512 aligned_a.push('-');
513 aligned_b.push(b_chars[j - 1]);
514 j -= 1;
515 }
516 }
517 let aligned_a: String = aligned_a.chars().rev().collect();
518 let aligned_b: String = aligned_b.chars().rev().collect();
519 Alignment {
520 score: best_score,
521 aligned_a,
522 aligned_b,
523 }
524 }
525 pub fn score_only(&self, a: &str, b: &str) -> i32 {
527 self.align(a, b).score
528 }
529}
530#[derive(Debug, Clone)]
532pub struct PhyloTree {
533 pub branches: Vec<PhyloBranch>,
534}
535impl PhyloTree {
536 pub fn neighbor_joining(mut dist: DistMatrix) -> PhyloTree {
538 let mut branches = Vec::new();
539 let mut active: Vec<usize> = (0..dist.n()).collect();
540 let mut node_count = dist.n();
541 while active.len() > 2 {
542 let n = active.len();
543 let r: Vec<f64> = active
544 .iter()
545 .map(|&i| active.iter().map(|&j| dist.get(i, j)).sum::<f64>())
546 .collect();
547 let mut min_q = f64::INFINITY;
548 let mut min_pair = (0, 1);
549 for ai in 0..n {
550 for aj in (ai + 1)..n {
551 let i = active[ai];
552 let j = active[aj];
553 let q = (n as f64 - 2.0) * dist.get(i, j) - r[ai] - r[aj];
554 if q < min_q {
555 min_q = q;
556 min_pair = (ai, aj);
557 }
558 }
559 }
560 let (ai, aj) = min_pair;
561 let i = active[ai];
562 let j = active[aj];
563 let n_f = n as f64;
564 let d_ij = dist.get(i, j);
565 let len_i = 0.5 * d_ij + (r[ai] - r[aj]) / (2.0 * (n_f - 2.0));
566 let len_j = d_ij - len_i;
567 let new_label = format!("node{}", node_count);
568 node_count += 1;
569 branches.push(PhyloBranch {
570 from: new_label.clone(),
571 to: dist.labels[i].clone(),
572 length: len_i.max(0.0),
573 });
574 branches.push(PhyloBranch {
575 from: new_label.clone(),
576 to: dist.labels[j].clone(),
577 length: len_j.max(0.0),
578 });
579 let remaining: Vec<usize> = active
580 .iter()
581 .enumerate()
582 .filter(|&(idx, _)| idx != ai && idx != aj)
583 .map(|(_, &k)| k)
584 .collect();
585 let new_idx = dist.data.len();
586 let mut new_row = vec![0.0f64; new_idx + 1];
587 for &k in &remaining {
588 let d_new_k = 0.5 * (dist.get(i, k) + dist.get(j, k) - d_ij);
589 new_row[k] = d_new_k.max(0.0);
590 dist.data[k].push(d_new_k.max(0.0));
591 }
592 dist.data.push(new_row);
593 dist.labels.push(new_label);
594 active.retain(|&x| x != i && x != j);
595 active.push(new_idx);
596 }
597 if active.len() == 2 {
598 let i = active[0];
599 let j = active[1];
600 branches.push(PhyloBranch {
601 from: dist.labels[i].clone(),
602 to: dist.labels[j].clone(),
603 length: dist.get(i, j),
604 });
605 }
606 PhyloTree { branches }
607 }
608}
609#[derive(Debug, Clone)]
613pub struct PhylogeneticParsimony;
614impl PhylogeneticParsimony {
615 pub fn new() -> Self {
617 PhylogeneticParsimony
618 }
619 pub fn score(&self, tree: &ParsimonyTree) -> (usize, std::collections::HashSet<char>) {
622 fitch_parsimony_score(tree)
623 }
624 pub fn build_balanced(leaves: &[char]) -> ParsimonyTree {
626 match leaves.len() {
627 0 => ParsimonyTree::Leaf('?'),
628 1 => ParsimonyTree::Leaf(leaves[0]),
629 _ => {
630 let mid = leaves.len() / 2;
631 let left = Self::build_balanced(&leaves[..mid]);
632 let right = Self::build_balanced(&leaves[mid..]);
633 ParsimonyTree::Internal(Box::new(left), Box::new(right))
634 }
635 }
636 }
637 pub fn score_leaves(&self, leaves: &[char]) -> usize {
640 let tree = Self::build_balanced(leaves);
641 self.score(&tree).0
642 }
643}
644#[derive(Debug, Clone)]
646pub struct HiddenMarkovModel {
647 pub n_states: usize,
648 pub n_symbols: usize,
649 pub initial: Vec<f64>,
650 pub transition: Vec<Vec<f64>>,
651 pub emission: Vec<Vec<f64>>,
652}
653impl HiddenMarkovModel {
654 pub fn uniform(n_states: usize, n_symbols: usize) -> Self {
656 let p_state = 1.0 / n_states as f64;
657 let p_sym = 1.0 / n_symbols as f64;
658 HiddenMarkovModel {
659 n_states,
660 n_symbols,
661 initial: vec![p_state; n_states],
662 transition: vec![vec![p_state; n_states]; n_states],
663 emission: vec![vec![p_sym; n_symbols]; n_states],
664 }
665 }
666 pub fn viterbi(&self, observations: &[usize]) -> Vec<usize> {
670 let t = observations.len();
671 if t == 0 {
672 return vec![];
673 }
674 let n = self.n_states;
675 let mut dp = vec![vec![f64::NEG_INFINITY; n]; t];
676 let mut backtrack = vec![vec![0usize; n]; t];
677 for s in 0..n {
678 let obs = observations[0];
679 if obs < self.n_symbols {
680 dp[0][s] = self.initial[s].ln() + self.emission[s][obs].ln();
681 }
682 }
683 for ti in 1..t {
684 let obs = observations[ti];
685 for s in 0..n {
686 let emit = if obs < self.n_symbols {
687 self.emission[s][obs].ln()
688 } else {
689 f64::NEG_INFINITY
690 };
691 let (best_prev, best_val) = (0..n)
692 .map(|prev| {
693 let v = dp[ti - 1][prev] + self.transition[prev][s].ln();
694 (prev, v)
695 })
696 .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
697 .unwrap_or((0, f64::NEG_INFINITY));
698 dp[ti][s] = best_val + emit;
699 backtrack[ti][s] = best_prev;
700 }
701 }
702 let mut path = vec![0usize; t];
703 path[t - 1] = (0..n)
704 .max_by(|&a, &b| {
705 dp[t - 1][a]
706 .partial_cmp(&dp[t - 1][b])
707 .unwrap_or(std::cmp::Ordering::Equal)
708 })
709 .unwrap_or(0);
710 for ti in (1..t).rev() {
711 path[ti - 1] = backtrack[ti][path[ti]];
712 }
713 path
714 }
715 pub fn forward_probability(&self, observations: &[usize]) -> f64 {
717 let t = observations.len();
718 if t == 0 {
719 return 1.0;
720 }
721 let n = self.n_states;
722 let mut alpha = vec![0.0f64; n];
723 for s in 0..n {
724 let obs = observations[0];
725 if obs < self.n_symbols {
726 alpha[s] = self.initial[s] * self.emission[s][obs];
727 }
728 }
729 for ti in 1..t {
730 let obs = observations[ti];
731 let mut alpha_new = vec![0.0f64; n];
732 for s in 0..n {
733 let emit = if obs < self.n_symbols {
734 self.emission[s][obs]
735 } else {
736 0.0
737 };
738 alpha_new[s] = (0..n)
739 .map(|prev| alpha[prev] * self.transition[prev][s])
740 .sum::<f64>()
741 * emit;
742 }
743 alpha = alpha_new;
744 }
745 alpha.iter().sum()
746 }
747}
748#[derive(Debug, Clone)]
750pub enum ParsimonyTree {
751 Leaf(char),
752 Internal(Box<ParsimonyTree>, Box<ParsimonyTree>),
753}
754#[derive(Debug, Clone, Copy, PartialEq)]
756pub enum HPResidue {
757 H,
758 P,
759}
760#[allow(dead_code)]
762#[derive(Debug, Clone)]
763pub struct RegulatoryNode {
764 pub gene_name: String,
765 pub is_transcription_factor: bool,
766 pub targets: Vec<String>,
767}
768#[allow(dead_code)]
769impl RegulatoryNode {
770 pub fn new(name: &str, is_tf: bool) -> Self {
772 Self {
773 gene_name: name.to_string(),
774 is_transcription_factor: is_tf,
775 targets: Vec::new(),
776 }
777 }
778 pub fn add_target(&mut self, target: &str) {
780 self.targets.push(target.to_string());
781 }
782 pub fn out_degree(&self) -> usize {
784 self.targets.len()
785 }
786 pub fn description(&self) -> String {
788 if self.is_transcription_factor {
789 format!(
790 "TF {} regulates {} genes",
791 self.gene_name,
792 self.targets.len()
793 )
794 } else {
795 format!("Gene {} (non-TF)", self.gene_name)
796 }
797 }
798}
799#[allow(dead_code)]
801#[derive(Debug, Clone)]
802pub struct SequenceHmm {
803 pub name: String,
804 pub num_states: usize,
805 pub alphabet_size: usize,
806}
807#[allow(dead_code)]
808impl SequenceHmm {
809 pub fn profile(name: &str, length: usize) -> Self {
811 Self {
812 name: name.to_string(),
813 num_states: 3 * length,
814 alphabet_size: 20,
815 }
816 }
817 pub fn cpg_island_detector() -> Self {
819 Self {
820 name: "CpG-HMM".to_string(),
821 num_states: 2,
822 alphabet_size: 4,
823 }
824 }
825 pub fn viterbi_description(&self) -> String {
827 format!("Viterbi on {} ({} states)", self.name, self.num_states)
828 }
829}