oxilean_std/bioinformatics/
types.rs

1//! Auto-generated module
2//!
3//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)
4use super::functions::*;
5
6/// Sequence alignment scoring.
7#[allow(dead_code)]
8#[derive(Debug, Clone)]
9pub struct AlignmentScorer {
10    pub match_score: i32,
11    pub mismatch_penalty: i32,
12    pub gap_open: i32,
13    pub gap_extend: i32,
14}
15#[allow(dead_code)]
16impl AlignmentScorer {
17    /// Default scoring scheme.
18    pub fn default_scorer() -> Self {
19        Self {
20            match_score: 2,
21            mismatch_penalty: -1,
22            gap_open: -4,
23            gap_extend: -1,
24        }
25    }
26    /// BLOSUM62-like scoring.
27    pub fn blosum62_approx() -> Self {
28        Self {
29            match_score: 4,
30            mismatch_penalty: -1,
31            gap_open: -11,
32            gap_extend: -1,
33        }
34    }
35    /// Score two characters.
36    pub fn score_pair(&self, a: char, b: char) -> i32 {
37        if a == b {
38            self.match_score
39        } else {
40            self.mismatch_penalty
41        }
42    }
43    /// Smith-Waterman local alignment description.
44    pub fn smith_waterman_description(&self) -> String {
45        format!(
46            "Smith-Waterman: match={}, mismatch={}, gap_open={}, gap_ext={}",
47            self.match_score, self.mismatch_penalty, self.gap_open, self.gap_extend
48        )
49    }
50}
51/// A 2D lattice conformation as a sequence of moves.
52#[derive(Debug, Clone, Copy, PartialEq)]
53pub enum LatticeMove {
54    Up,
55    Down,
56    Left,
57    Right,
58}
59/// Protein structure data.
60#[allow(dead_code)]
61#[derive(Debug, Clone)]
62pub struct ProteinStructure {
63    pub pdb_id: String,
64    pub sequence: String,
65    pub secondary: Vec<SecondaryStructure>,
66    pub resolution_angstrom: f64,
67}
68#[allow(dead_code)]
69impl ProteinStructure {
70    /// Create protein structure record.
71    pub fn new(pdb_id: &str, seq: &str, ss: Vec<SecondaryStructure>, res: f64) -> Self {
72        Self {
73            pdb_id: pdb_id.to_string(),
74            sequence: seq.to_string(),
75            secondary: ss,
76            resolution_angstrom: res,
77        }
78    }
79    /// Fraction of helix.
80    pub fn helix_fraction(&self) -> f64 {
81        if self.secondary.is_empty() {
82            return 0.0;
83        }
84        let n_helix = self
85            .secondary
86            .iter()
87            .filter(|s| **s == SecondaryStructure::AlphaHelix)
88            .count();
89        n_helix as f64 / self.secondary.len() as f64
90    }
91    /// High resolution?
92    pub fn is_high_resolution(&self) -> bool {
93        self.resolution_angstrom < 2.0
94    }
95}
96/// A base pair in an RNA secondary structure.
97#[derive(Debug, Clone, PartialEq)]
98pub struct BasePair {
99    pub i: usize,
100    pub j: usize,
101}
102/// A branch in a phylogenetic tree.
103#[derive(Debug, Clone)]
104pub struct PhyloBranch {
105    pub from: String,
106    pub to: String,
107    pub length: f64,
108}
109/// A BLAST hit: position in query and subject, and the matching word.
110#[derive(Debug, Clone, PartialEq)]
111pub struct BlastHit {
112    pub query_pos: usize,
113    pub subject_pos: usize,
114    pub word: String,
115    pub score: i32,
116}
117/// An RNA secondary structure folder using the Nussinov maximum base-pair algorithm
118/// as a proxy for minimum free energy (MFE) folding.
119#[derive(Debug, Clone)]
120pub struct RNAMFEFolder {
121    /// Penalty per unpaired base (negative = energetically unfavourable).
122    pub unpaired_penalty: f64,
123    /// Energy bonus per base pair.
124    pub pair_bonus: f64,
125}
126impl RNAMFEFolder {
127    /// Create an `RNAMFEFolder` with default Turner-like parameters.
128    pub fn new() -> Self {
129        RNAMFEFolder {
130            unpaired_penalty: 0.0,
131            pair_bonus: -1.0,
132        }
133    }
134    /// Create an `RNAMFEFolder` with custom parameters.
135    pub fn with_params(pair_bonus: f64, unpaired_penalty: f64) -> Self {
136        RNAMFEFolder {
137            unpaired_penalty,
138            pair_bonus,
139        }
140    }
141    /// Fold an RNA sequence; returns (approximate MFE, base-pair list).
142    pub fn fold(&self, sequence: &str) -> (f64, Vec<BasePair>) {
143        let (n_pairs, pairs) = nussinov(sequence);
144        let n_unpaired = sequence.len().saturating_sub(2 * pairs.len());
145        let mfe = n_pairs as f64 * self.pair_bonus + n_unpaired as f64 * self.unpaired_penalty;
146        (mfe, pairs)
147    }
148    /// Return only the MFE estimate.
149    pub fn mfe(&self, sequence: &str) -> f64 {
150        self.fold(sequence).0
151    }
152}
153/// Alignment result containing score and aligned sequences.
154#[derive(Debug, Clone, PartialEq)]
155pub struct Alignment {
156    pub score: i32,
157    pub aligned_a: String,
158    pub aligned_b: String,
159}
160/// A De Bruijn graph node is a k-1-mer.
161#[derive(Debug, Clone, PartialEq, Eq, Hash)]
162pub struct DeBruijnNode(pub String);
163/// A distance matrix for phylogenetic reconstruction.
164#[derive(Debug, Clone)]
165pub struct DistMatrix {
166    pub labels: Vec<String>,
167    pub data: Vec<Vec<f64>>,
168}
169impl DistMatrix {
170    /// Create a new distance matrix.
171    pub fn new(labels: Vec<String>, data: Vec<Vec<f64>>) -> Self {
172        DistMatrix { labels, data }
173    }
174    /// Number of taxa.
175    pub fn n(&self) -> usize {
176        self.labels.len()
177    }
178    /// Get distance between taxa i and j.
179    pub fn get(&self, i: usize, j: usize) -> f64 {
180        self.data[i][j]
181    }
182}
183/// De Bruijn graph constructed from reads.
184#[derive(Debug, Clone)]
185pub struct DeBruijnGraph {
186    pub k: usize,
187    pub edges: Vec<DeBruijnEdge>,
188}
189impl DeBruijnGraph {
190    /// Build a De Bruijn graph of order k from a list of sequence reads.
191    pub fn build(reads: &[&str], k: usize) -> Self {
192        let mut edges = Vec::new();
193        for read in reads {
194            let chars: Vec<char> = read.chars().collect();
195            if chars.len() < k {
196                continue;
197            }
198            for i in 0..=(chars.len() - k) {
199                let kmer: String = chars[i..i + k].iter().collect();
200                let from = DeBruijnNode(chars[i..i + k - 1].iter().collect());
201                let to = DeBruijnNode(chars[i + 1..i + k].iter().collect());
202                edges.push(DeBruijnEdge {
203                    from,
204                    to,
205                    label: kmer,
206                });
207            }
208        }
209        DeBruijnGraph { k, edges }
210    }
211    /// Count in-degree of a node.
212    pub fn in_degree(&self, node: &DeBruijnNode) -> usize {
213        self.edges.iter().filter(|e| &e.to == node).count()
214    }
215    /// Count out-degree of a node.
216    pub fn out_degree(&self, node: &DeBruijnNode) -> usize {
217        self.edges.iter().filter(|e| &e.from == node).count()
218    }
219    /// Attempt a greedy Eulerian path (genome assembly).
220    ///
221    /// Returns assembled sequence if a path exists, otherwise partial result.
222    pub fn greedy_assemble(&self) -> String {
223        if self.edges.is_empty() {
224            return String::new();
225        }
226        let mut used = vec![false; self.edges.len()];
227        let start = self.edges[0].from.clone();
228        let mut path = vec![start.clone()];
229        let mut current = start;
230        loop {
231            let next_edge = self
232                .edges
233                .iter()
234                .enumerate()
235                .find(|(idx, e)| !used[*idx] && e.from == current);
236            match next_edge {
237                Some((idx, edge)) => {
238                    used[idx] = true;
239                    current = edge.to.clone();
240                    path.push(current.clone());
241                }
242                None => break,
243            }
244        }
245        if path.is_empty() {
246            return String::new();
247        }
248        let mut result = path[0].0.clone();
249        for node in &path[1..] {
250            if let Some(last_char) = node.0.chars().last() {
251                result.push(last_char);
252            }
253        }
254        result
255    }
256}
257/// Gene expression matrix (simplified).
258#[allow(dead_code)]
259#[derive(Debug, Clone)]
260pub struct GeneExpressionMatrix {
261    pub genes: Vec<String>,
262    pub samples: Vec<String>,
263    pub data: Vec<Vec<f64>>,
264}
265#[allow(dead_code)]
266impl GeneExpressionMatrix {
267    /// Create a gene expression matrix.
268    pub fn new(genes: Vec<String>, samples: Vec<String>, data: Vec<Vec<f64>>) -> Self {
269        Self {
270            genes,
271            samples,
272            data,
273        }
274    }
275    /// Compute mean expression of gene at index i.
276    pub fn mean_expression(&self, gene_idx: usize) -> f64 {
277        if gene_idx >= self.data.len() || self.data[gene_idx].is_empty() {
278            return 0.0;
279        }
280        let row = &self.data[gene_idx];
281        row.iter().sum::<f64>() / row.len() as f64
282    }
283    /// Number of genes.
284    pub fn num_genes(&self) -> usize {
285        self.genes.len()
286    }
287    /// Number of samples.
288    pub fn num_samples(&self) -> usize {
289        self.samples.len()
290    }
291}
292/// A global sequence aligner using the Needleman-Wunsch algorithm.
293#[derive(Debug, Clone)]
294pub struct NeedlemanWunschGlobal {
295    pub gap_penalty: i32,
296    pub match_score: i32,
297    pub mismatch_score: i32,
298}
299impl NeedlemanWunschGlobal {
300    /// Create a new global aligner.
301    pub fn new(match_score: i32, mismatch_score: i32, gap_penalty: i32) -> Self {
302        NeedlemanWunschGlobal {
303            gap_penalty,
304            match_score,
305            mismatch_score,
306        }
307    }
308    /// Globally align two sequences and return an `Alignment`.
309    pub fn align(&self, a: &str, b: &str) -> Alignment {
310        let a_chars: Vec<char> = a.chars().collect();
311        let b_chars: Vec<char> = b.chars().collect();
312        let m = a_chars.len();
313        let n = b_chars.len();
314        let ms = self.match_score;
315        let mm = self.mismatch_score;
316        let gap = self.gap_penalty;
317        let mut dp = vec![vec![0i32; n + 1]; m + 1];
318        for i in 0..=m {
319            dp[i][0] = i as i32 * gap;
320        }
321        for j in 0..=n {
322            dp[0][j] = j as i32 * gap;
323        }
324        for i in 1..=m {
325            for j in 1..=n {
326                let subst = if a_chars[i - 1] == b_chars[j - 1] {
327                    ms
328                } else {
329                    mm
330                };
331                let mat = dp[i - 1][j - 1] + subst;
332                let del = dp[i - 1][j] + gap;
333                let ins = dp[i][j - 1] + gap;
334                dp[i][j] = mat.max(del).max(ins);
335            }
336        }
337        let mut aligned_a = String::new();
338        let mut aligned_b = String::new();
339        let mut i = m;
340        let mut j = n;
341        while i > 0 || j > 0 {
342            if i > 0 && j > 0 {
343                let subst = if a_chars[i - 1] == b_chars[j - 1] {
344                    ms
345                } else {
346                    mm
347                };
348                if dp[i][j] == dp[i - 1][j - 1] + subst {
349                    aligned_a.push(a_chars[i - 1]);
350                    aligned_b.push(b_chars[j - 1]);
351                    i -= 1;
352                    j -= 1;
353                    continue;
354                }
355            }
356            if i > 0 && dp[i][j] == dp[i - 1][j] + gap {
357                aligned_a.push(a_chars[i - 1]);
358                aligned_b.push('-');
359                i -= 1;
360            } else {
361                aligned_a.push('-');
362                aligned_b.push(b_chars[j - 1]);
363                j -= 1;
364            }
365        }
366        let aligned_a: String = aligned_a.chars().rev().collect();
367        let aligned_b: String = aligned_b.chars().rev().collect();
368        Alignment {
369            score: dp[m][n],
370            aligned_a,
371            aligned_b,
372        }
373    }
374    /// Compute the alignment identity fraction.
375    pub fn identity(&self, a: &str, b: &str) -> f64 {
376        let aln = self.align(a, b);
377        let len = aln.aligned_a.len().max(1);
378        let matches = aln
379            .aligned_a
380            .chars()
381            .zip(aln.aligned_b.chars())
382            .filter(|(ca, cb)| ca == cb && *ca != '-')
383            .count();
384        matches as f64 / len as f64
385    }
386}
387/// Protein secondary structure element.
388#[allow(dead_code)]
389#[derive(Debug, Clone, PartialEq)]
390pub enum SecondaryStructure {
391    AlphaHelix,
392    BetaStrand,
393    Loop,
394    Turn,
395}
396#[allow(dead_code)]
397impl SecondaryStructure {
398    /// One-letter code.
399    pub fn code(&self) -> char {
400        match self {
401            Self::AlphaHelix => 'H',
402            Self::BetaStrand => 'E',
403            Self::Loop => 'C',
404            Self::Turn => 'T',
405        }
406    }
407}
408/// A directed edge in the De Bruijn graph.
409#[derive(Debug, Clone, PartialEq)]
410pub struct DeBruijnEdge {
411    pub from: DeBruijnNode,
412    pub to: DeBruijnNode,
413    pub label: String,
414}
415/// A genome assembler based on de Bruijn graphs.
416#[derive(Debug, Clone)]
417pub struct DeBruijnAssembler {
418    pub k: usize,
419}
420impl DeBruijnAssembler {
421    /// Create a new assembler with k-mer length `k`.
422    pub fn new(k: usize) -> Self {
423        DeBruijnAssembler { k }
424    }
425    /// Assemble reads into contigs using the de Bruijn graph approach.
426    pub fn assemble(&self, reads: &[&str]) -> Vec<String> {
427        let graph = DeBruijnGraph::build(reads, self.k);
428        let contig = graph.greedy_assemble();
429        if contig.is_empty() {
430            vec![]
431        } else {
432            vec![contig]
433        }
434    }
435    /// Return the number of distinct k-mers across all reads.
436    pub fn count_kmers(&self, reads: &[&str]) -> usize {
437        let graph = DeBruijnGraph::build(reads, self.k);
438        let mut labels: std::collections::HashSet<String> = std::collections::HashSet::new();
439        for edge in &graph.edges {
440            labels.insert(edge.label.clone());
441        }
442        labels.len()
443    }
444}
445/// A reusable Smith-Waterman local aligner.
446#[derive(Debug, Clone)]
447pub struct SmithWatermanAligner {
448    pub gap_penalty: i32,
449    pub match_score: i32,
450    pub mismatch_score: i32,
451}
452impl SmithWatermanAligner {
453    /// Create a new `SmithWatermanAligner` with given scoring parameters.
454    pub fn new(match_score: i32, mismatch_score: i32, gap_penalty: i32) -> Self {
455        SmithWatermanAligner {
456            gap_penalty,
457            match_score,
458            mismatch_score,
459        }
460    }
461    /// Align two sequences and return the best local alignment.
462    pub fn align(&self, a: &str, b: &str) -> Alignment {
463        let a_chars: Vec<char> = a.chars().collect();
464        let b_chars: Vec<char> = b.chars().collect();
465        let m = a_chars.len();
466        let n = b_chars.len();
467        let ms = self.match_score;
468        let mm = self.mismatch_score;
469        let gap = self.gap_penalty;
470        let mut dp = vec![vec![0i32; n + 1]; m + 1];
471        let mut best_score = 0i32;
472        let mut best_i = 0usize;
473        let mut best_j = 0usize;
474        for i in 1..=m {
475            for j in 1..=n {
476                let subst = if a_chars[i - 1] == b_chars[j - 1] {
477                    ms
478                } else {
479                    mm
480                };
481                let mat = dp[i - 1][j - 1] + subst;
482                let del = dp[i - 1][j] + gap;
483                let ins = dp[i][j - 1] + gap;
484                dp[i][j] = 0i32.max(mat).max(del).max(ins);
485                if dp[i][j] > best_score {
486                    best_score = dp[i][j];
487                    best_i = i;
488                    best_j = j;
489                }
490            }
491        }
492        let mut aligned_a = String::new();
493        let mut aligned_b = String::new();
494        let mut i = best_i;
495        let mut j = best_j;
496        while i > 0 && j > 0 && dp[i][j] > 0 {
497            let subst = if a_chars[i - 1] == b_chars[j - 1] {
498                ms
499            } else {
500                mm
501            };
502            if dp[i][j] == dp[i - 1][j - 1] + subst {
503                aligned_a.push(a_chars[i - 1]);
504                aligned_b.push(b_chars[j - 1]);
505                i -= 1;
506                j -= 1;
507            } else if dp[i][j] == dp[i - 1][j] + gap {
508                aligned_a.push(a_chars[i - 1]);
509                aligned_b.push('-');
510                i -= 1;
511            } else {
512                aligned_a.push('-');
513                aligned_b.push(b_chars[j - 1]);
514                j -= 1;
515            }
516        }
517        let aligned_a: String = aligned_a.chars().rev().collect();
518        let aligned_b: String = aligned_b.chars().rev().collect();
519        Alignment {
520            score: best_score,
521            aligned_a,
522            aligned_b,
523        }
524    }
525    /// Compute only the best local alignment score (no traceback).
526    pub fn score_only(&self, a: &str, b: &str) -> i32 {
527        self.align(a, b).score
528    }
529}
530/// A phylogenetic tree as a list of branches.
531#[derive(Debug, Clone)]
532pub struct PhyloTree {
533    pub branches: Vec<PhyloBranch>,
534}
535impl PhyloTree {
536    /// Neighbor-joining algorithm for phylogenetic tree reconstruction.
537    pub fn neighbor_joining(mut dist: DistMatrix) -> PhyloTree {
538        let mut branches = Vec::new();
539        let mut active: Vec<usize> = (0..dist.n()).collect();
540        let mut node_count = dist.n();
541        while active.len() > 2 {
542            let n = active.len();
543            let r: Vec<f64> = active
544                .iter()
545                .map(|&i| active.iter().map(|&j| dist.get(i, j)).sum::<f64>())
546                .collect();
547            let mut min_q = f64::INFINITY;
548            let mut min_pair = (0, 1);
549            for ai in 0..n {
550                for aj in (ai + 1)..n {
551                    let i = active[ai];
552                    let j = active[aj];
553                    let q = (n as f64 - 2.0) * dist.get(i, j) - r[ai] - r[aj];
554                    if q < min_q {
555                        min_q = q;
556                        min_pair = (ai, aj);
557                    }
558                }
559            }
560            let (ai, aj) = min_pair;
561            let i = active[ai];
562            let j = active[aj];
563            let n_f = n as f64;
564            let d_ij = dist.get(i, j);
565            let len_i = 0.5 * d_ij + (r[ai] - r[aj]) / (2.0 * (n_f - 2.0));
566            let len_j = d_ij - len_i;
567            let new_label = format!("node{}", node_count);
568            node_count += 1;
569            branches.push(PhyloBranch {
570                from: new_label.clone(),
571                to: dist.labels[i].clone(),
572                length: len_i.max(0.0),
573            });
574            branches.push(PhyloBranch {
575                from: new_label.clone(),
576                to: dist.labels[j].clone(),
577                length: len_j.max(0.0),
578            });
579            let remaining: Vec<usize> = active
580                .iter()
581                .enumerate()
582                .filter(|&(idx, _)| idx != ai && idx != aj)
583                .map(|(_, &k)| k)
584                .collect();
585            let new_idx = dist.data.len();
586            let mut new_row = vec![0.0f64; new_idx + 1];
587            for &k in &remaining {
588                let d_new_k = 0.5 * (dist.get(i, k) + dist.get(j, k) - d_ij);
589                new_row[k] = d_new_k.max(0.0);
590                dist.data[k].push(d_new_k.max(0.0));
591            }
592            dist.data.push(new_row);
593            dist.labels.push(new_label);
594            active.retain(|&x| x != i && x != j);
595            active.push(new_idx);
596        }
597        if active.len() == 2 {
598            let i = active[0];
599            let j = active[1];
600            branches.push(PhyloBranch {
601                from: dist.labels[i].clone(),
602                to: dist.labels[j].clone(),
603                length: dist.get(i, j),
604            });
605        }
606        PhyloTree { branches }
607    }
608}
609/// Fitch-parsimony scorer for phylogenetic trees.
610///
611/// Wraps the recursive `fitch_parsimony_score` and adds convenience helpers.
612#[derive(Debug, Clone)]
613pub struct PhylogeneticParsimony;
614impl PhylogeneticParsimony {
615    /// Create a new parsimony scorer.
616    pub fn new() -> Self {
617        PhylogeneticParsimony
618    }
619    /// Score a `ParsimonyTree` using the Fitch algorithm.
620    /// Returns (number_of_mutations, ancestral_character_set).
621    pub fn score(&self, tree: &ParsimonyTree) -> (usize, std::collections::HashSet<char>) {
622        fitch_parsimony_score(tree)
623    }
624    /// Build a balanced binary tree from a slice of leaf characters.
625    pub fn build_balanced(leaves: &[char]) -> ParsimonyTree {
626        match leaves.len() {
627            0 => ParsimonyTree::Leaf('?'),
628            1 => ParsimonyTree::Leaf(leaves[0]),
629            _ => {
630                let mid = leaves.len() / 2;
631                let left = Self::build_balanced(&leaves[..mid]);
632                let right = Self::build_balanced(&leaves[mid..]);
633                ParsimonyTree::Internal(Box::new(left), Box::new(right))
634            }
635        }
636    }
637    /// Compute the parsimony score for a flat slice of leaf characters
638    /// arranged in a balanced binary tree.
639    pub fn score_leaves(&self, leaves: &[char]) -> usize {
640        let tree = Self::build_balanced(leaves);
641        self.score(&tree).0
642    }
643}
644/// A discrete hidden Markov model.
645#[derive(Debug, Clone)]
646pub struct HiddenMarkovModel {
647    pub n_states: usize,
648    pub n_symbols: usize,
649    pub initial: Vec<f64>,
650    pub transition: Vec<Vec<f64>>,
651    pub emission: Vec<Vec<f64>>,
652}
653impl HiddenMarkovModel {
654    /// Create an HMM with uniform distributions.
655    pub fn uniform(n_states: usize, n_symbols: usize) -> Self {
656        let p_state = 1.0 / n_states as f64;
657        let p_sym = 1.0 / n_symbols as f64;
658        HiddenMarkovModel {
659            n_states,
660            n_symbols,
661            initial: vec![p_state; n_states],
662            transition: vec![vec![p_state; n_states]; n_states],
663            emission: vec![vec![p_sym; n_symbols]; n_states],
664        }
665    }
666    /// Run the Viterbi algorithm on a symbol sequence.
667    ///
668    /// Returns the most probable state path.
669    pub fn viterbi(&self, observations: &[usize]) -> Vec<usize> {
670        let t = observations.len();
671        if t == 0 {
672            return vec![];
673        }
674        let n = self.n_states;
675        let mut dp = vec![vec![f64::NEG_INFINITY; n]; t];
676        let mut backtrack = vec![vec![0usize; n]; t];
677        for s in 0..n {
678            let obs = observations[0];
679            if obs < self.n_symbols {
680                dp[0][s] = self.initial[s].ln() + self.emission[s][obs].ln();
681            }
682        }
683        for ti in 1..t {
684            let obs = observations[ti];
685            for s in 0..n {
686                let emit = if obs < self.n_symbols {
687                    self.emission[s][obs].ln()
688                } else {
689                    f64::NEG_INFINITY
690                };
691                let (best_prev, best_val) = (0..n)
692                    .map(|prev| {
693                        let v = dp[ti - 1][prev] + self.transition[prev][s].ln();
694                        (prev, v)
695                    })
696                    .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
697                    .unwrap_or((0, f64::NEG_INFINITY));
698                dp[ti][s] = best_val + emit;
699                backtrack[ti][s] = best_prev;
700            }
701        }
702        let mut path = vec![0usize; t];
703        path[t - 1] = (0..n)
704            .max_by(|&a, &b| {
705                dp[t - 1][a]
706                    .partial_cmp(&dp[t - 1][b])
707                    .unwrap_or(std::cmp::Ordering::Equal)
708            })
709            .unwrap_or(0);
710        for ti in (1..t).rev() {
711            path[ti - 1] = backtrack[ti][path[ti]];
712        }
713        path
714    }
715    /// Forward algorithm: compute P(observations | model).
716    pub fn forward_probability(&self, observations: &[usize]) -> f64 {
717        let t = observations.len();
718        if t == 0 {
719            return 1.0;
720        }
721        let n = self.n_states;
722        let mut alpha = vec![0.0f64; n];
723        for s in 0..n {
724            let obs = observations[0];
725            if obs < self.n_symbols {
726                alpha[s] = self.initial[s] * self.emission[s][obs];
727            }
728        }
729        for ti in 1..t {
730            let obs = observations[ti];
731            let mut alpha_new = vec![0.0f64; n];
732            for s in 0..n {
733                let emit = if obs < self.n_symbols {
734                    self.emission[s][obs]
735                } else {
736                    0.0
737                };
738                alpha_new[s] = (0..n)
739                    .map(|prev| alpha[prev] * self.transition[prev][s])
740                    .sum::<f64>()
741                    * emit;
742            }
743            alpha = alpha_new;
744        }
745        alpha.iter().sum()
746    }
747}
748/// A simple tree node for parsimony scoring.
749#[derive(Debug, Clone)]
750pub enum ParsimonyTree {
751    Leaf(char),
752    Internal(Box<ParsimonyTree>, Box<ParsimonyTree>),
753}
754/// Residue type in the HP (hydrophobic-polar) model.
755#[derive(Debug, Clone, Copy, PartialEq)]
756pub enum HPResidue {
757    H,
758    P,
759}
760/// Regulatory network node.
761#[allow(dead_code)]
762#[derive(Debug, Clone)]
763pub struct RegulatoryNode {
764    pub gene_name: String,
765    pub is_transcription_factor: bool,
766    pub targets: Vec<String>,
767}
768#[allow(dead_code)]
769impl RegulatoryNode {
770    /// Create a regulatory node.
771    pub fn new(name: &str, is_tf: bool) -> Self {
772        Self {
773            gene_name: name.to_string(),
774            is_transcription_factor: is_tf,
775            targets: Vec::new(),
776        }
777    }
778    /// Add a regulatory target.
779    pub fn add_target(&mut self, target: &str) {
780        self.targets.push(target.to_string());
781    }
782    /// Out-degree.
783    pub fn out_degree(&self) -> usize {
784        self.targets.len()
785    }
786    /// Description.
787    pub fn description(&self) -> String {
788        if self.is_transcription_factor {
789            format!(
790                "TF {} regulates {} genes",
791                self.gene_name,
792                self.targets.len()
793            )
794        } else {
795            format!("Gene {} (non-TF)", self.gene_name)
796        }
797    }
798}
799/// Hidden Markov Model for sequence analysis.
800#[allow(dead_code)]
801#[derive(Debug, Clone)]
802pub struct SequenceHmm {
803    pub name: String,
804    pub num_states: usize,
805    pub alphabet_size: usize,
806}
807#[allow(dead_code)]
808impl SequenceHmm {
809    /// Profile HMM for protein family.
810    pub fn profile(name: &str, length: usize) -> Self {
811        Self {
812            name: name.to_string(),
813            num_states: 3 * length,
814            alphabet_size: 20,
815        }
816    }
817    /// CpG island detector.
818    pub fn cpg_island_detector() -> Self {
819        Self {
820            name: "CpG-HMM".to_string(),
821            num_states: 2,
822            alphabet_size: 4,
823        }
824    }
825    /// Viterbi decoding gives most likely state sequence.
826    pub fn viterbi_description(&self) -> String {
827        format!("Viterbi on {} ({} states)", self.name, self.num_states)
828    }
829}
oxilean_std/bioinformatics/types.rs

oxilean_std/bioinformatics/
types.rs