sigalign_core/aligner/regulator/
mod.rs1use crate::core::regulators::{
2 Penalty, PREC_SCALE, Cutoff, MinPenaltyForPattern,
3 calculate_max_pattern_size,
4};
5use crate::results::{
6 QueryAlignment, Alignment, TargetAlignment,
7};
8use thiserror::Error;
9use num::integer::gcd;
10
11#[derive(Error, Debug)]
13pub enum RegulatorError {
14 #[error("Gap extend penalty only allow positive integer.")]
15 InvalidGapExtendPenalty,
16 #[error("Maximum penalty per length only allow positive value.")]
17 InvalidMaxPenaltyPerLength,
18}
19
20#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
22pub struct AlignmentRegulator {
23 pub(super) penalties: Penalty,
24 pub(super) cutoff: Cutoff,
25 pub(super) min_penalty_for_pattern: MinPenaltyForPattern,
26 pub(super) gcd_for_compression: u32,
27 pub(super) pattern_size: u32,
28}
29
30impl AlignmentRegulator {
31 pub fn new(
33 mismatch_penalty: u32,
34 gap_open_penalty: u32,
35 gap_extend_penalty: u32,
36 minimum_alignment_length: u32,
37 maximum_penalty_per_alignment_length: f32,
38 ) -> Result<Self, RegulatorError> {
39 if gap_extend_penalty == 0 {
40 return Err(RegulatorError::InvalidGapExtendPenalty);
41 } else if maximum_penalty_per_alignment_length <= 0.0 {
42 return Err(RegulatorError::InvalidMaxPenaltyPerLength);
43 }
44
45 let penalties = Penalty::new(mismatch_penalty, gap_open_penalty, gap_extend_penalty);
46 let cutoff = Cutoff::new(minimum_alignment_length, maximum_penalty_per_alignment_length);
47 let aligner = Self::new_with_gcd_compressed_from_penalties_and_cutoff(penalties, cutoff);
48
49 Ok(aligner)
50 }
51 fn new_with_gcd_compressed_from_penalties_and_cutoff(mut penalties: Penalty, mut cutoff: Cutoff) -> Self {
52 let gcd = penalties.gcd_of_penalties();
53 penalties.divide_by_gcd(gcd);
54 cutoff.divide_by_gcd(gcd);
55
56 let min_penalty_for_pattern = MinPenaltyForPattern::new(&penalties);
57 let max_pattern_size = calculate_max_pattern_size(
58 &penalties,
59 &cutoff,
60 &min_penalty_for_pattern,
61 );
62
63 Self {
64 penalties,
65 cutoff,
66 min_penalty_for_pattern,
67 gcd_for_compression: gcd,
68 pattern_size: max_pattern_size,
69 }
70 }
71 pub(super) fn decompress_result_with_gcd(&self, alignment_result: &mut QueryAlignment) {
72 if self.gcd_for_compression != 1 {
73 alignment_result.multiply_gcd(self.gcd_for_compression);
74 }
75 }
76 pub fn get_mismatch_penalty(&self) -> u32 {
78 self.penalties.x * self.gcd_for_compression
79 }
80 pub fn get_gap_open_penalty(&self) -> u32 {
82 self.penalties.o * self.gcd_for_compression
83 }
84 pub fn get_gap_extend_penalty(&self) -> u32 {
86 self.penalties.e * self.gcd_for_compression
87 }
88 pub fn get_minimum_length(&self) -> u32 {
90 self.cutoff.minimum_length
91 }
92 pub fn get_maximum_penalty_per_length(&self) -> f32 {
94 (self.cutoff.maximum_scaled_penalty_per_length * self.gcd_for_compression) as f32 / PREC_SCALE as f32
95 }
96 pub fn get_pattern_size(&self) -> u32 {
98 self.pattern_size
99 }
100}
101
102impl QueryAlignment {
103 fn multiply_gcd(&mut self, gcd: u32) {
104 self.0.iter_mut().for_each(|target_alignment_result| {
105 target_alignment_result.multiply_gcd(gcd);
106 })
107 }
108}
109
110impl TargetAlignment {
111 #[inline]
112 fn multiply_gcd(&mut self, gcd: u32) {
113 self.alignments.iter_mut().for_each(|alignment_result| {
114 alignment_result.multiply_gcd(gcd);
115 })
116 }
117}
118
119impl Alignment {
120 #[inline]
121 fn multiply_gcd(&mut self, gcd: u32) {
122 self.penalty *= gcd;
123 }
124}
125
126impl Penalty {
127 fn new(mismatch: u32, gap_open: u32, gap_extend: u32) -> Self {
128 Self {
129 x: mismatch,
130 o: gap_open,
131 e: gap_extend,
132 }
133 }
134 fn gcd_of_penalties(&self) -> u32 {
135 gcd(gcd(self.x, self.o), self.e)
136 }
137 fn divide_by_gcd(&mut self, gcd: u32) {
138 self.x /= gcd;
139 self.o /= gcd;
140 self.e /= gcd;
141 }
142}
143
144impl Cutoff {
145 fn new(minimum_length: u32, maximum_penalty_per_length: f32) -> Self {
146 let maximum_penalty_per_scale = (maximum_penalty_per_length * PREC_SCALE as f32) as u32;
147 Self::new_with_scaled_max_ppl(minimum_length, maximum_penalty_per_scale)
148 }
149 fn new_with_scaled_max_ppl(minimum_length: u32, maximum_penalty_per_scale: u32) -> Self {
150 Self {
151 minimum_length,
152 maximum_scaled_penalty_per_length: maximum_penalty_per_scale,
153 }
154 }
155 fn divide_by_gcd(&mut self, gcd: u32) {
156 self.maximum_scaled_penalty_per_length /= gcd;
157 }
158}
159
160#[cfg(test)]
161mod tests {
162 use super::*;
163
164 #[test]
165 fn test_gcd_calculation_for_penalties() {
166 let mut penalties = Penalty::new(4, 6, 2);
167 let gcd = penalties.gcd_of_penalties();
168 assert_eq!(gcd, 2);
169 penalties.divide_by_gcd(gcd);
170 assert_eq!(penalties, Penalty::new(2, 3, 1));
171
172 let mut penalties = Penalty::new(4, 5, 3);
173 let gcd = penalties.gcd_of_penalties();
174 assert_eq!(gcd, 1);
175 penalties.divide_by_gcd(gcd);
176 assert_eq!(penalties, Penalty::new(4, 5, 3));
177 }
178
179 #[allow(dead_code)]
180 fn print_calculate_maximum_kmer() {
181 let penalties = Penalty::new(4, 6, 2);
182 let cutoff = Cutoff::new(50, 0.15);
183 let min_penalty_for_pattern = MinPenaltyForPattern::new(&penalties);
184 let pattern_size = calculate_max_pattern_size(
185 &penalties,
186 &cutoff,
187 &min_penalty_for_pattern,
188 );
189 println!("{}", pattern_size);
190 }
191}