1
2use rustc_hash::FxHashMap;
3use std::sync::LazyLock;
4
5#[derive(Debug, Clone, PartialEq)]
6pub enum SnpStatus {
7
8 NotApplicable,
9
10 Confirmed,
11
12 WildType,
13
14 NovelVariant(char),
15
16 NotCovered,
17
18 Unverified(String),
19}
20
21impl std::fmt::Display for SnpStatus {
22 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
23 match self {
24 SnpStatus::NotApplicable => write!(f, "Acquired"),
25 SnpStatus::Confirmed => write!(f, "Confirmed"),
26 SnpStatus::WildType => write!(f, "WildType"),
27 SnpStatus::NovelVariant(aa) => write!(f, "Novel({})", aa),
28 SnpStatus::NotCovered => write!(f, "NotCovered"),
29 SnpStatus::Unverified(reason) => write!(f, "Unverified({})", reason),
30 }
31 }
32}
33
34#[derive(Debug, Clone)]
35pub struct SnpInfo {
36
37 #[allow(dead_code)]
38 pub gene: String,
39
40 pub wildtype: char,
41
42 pub position: usize,
43
44 pub mutant: char,
45}
46
47impl SnpInfo {
48
49 pub fn nucleotide_range(&self) -> (usize, usize) {
50 let start = (self.position - 1) * 3;
51 let end = start + 3;
52 (start, end)
53 }
54}
55
56static CODON_TABLE: LazyLock<FxHashMap<&'static str, char>> = LazyLock::new(|| {
57 let mut table = FxHashMap::default();
58
59 table.insert("TTT", 'F'); table.insert("TTC", 'F');
60
61 table.insert("TTA", 'L'); table.insert("TTG", 'L');
62 table.insert("CTT", 'L'); table.insert("CTC", 'L');
63 table.insert("CTA", 'L'); table.insert("CTG", 'L');
64
65 table.insert("ATT", 'I'); table.insert("ATC", 'I'); table.insert("ATA", 'I');
66
67 table.insert("ATG", 'M');
68
69 table.insert("GTT", 'V'); table.insert("GTC", 'V');
70 table.insert("GTA", 'V'); table.insert("GTG", 'V');
71
72 table.insert("TCT", 'S'); table.insert("TCC", 'S');
73 table.insert("TCA", 'S'); table.insert("TCG", 'S');
74 table.insert("AGT", 'S'); table.insert("AGC", 'S');
75
76 table.insert("CCT", 'P'); table.insert("CCC", 'P');
77 table.insert("CCA", 'P'); table.insert("CCG", 'P');
78
79 table.insert("ACT", 'T'); table.insert("ACC", 'T');
80 table.insert("ACA", 'T'); table.insert("ACG", 'T');
81
82 table.insert("GCT", 'A'); table.insert("GCC", 'A');
83 table.insert("GCA", 'A'); table.insert("GCG", 'A');
84
85 table.insert("TAT", 'Y'); table.insert("TAC", 'Y');
86
87 table.insert("TAA", '*'); table.insert("TAG", '*'); table.insert("TGA", '*');
88
89 table.insert("CAT", 'H'); table.insert("CAC", 'H');
90
91 table.insert("CAA", 'Q'); table.insert("CAG", 'Q');
92
93 table.insert("AAT", 'N'); table.insert("AAC", 'N');
94
95 table.insert("AAA", 'K'); table.insert("AAG", 'K');
96
97 table.insert("GAT", 'D'); table.insert("GAC", 'D');
98
99 table.insert("GAA", 'E'); table.insert("GAG", 'E');
100
101 table.insert("TGT", 'C'); table.insert("TGC", 'C');
102
103 table.insert("TGG", 'W');
104
105 table.insert("CGT", 'R'); table.insert("CGC", 'R');
106 table.insert("CGA", 'R'); table.insert("CGG", 'R');
107 table.insert("AGA", 'R'); table.insert("AGG", 'R');
108
109 table.insert("GGT", 'G'); table.insert("GGC", 'G');
110 table.insert("GGA", 'G'); table.insert("GGG", 'G');
111 table
112});
113
114pub fn translate_codon(codon: &str) -> Option<char> {
115 let upper = codon.to_uppercase();
116 CODON_TABLE.get(upper.as_str()).copied()
117}
118
119#[allow(dead_code)]
120pub fn is_snp_gene(gene_name: &str) -> bool {
121 parse_snp_info(gene_name).is_some()
122}
123
124pub fn parse_snp_info(gene_name: &str) -> Option<SnpInfo> {
125
126 let name = gene_name.split('|').next().unwrap_or(gene_name);
127
128 let parts: Vec<&str> = name.rsplitn(2, '_').collect();
129 if parts.len() != 2 {
130 return None;
131 }
132
133 let snp_part = parts[0];
134 let gene = parts[1];
135
136 if snp_part.len() < 3 {
137 return None;
138 }
139
140 let chars: Vec<char> = snp_part.chars().collect();
141
142 let wildtype = chars[0].to_ascii_uppercase();
143 if !is_amino_acid(wildtype) {
144 return None;
145 }
146
147 let mutant = chars[chars.len() - 1].to_ascii_uppercase();
148 if !is_amino_acid(mutant) {
149 return None;
150 }
151
152 let pos_str: String = chars[1..chars.len()-1].iter().collect();
153 let position: usize = pos_str.parse().ok()?;
154
155 if position == 0 {
156 return None;
157 }
158
159 Some(SnpInfo {
160 gene: gene.to_string(),
161 wildtype,
162 position,
163 mutant,
164 })
165}
166
167fn is_amino_acid(c: char) -> bool {
168 matches!(c, 'A' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'K' | 'L' |
169 'M' | 'N' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'V' | 'W' | 'Y')
170}
171
172pub fn verify_snp(
173 contig_seq: &str,
174 gene_name: &str,
175 ref_start: usize,
176 ref_end: usize,
177 contig_start: usize,
178 contig_end: usize,
179 strand: char,
180) -> SnpStatus {
181
182 let snp_info = match parse_snp_info(gene_name) {
183 Some(info) => info,
184 None => return SnpStatus::NotApplicable,
185 };
186
187 let (snp_nt_start, snp_nt_end) = snp_info.nucleotide_range();
188
189 if snp_nt_start < ref_start || snp_nt_end > ref_end {
190 return SnpStatus::NotCovered;
191 }
192
193 let offset_in_ref = snp_nt_start - ref_start;
194 let contig_snp_start = if strand == '+' {
195 contig_start + offset_in_ref
196 } else {
197
198 contig_end - offset_in_ref - 3
199 };
200 let contig_snp_end = contig_snp_start + 3;
201
202 if contig_snp_end > contig_seq.len() {
203 return SnpStatus::NotCovered;
204 }
205
206 let codon_seq = &contig_seq[contig_snp_start..contig_snp_end];
207
208 let codon = if strand == '-' {
209 reverse_complement(codon_seq)
210 } else {
211 codon_seq.to_uppercase()
212 };
213
214 let amino_acid = match translate_codon(&codon) {
215 Some(aa) => aa,
216 None => return SnpStatus::Unverified(format!("invalid_codon:{}", codon)),
217 };
218
219 if amino_acid == snp_info.mutant {
220 SnpStatus::Confirmed
221 } else if amino_acid == snp_info.wildtype {
222 SnpStatus::WildType
223 } else {
224 SnpStatus::NovelVariant(amino_acid)
225 }
226}
227
228fn reverse_complement(seq: &str) -> String {
229 seq.chars()
230 .rev()
231 .map(|c| match c.to_ascii_uppercase() {
232 'A' => 'T',
233 'T' => 'A',
234 'G' => 'C',
235 'C' => 'G',
236 _ => 'N',
237 })
238 .collect()
239}
240
241#[cfg(test)]
242mod tests {
243 use super::*;
244
245 #[test]
246 fn test_parse_snp_info() {
247
248 let info = parse_snp_info("rpoB_V146F").unwrap();
249 assert_eq!(info.gene, "rpoB");
250 assert_eq!(info.wildtype, 'V');
251 assert_eq!(info.position, 146);
252 assert_eq!(info.mutant, 'F');
253
254 let info = parse_snp_info("gyrA_S83L|QUINOLONE|QUINOLONE|J01M").unwrap();
255 assert_eq!(info.gene, "gyrA");
256 assert_eq!(info.wildtype, 'S');
257 assert_eq!(info.position, 83);
258 assert_eq!(info.mutant, 'L');
259
260 let info = parse_snp_info("test_A1G").unwrap();
261 assert_eq!(info.position, 1);
262
263 assert!(parse_snp_info("blaTEM-1").is_none());
264 assert!(parse_snp_info("tet(M)").is_none());
265 }
266
267 #[test]
268 fn test_is_snp_gene() {
269 assert!(is_snp_gene("rpoB_V146F"));
270 assert!(is_snp_gene("gyrA_S83L|QUINOLONE"));
271 assert!(!is_snp_gene("blaTEM-1"));
272 assert!(!is_snp_gene("tet(M)"));
273 }
274
275 #[test]
276 fn test_translate_codon() {
277 assert_eq!(translate_codon("ATG"), Some('M'));
278 assert_eq!(translate_codon("TTT"), Some('F'));
279 assert_eq!(translate_codon("TTC"), Some('F'));
280 assert_eq!(translate_codon("GTT"), Some('V'));
281 assert_eq!(translate_codon("TAA"), Some('*'));
282 assert_eq!(translate_codon("XXX"), None);
283 }
284
285 #[test]
286 fn test_nucleotide_range() {
287 let info = SnpInfo {
288 gene: "test".to_string(),
289 wildtype: 'V',
290 position: 146,
291 mutant: 'F',
292 };
293
294 assert_eq!(info.nucleotide_range(), (435, 438));
295
296 let info2 = SnpInfo {
297 gene: "test".to_string(),
298 wildtype: 'A',
299 position: 1,
300 mutant: 'B',
301 };
302 assert_eq!(info2.nucleotide_range(), (0, 3));
303 }
304
305 #[test]
306 fn test_verify_snp() {
307
308 let contig = "ATGTTTGGG";
309 let status = verify_snp(
310 contig,
311 "test_V2F",
312 0, 9,
313 0, 9,
314 '+',
315 );
316 assert_eq!(status, SnpStatus::Confirmed);
317
318 let contig = "ATGGTTGGG";
319 let status = verify_snp(
320 contig,
321 "test_V2F",
322 0, 9,
323 0, 9,
324 '+',
325 );
326 assert_eq!(status, SnpStatus::WildType);
327
328 let contig = "ATGCTTGGG";
329 let status = verify_snp(
330 contig,
331 "test_V2F",
332 0, 9,
333 0, 9,
334 '+',
335 );
336 assert_eq!(status, SnpStatus::NovelVariant('L'));
337 }
338
339 #[test]
340 fn test_snp_status_display() {
341 assert_eq!(format!("{}", SnpStatus::Confirmed), "Confirmed");
342 assert_eq!(format!("{}", SnpStatus::WildType), "WildType");
343 assert_eq!(format!("{}", SnpStatus::NovelVariant('L')), "Novel(L)");
344 assert_eq!(format!("{}", SnpStatus::NotApplicable), "Acquired");
345 }
346}