1
2pub mod analysis {
3 use serde::{Deserialize, Serialize};
4 use std::collections::{HashMap, HashSet};
5 use std::error::Error;
6 use std::fs::File;
7 use std::io::{self, BufRead, BufReader, Write};
8
9 static CODE_FILE: &str = include_str!("genetic_code.json");
10
11 fn load_embedded_genetic_codes() -> Result<Vec<GeneticCodeJSON>, Box<dyn Error>> {
13 let genetic_codes: Vec<GeneticCodeJSON> = serde_json::from_str(CODE_FILE)?;
14 Ok(genetic_codes)
15 }
16
17 #[derive(Debug, Clone)]
18 pub struct GeneticCode {
19 id: String,
20 name: String,
21 codon_map: HashMap<String, String>,
22 }
23
24 impl GeneticCode {
26 pub fn new() -> Self {
27 GeneticCode {
28 id: String::new(),
29 name: String::new(),
30 codon_map: HashMap::new(),
31 }
32 }
33 }
34
35 #[derive(Debug, Serialize, Deserialize)]
37 struct GeneticCodeJSON {
38 name: Vec<String>,
39 id: u8,
40 ncbieaa: String,
41 sncbieaa: String,
42 base_mappings: HashMap<String, String>,
43 }
44
45 impl GeneticCodeJSON {
46 fn to_codon_map(&self) -> HashMap<String, String> {
48 let base1 = self
49 .base_mappings
50 .get("Base1")
51 .cloned()
52 .unwrap_or_else(|| String::new());
53 let base2 = self
54 .base_mappings
55 .get("Base2")
56 .cloned()
57 .unwrap_or_else(|| String::new());
58 let base3 = self
59 .base_mappings
60 .get("Base3")
61 .cloned()
62 .unwrap_or_else(|| String::new());
63
64 let mut codon_map = HashMap::new();
65
66 for (i, ((b1, b2), b3)) in base1
67 .chars()
68 .zip(base2.chars())
69 .zip(base3.chars())
70 .enumerate()
71 {
72 let codon = format!("{}{}{}", b1, b2, b3);
73 let amino_acid = self.ncbieaa.chars().nth(i).unwrap_or('*').to_string(); codon_map.insert(codon, amino_acid);
75 }
76
77 codon_map
78 }
79 }
80
81 fn get_genetic_code_by_id<'a>(
83 codes: &'a [GeneticCodeJSON],
84 id: &u8,
85 ) -> Option<&'a GeneticCodeJSON> {
86 codes.iter().find(|code| code.id == *id)
87 }
88
89 pub fn genetic_code_from_id(translation_table: &u8) -> GeneticCode {
99 let mut code = GeneticCode::new();
100 if let Ok(json_codes) = load_embedded_genetic_codes() {
101 if let Some(selected_code) = get_genetic_code_by_id(&json_codes, translation_table) {
102 code.id = selected_code.id.to_string();
103 code.name = selected_code.name.first().cloned().unwrap_or_default();
104 code.codon_map = selected_code.to_codon_map();
105 } else {
106 eprintln!("Error: Genetic Code ID {} not found!", translation_table);
107 }
108 } else {
109 eprintln!("Failed to load genetic codes");
110 }
111 code
112 }
113
114 pub fn count_codons(sequence: &str) -> HashMap<String, usize> {
124 let mut codon_counts = HashMap::new();
125
126 if sequence.len() % 3 != 0 {
128 return codon_counts; }
130
131 for codon in sequence.as_bytes().chunks(3) {
132 let codon_str = String::from_utf8_lossy(codon).to_uppercase();
133 *codon_counts.entry(codon_str).or_insert(0) += 1;
134 }
135
136 codon_counts
137 }
138
139 pub fn compute_rscu(codon_counts: &HashMap<String, usize>, code: &GeneticCode) -> HashMap<String, f64> {
150 let codon_table = &code.codon_map;
151 let mut rscu_values = HashMap::new();
152 let mut amino_acid_totals: HashMap<&str, usize> = HashMap::new();
153 let mut synonymous_codons: HashMap<&str, Vec<&str>> = HashMap::new();
154
155 for (codon, amino_acid) in codon_table {
157 synonymous_codons
158 .entry(amino_acid)
159 .or_insert(Vec::new())
160 .push(codon);
161 *amino_acid_totals.entry(amino_acid).or_insert(0) +=
162 codon_counts.get(codon).copied().unwrap_or(0);
163 }
164
165 for (amino_acid, codons) in &synonymous_codons {
167 let total_codon_count = amino_acid_totals.get(amino_acid).copied().unwrap_or(0) as f64;
168 let num_codons = codons.len() as f64;
169
170 for codon in codons {
171 let observed = *codon_counts.get(*codon).unwrap_or(&0) as f64;
172 let expected = total_codon_count / num_codons;
173 let rscu = if expected > 0.0 {
174 observed / expected
175 } else {
176 0.0
177 };
178
179 rscu_values.insert((*codon).to_string(), rscu);
180 }
181 }
182
183 rscu_values
184 }
185
186 pub fn write_rscu_to_csv(
188 filename: &str,
189 rscu_data: &Vec<(String, HashMap<String, f64>)>,
190 ) -> std::io::Result<()> {
191 let mut file = File::create(filename)?;
192
193 let mut codon_set = HashSet::new();
195 for (_, codon_map) in rscu_data {
196 for codon in codon_map.keys() {
197 codon_set.insert(codon.clone());
198 }
199 }
200
201 let mut codons: Vec<String> = codon_set.into_iter().collect();
202 codons.sort(); write!(file, "Sequence")?;
206 for codon in &codons {
207 write!(file, ",{}", codon)?;
208 }
209 writeln!(file)?;
210
211 for (seq_name, codon_map) in rscu_data {
213 write!(file, "{}", seq_name)?;
214 for codon in &codons {
215 let rscu_value = codon_map.get(codon).unwrap_or(&0.0);
216 write!(file, ",{:.6}", rscu_value)?;
217 }
218 writeln!(file)?;
219 }
220
221 Ok(())
222 }
223
224 pub fn compute_mean_rscu(rscu_results: &Vec<(String, HashMap<String, f64>)>) -> HashMap<String, f64> {
226 let mut total_rscu: HashMap<String, f64> = HashMap::new();
227 let gene_count = rscu_results.len() as f64;
228
229 for (_, rscu_map) in rscu_results {
230 for (codon, value) in rscu_map {
231 *total_rscu.entry(codon.clone()).or_insert(0.0) += value;
232 }
233 }
234
235 let mut mean_rscu = HashMap::new();
236 for (codon, total_value) in total_rscu {
237 mean_rscu.insert(codon, total_value / gene_count);
238 }
239
240 mean_rscu
241 }
242
243 pub fn compute_std_rscu(
245 rscu_results: &Vec<(String, HashMap<String, f64>)>,
246 mean_rscu: &HashMap<String, f64>,
247 ) -> HashMap<String, f64> {
248 let mut variance: HashMap<String, f64> = HashMap::new();
249 let gene_count = rscu_results.len() as f64;
250
251 for (_, rscu_map) in rscu_results {
252 for (codon, value) in rscu_map {
253 let mean = mean_rscu.get(codon).unwrap_or(&0.0);
254 let diff = value - mean;
255 *variance.entry(codon.clone()).or_insert(0.0) += diff * diff;
256 }
257 }
258
259 let mut std_dev = HashMap::new();
260 for (codon, var) in variance {
261 std_dev.insert(codon, (var / gene_count).sqrt());
262 }
263
264 std_dev
265 }
266
267 pub fn compute_rscu_z_scores(
269 rscu_results: &Vec<(String, HashMap<String, f64>)>,
270 mean_rscu: &HashMap<String, f64>,
271 std_rscu: &HashMap<String, f64>,
272 ) -> Vec<(String, HashMap<String, f64>)> {
273 let mut z_scores = Vec::new();
274
275 for (gene, rscu_map) in rscu_results {
276 let mut gene_z_scores = HashMap::new();
277 for (codon, value) in rscu_map {
278 let mean = mean_rscu.get(codon).unwrap_or(&0.0);
279 let std_dev = std_rscu.get(codon).unwrap_or(&1.0); let z_score = (value - mean) / std_dev;
281 gene_z_scores.insert(codon.clone(), z_score);
282 }
283 z_scores.push((gene.clone(), gene_z_scores));
284 }
285
286 z_scores
287 }
288
289 pub fn write_z_scores_to_csv(
291 filename: &str,
292 z_scores: &Vec<(String, HashMap<String, f64>)>,
293 ) -> io::Result<()> {
294 let mut file = File::create(filename)?;
295
296 let mut codon_set = HashSet::new();
297 for (_, codon_map) in z_scores {
298 for codon in codon_map.keys() {
299 codon_set.insert(codon.clone());
300 }
301 }
302
303 let mut codons: Vec<String> = codon_set.into_iter().collect();
304 codons.sort();
305
306 write!(file, "Gene")?;
308 for codon in &codons {
309 write!(file, ",{}", codon)?;
310 }
311 writeln!(file)?;
312
313 for (gene, codon_map) in z_scores {
315 write!(file, "{}", gene)?;
316 for codon in &codons {
317 let z_score = codon_map.get(codon).unwrap_or(&0.0);
318 write!(file, ",{:.6}", z_score)?;
319 }
320 writeln!(file)?;
321 }
322
323 Ok(())
324 }
325
326 pub fn count_amino_acids(sequence: &str, code: &GeneticCode) -> HashMap<String, usize> {
337 let codon_table = &code.codon_map;
338 let mut amino_acid_counts = HashMap::new();
339
340 for codon in sequence.as_bytes().chunks(3) {
341 if codon.len() == 3 {
342 let codon_str = String::from_utf8_lossy(codon).to_uppercase();
343 if let Some(amino_acid) = codon_table.get(codon_str.as_str()) {
344 *amino_acid_counts.entry(amino_acid.to_string()).or_insert(0) += 1;
345 }
346 }
347 }
348
349 amino_acid_counts
350 }
351
352 pub fn write_counts_to_csv(
364 filename_prefix: &str,
365 codon_data: &Vec<(String, HashMap<String, usize>)>,
366 amino_acid_data: &Vec<(String, HashMap<String, usize>)>,
367 ) -> std::io::Result<()> {
368 let codon_filename = format!("{}_codon.csv", filename_prefix);
369 let amino_filename = format!("{}_amino_acids.csv", filename_prefix);
370
371 let mut codon_file = File::create(&codon_filename)?;
373 let mut amino_file = File::create(&amino_filename)?;
374
375 let mut codon_set = HashSet::new();
377 let mut amino_acid_set = HashSet::new();
378
379 for (_, codon_map) in codon_data {
380 for codon in codon_map.keys() {
381 codon_set.insert(codon.clone());
382 }
383 }
384
385 for (_, amino_map) in amino_acid_data {
386 for amino in amino_map.keys() {
387 amino_acid_set.insert(amino.clone());
388 }
389 }
390
391 let mut codons: Vec<String> = codon_set.into_iter().collect();
392 let mut amino_acids: Vec<String> = amino_acid_set.into_iter().collect();
393 codons.sort(); amino_acids.sort();
395
396 write!(codon_file, "Sequence")?;
398 for codon in &codons {
399 write!(codon_file, ",{}", codon)?;
400 }
401 writeln!(codon_file)?;
402
403 for (seq_name, codon_map) in codon_data {
405 write!(codon_file, "{}", seq_name)?;
406 for codon in &codons {
407 let count = codon_map.get(codon).unwrap_or(&0);
408 write!(codon_file, ",{}", count)?;
409 }
410 writeln!(codon_file)?;
411 }
412
413 write!(amino_file, "Sequence")?;
415 for amino in &amino_acids {
416 write!(amino_file, ",{}", amino)?;
417 }
418 writeln!(amino_file)?;
419
420 for (seq_name, amino_map) in amino_acid_data {
422 write!(amino_file, "{}", seq_name)?;
423 for amino in &amino_acids {
424 let count = amino_map.get(amino).unwrap_or(&0);
425 write!(amino_file, ",{}", count)?;
426 }
427 writeln!(amino_file)?;
428 }
429
430 Ok(())
431 }
432
433 pub fn read_sequences_from_fasta(filename: &str) -> io::Result<Vec<(String, String)>> {
445 let file = File::open(filename)?;
446 let reader = BufReader::new(file);
447
448 let mut sequences = Vec::new();
449 let mut current_seq_name = String::new();
450 let mut current_sequence = String::new();
451
452 for line in reader.lines() {
453 let line = line?;
454 if line.starts_with('>') {
455 if !current_seq_name.is_empty() {
456 sequences.push((current_seq_name.clone(), current_sequence.clone()));
457 current_sequence.clear();
458 }
459 current_seq_name = line[1..].to_string();
460 } else {
461 current_sequence.push_str(&line);
462 }
463 }
464 if !current_seq_name.is_empty() {
465 sequences.push((current_seq_name, current_sequence));
466 }
467
468 Ok(sequences)
469 }
470}