microBioRust_seqmetrics/
metrics.rs

1//!  The purpose of seqmetrics is to allow calculations of protein sequence parameters
2//!  We define a set of amino acid getters to allow getting of the protein sequence
3//!  as either the three letter code such as Trp, Phe, the full name such as alanine, glycine
4//!  or the single letter code such as Y, A
5//!
6//!   Example function to calculate transmembrane statistics
7//!
8//! ```rust
9//!
10//! use clap::Parser;
11//! use std::fs::File;
12//! use microBioRust::gbk::Reader;
13//! use std::io;
14//! use std::collections::HashMap;
15//! use microBioRust_seqmetrics::metrics::hydrophobicity;
16//!
17//! pub fn suggest_transmembrane_domains() -> Result<(), anyhow::Error> {
18//!            let file_gbk = File::open("test_output.gbk")?;
19//!            let mut reader = Reader::new(file_gbk);
20//!            let mut records = reader.records();
21//!            loop {  
22//!                match records.next() {  
23//!                    Some(Ok(mut record)) => {
24//!                       //println!("next record");
25//!                       //println!("Record id: {:?}", record.id);
26//!                       for (k, v) in &record.cds.attributes {
27//!                           match record.seq_features.get_sequence_faa(&k) {
28//!                                     Some(value) => { let seq_faa = value.to_string();
29//!				                      println!("{:?}", &seq_faa);
30//!				                      let hydro_values = hydrophobicity(&seq_faa, 21);
31//!						      let mut result = String::new();
32//!						      for hydro in hydro_values {
33//!						           if hydro > 1.6 {
34//!						               println!("possible transmembrane region - score {}",&hydro);  
35//!							       }
36//!						           else {
37//!						               ()
38//!							   }
39//!						      }
40//!                                                 },
41//!                                     _ => (),
42//!                                     };
43//!                       
44//!                           }
45//!                    },
46//!                    Some(Err(e)) => { println!("theres an err {:?}", e); },
47//!                    None => {
48//!                             println!("finished iteration");
49//!                             break;
50//!                             },
51//!                    };
52//!               }
53//!            return Ok(());
54//!   }
55//!```   
56//!
57//!   Example function to calculate the molecular weight of a protein sequence
58//!
59//!```rust
60//!
61//! use clap::Parser;
62//! use std::fs::File;
63//! use microBioRust::gbk::Reader;
64//! use std::io;
65//! use std::collections::HashMap;
66//! use microBioRust_seqmetrics::metrics::molecular_weight;
67//!
68//! pub fn collect_molecular_weight() -> Result<(), anyhow::Error> {
69//!            let file_gbk = File::open("test_output.gbk")?;
70//!            let mut reader = Reader::new(file_gbk);
71//!            let mut records = reader.records();
72//!	    let mut molecular_weight_total: f64 = 0.0;
73//!            loop {  
74//!                match records.next() {  
75//!                    Some(Ok(mut record)) => {
76//!                       //println!("next record");
77//!                       //println!("Record id: {:?}", record.id);
78//!                      for (k, v) in &record.cds.attributes {
79//!                           match record.seq_features.get_sequence_faa(&k) {
80//!                                     Some(value) => {
81//!                                                     let seq_faa = value.to_string();
82//!				                        println!("id: {:?}", &k);
83//!				                        molecular_weight_total = molecular_weight(&seq_faa);
84//!                                                     println!(">{}|{}\n{}", &record.id, &k, molecular_weight_total);
85//!                                                    },
86//!                                     _ => (),
87//!                                     };
88//!                       
89//!                           }
90//!                    },
91//!                    Some(Err(e)) => { println!("theres an err {:?}", e); },
92//!                    None => {
93//!                              println!("finished iteration");
94//!                              break;
95//!                            },
96//!                    }
97//!               }
98//!            return Ok(());
99//!   }
100//!```
101//!
102//!   Example function to count amino acids of each protein as raw counts, see below to generate percentages per protein
103//!
104//!```rust
105//!
106//! use clap::Parser;
107//! use std::fs::File;
108//! use microBioRust::gbk::Reader;
109//! use std::io;
110//! use std::collections::HashMap;
111//! use microBioRust_seqmetrics::metrics::amino_counts;
112//!
113//! pub fn count_aminos() -> Result<(), anyhow::Error> {
114//!            let file_gbk = File::open("test_output.gbk")?;
115//!            let mut reader = Reader::new(file_gbk);
116//!            let mut records = reader.records();
117//!	    let mut results: HashMap<char, u64> = HashMap::new();
118//!            loop {  
119//!                match records.next() {  
120//!                   Some(Ok(mut record)) => {
121//!                       //println!("next record");
122//!                       //println!("Record id: {:?}", record.id);
123//!                       for (k, v) in &record.cds.attributes {
124//!                           match record.seq_features.get_sequence_faa(&k) {
125//!                                     Some(value) => { let seq_faa = value.to_string();
126//!				                      println!("id: {:?}", &k);
127//!				                      results = amino_counts(&seq_faa);
128//!                                                      println!(">{}|{}\n{:?}", &record.id, &k, results);
129//!                                                      },
130//!                                     _ => (),
131//!                                     };
132//!                       
133//!                           }
134//!                    },
135//!                    Some(Err(e)) => { println!("theres an err {:?}", e); },
136//!                    None => {
137//!                             println!("finished iteration");
138//!                             break;
139//!                             },
140//!                    }
141//!               }
142//!            return Ok(());
143//!   }
144//!```
145//!  Example function to calculate and print out the aromaticity of each protein
146//!
147//!```rust
148//! use clap::Parser;
149//! use std::fs::File;
150//! use microBioRust::gbk::Reader;
151//! use std::io;
152//! use std::collections::HashMap;
153//! use microBioRust_seqmetrics::metrics::amino_percentage;
154//!
155//! pub fn aromaticity() -> Result<(), anyhow::Error> {
156//!        // calculated as in biopython with aromaticity according to Lobry, 1994 as the relative freq of Phe+Trp+Tyr
157//!        let file_gbk = File::open("test_output.gbk")?;
158//!	let mut reader = Reader::new(file_gbk);
159//!	let mut records = reader.records();
160//!	let mut results: HashMap<char, f64> = HashMap::new();
161//!	loop {
162//!	   match records.next() {
163//!	      Some(Ok(mut record)) => {
164//!	          for (k, v) in &record.cds.attributes {
165//!		     match record.seq_features.get_sequence_faa(&k) {
166//!		         Some(value) => {  let seq_faa = value.to_string();
167//!			                   results = amino_percentage(&seq_faa);
168//!					   let aromatic_aas = vec!['Y','W','F'];
169//!					   let aromaticity: f64 = aromatic_aas.iter()
170//!					       .filter_map(|&amino| results.get(&amino))
171//!					       .map(|&perc| perc / 100.0)
172//!					       .sum();
173//!					   println!("aromaticity for {} {} is {}",&record.id, &k, &aromaticity);
174//!					  },
175//!			_ => (),
176//!			};
177//!		   }
178//!	         },
179//!	    Some(Err(e)) => { println!("theres an error {:?}", e); },
180//!	    None => {
181//!                   println!("finished iteration");
182//!	              break;
183//!		    },
184//!	    }
185//!       }
186//!      return Ok(());
187//!   }
188//!```
189//!  The purpose of hamming.rs is to allow calculations of the hamming distances between sequences
190//!  The Hamming distance is the minimum number of substitutions required to change one string to another
191//!  It is one of several string metrics for measuring the edit distance between two sequences.
192//!  It does not encompass or take into account any biology
193//!  It is named after the American Mathematician Richard Hamming (wikipedia)
194//!  This is aimed essentially at protein fasta sequences 
195//!  
196//!
197//!  ```
198//!  use microBioRust_seqmetrics::hamming::hamming_matrix;
199//!  use microBioRust_seqmetrics::write_dst_csv::write_distances_csv;
200//!  use tokio::fs::File;
201//!  use std::collections::HashMap;
202//!  use bio::io::fasta;
203//!  use tokio::io;
204//!  use tokio::io::{AsyncWriteExt, BufWriter};
205//!
206//!
207//!  #[tokio::main]
208//!  async fn main() -> Result<(), anyhow::Error> {
209//!            let reader = fasta::Reader::new(std::io::stdin());
210//!            let records: Vec<_> = reader.records().collect::<Result<_, _>>()?;
211//!	    println!("gathering records");
212//!            let sequences: Vec<String> = records
213//!	                          .iter()
214//!				  .map(|rec| String::from_utf8_lossy(rec.seq()).to_string())
215//!				  .collect();
216//!            let ids: Vec<String> = records
217//!	                          .iter()
218//!				  .map(|rec| rec.id().to_string())
219//!				  .collect();
220//!	    println!("gathered ids");
221//!	    let distances = hamming_matrix(&sequences).await?;
222//!	    write_distances_csv(ids, distances, "hamming_dists.csv").await?;
223//!
224//!         Ok(()) 
225//!  }
226//!  ```
227
228#![allow(unused_imports)]
229use microBioRust::gbk::Reader;
230use std::fs::File;
231use std::collections::HashMap;
232use crate::hamming::hamming_matrix;
233use crate::write_dst_csv::write_distances_csv;
234use bio::io::fasta;
235
236
237// Define a macro to generate the getters for each amino acid
238#[macro_export]
239macro_rules! amino_acid_getters {
240    ($struct_name:ident, $( ($field:ident, $full_name:ident, $three_letter:ident, $single_letter:ident) ),* ) => {
241        #[allow(non_snake_case)]
242	#[allow(dead_code)]
243        impl $struct_name {
244            $(
245	        // Capital full name getter
246		fn $field(&self) -> f64 {
247		   self.$field
248		}
249                // Full name getter
250                fn $full_name(&self) -> f64 {
251                    self.$field
252                }
253                // Three-letter code getter
254                fn $three_letter(&self) -> f64 {
255                    self.$field
256                }
257                // Single-letter code getter
258                fn $single_letter(&self) -> f64 {
259                    self.$field
260                }
261            )*
262        }
263    };
264}
265
266#[allow(non_snake_case)]
267#[allow(dead_code)]
268pub struct MolWeights {
269    Alanine: f64,
270    Arginine: f64,
271    Asparagine: f64,
272    Aspartate: f64,
273    Cysteine: f64,
274    Glutamate: f64,
275    Glutamine: f64,
276    Glycine: f64,
277    Histidine: f64,
278    Isoleucine: f64,
279    Leucine: f64,
280    Lysine: f64,
281    Methionine: f64,
282    Phenylalanine: f64,
283    Proline: f64,
284    Serine: f64,
285    Threonine: f64,
286    Tryptophan: f64,
287    Tyrosine: f64,
288    Valine: f64,
289}
290
291#[allow(non_snake_case)]
292#[allow(dead_code)]
293impl MolWeights {
294    fn new() -> Self {
295       Self {
296              //masses from NIST chemistry webbook US Dept of commerce
297              Alanine: 89.0932, //C3H7NO2
298              Arginine: 174.2010, //C6H14N4O2
299              Asparagine: 132.1179, //C4H8N2O3
300              Aspartate: 133.1027, //C4H7NO4
301              Cysteine: 121.158, //C3H7NO2S
302              Glutamate: 147.1293, //C5H9NO4
303              Glutamine: 146.1445, //C5H10N2O3
304              Glycine: 75.0666, //C2H5NO2
305              Histidine: 155.1546, //C6H9N3O2
306              Isoleucine: 131.1729, //C6H13NO2
307	      Leucine: 131.1729, //C6H13NO2
308              Lysine: 146.1876, //C6H14N2O2
309              Methionine: 149.211, //C5H11NO2S
310              Phenylalanine: 165.1891, //C9H11NO2
311              Proline: 115.1305, //C5H9NO2
312              Serine: 105.0926, //C3H7NO2
313              Threonine: 119.1192, //C4H9NO3
314              Tryptophan: 204.2252, //C11H12N2O2
315              Tyrosine: 181.1885, //C9H11NO3
316              Valine: 117.1463, //C5H11NO2
317             }
318      }
319}
320
321
322amino_acid_getters!(MolWeights,
323             (Alanine, alanine, Ala, A),
324             (Arginine, arginine, Arg, R),
325             (Asparagine, asparagine, Asn, N),
326             (Aspartate, aspartate, Asp, D),
327             (Cysteine, cysteine, Cys, C),
328             (Glutamine, glutamine, Gln, Q),
329             (Glutamate, glutamate, Glu, E),
330             (Glycine, glycine, Gly, G),
331             (Histidine, histidine, His, H),
332             (Isoleucine, isoleucine, Ile, I),
333             (Leucine, leucine, Leu, L),
334             (Lysine, lysine, Lys, K),
335             (Methionine, methionine, Met, M),
336             (Phenylalanine, phenylalanine, Phe, F),
337             (Proline, proline, Pro, P),
338             (Serine, serine, Ser, S),
339             (Threonine, threonine, Thr, T),
340             (Tryptophan, tryptophan, Trp, W),
341             (Tyrosine, tyrosine, Tyr, Y),
342             (Valine, valine, Val, V)
343             );
344
345#[allow(non_snake_case)]
346#[allow(dead_code)]
347#[allow(unused_variables)]
348pub fn molecular_weight(protein_seq: &str) -> f64 {
349    let amino_weights: MolWeights = MolWeights::new();
350    let mut total_weight = 0.0;
351    for ch in protein_seq.chars() {
352       match ch {
353           'A' => total_weight += amino_weights.A(),
354	   'R' => total_weight += amino_weights.R(),
355	   'N' => total_weight += amino_weights.N(),
356	   'D' => total_weight += amino_weights.D(),
357	   'C' => total_weight += amino_weights.C(),
358	   'Q' => total_weight += amino_weights.Q(),
359	   'E' => total_weight += amino_weights.E(),
360	   'G' => total_weight += amino_weights.G(),
361	   'H' => total_weight += amino_weights.H(),
362	   'I' => total_weight += amino_weights.I(),
363	   'L' => total_weight += amino_weights.L(),
364	   'K' => total_weight += amino_weights.K(),
365	   'M' => total_weight += amino_weights.M(),
366	   'F' => total_weight += amino_weights.F(),
367	   'P' => total_weight += amino_weights.P(),
368	   'S' => total_weight += amino_weights.S(),
369	   'T' => total_weight += amino_weights.T(),
370	   'W' => total_weight += amino_weights.W(),
371	   'Y' => total_weight += amino_weights.Y(),
372	   'V' => total_weight += amino_weights.V(),
373	   _ => continue,
374	   }
375      }
376   let result_weight = total_weight - ((protein_seq.len() - 1) as f64 * 18.02);
377   result_weight
378}
379
380#[allow(non_snake_case)]
381#[allow(dead_code)]
382pub struct Hydrophobicity {
383    Alanine: f64,
384    Arginine: f64,
385    Asparagine: f64,
386    Aspartate: f64,
387    Cysteine: f64,
388    Glutamate: f64,
389    Glutamine: f64,
390    Glycine: f64,
391    Histidine: f64,
392    Isoleucine: f64,
393    Leucine: f64,
394    Lysine: f64,
395    Methionine: f64,
396    Phenylalanine: f64,
397    Proline: f64,
398    Serine: f64,
399    Threonine: f64,
400    Tryptophan: f64,
401    Tyrosine: f64,
402    Valine: f64,
403}
404
405impl Hydrophobicity {
406    #[allow(non_snake_case)]
407    fn new_KD() -> Self {
408       Self {
409              //Kyte-Doolittle values from the Qiagen resources website
410              Alanine: 1.80, 
411              Arginine: -4.50, 
412              Asparagine: -3.50, 
413              Aspartate: -3.50, 
414              Cysteine: 2.50, 
415              Glutamate: -3.50, 
416              Glutamine: -3.50, 
417              Glycine: -0.40, 
418              Histidine: -3.20, 
419              Isoleucine: 4.50, 
420	      Leucine: 3.80, 
421              Lysine: -3.90, 
422              Methionine: 1.90, 
423              Phenylalanine: 2.80, 
424              Proline: -1.60, 
425              Serine: -0.80, 
426              Threonine: -0.70,
427              Tryptophan: -0.90, 
428              Tyrosine: -1.30, 
429              Valine: 4.20, 
430             }
431      }
432}
433
434amino_acid_getters!(Hydrophobicity,
435             (Alanine, alanine, Ala, A),
436             (Arginine, arginine, Arg, R),
437             (Asparagine, asparagine, Asn, N),
438             (Aspartate, aspartate, Asp, D),
439             (Cysteine, cysteine, Cys, C),
440             (Glutamine, glutamine, Gln, Q),
441             (Glutamate, glutamate, Glu, E),
442             (Glycine, glycine, Gly, G),
443             (Histidine, histidine, His, H),
444             (Isoleucine, isoleucine, Ile, I),
445             (Leucine, leucine, Leu, L),
446             (Lysine, lysine, Lys, K),
447             (Methionine, methionine, Met, M),
448             (Phenylalanine, phenylalanine, Phe, F),
449             (Proline, proline, Pro, P),
450             (Serine, serine, Ser, S),
451             (Threonine, threonine, Thr, T),
452             (Tryptophan, trytophan, Trp, W),
453             (Tyrosine, tyrosine, Tyr, Y),
454             (Valine, valine, Val, V)
455             );
456
457#[allow(non_snake_case)]
458#[allow(dead_code)]
459#[allow(unused_mut)]
460#[allow(unused_variables)]
461pub fn hydrophobicity(protein_seq: &str, window_size: usize) -> Vec<f64> {
462    let mut hydrophobicity: Hydrophobicity = Hydrophobicity::new_KD();
463    let mut total_hydrophobicity: Vec<f64> = Vec::new();
464    let mut window_values: f64 = 0.0;
465    let mut windows: Vec<String> = protein_seq
466           .chars()
467	   .collect::<Vec<_>>()
468	   .windows(window_size)
469	   .map(|window| window.iter().collect())
470	   .collect();
471    for (index, window) in windows.iter().enumerate() {
472       for ch in window.chars() {
473           match ch {
474               'A' => window_values += hydrophobicity.A(),
475	       'R' => window_values += hydrophobicity.R(),
476	       'N' => window_values += hydrophobicity.N(),
477	       'D' => window_values += hydrophobicity.D(),
478	       'C' => window_values += hydrophobicity.C(),
479	       'Q' => window_values += hydrophobicity.Q(),
480	       'E' => window_values += hydrophobicity.E(),
481	       'G' => window_values += hydrophobicity.G(),
482	       'H' => window_values += hydrophobicity.H(),
483	       'I' => window_values += hydrophobicity.I(),
484	       'L' => window_values += hydrophobicity.L(),
485	       'K' => window_values += hydrophobicity.K(),
486	       'M' => window_values += hydrophobicity.M(),
487	       'F' => window_values += hydrophobicity.F(),
488	       'P' => window_values += hydrophobicity.P(),
489	       'S' => window_values += hydrophobicity.S(),
490	       'T' => window_values += hydrophobicity.T(),
491	       'W' => window_values += hydrophobicity.W(),
492	       'Y' => window_values += hydrophobicity.Y(),
493	       'V' => window_values += hydrophobicity.V(),
494	        _ => continue,
495	        }
496          }
497         total_hydrophobicity.push(window_values);
498      }
499   total_hydrophobicity
500}
501
502
503pub fn amino_counts(protein_seq: &str) -> HashMap<char, u64> {
504    let mut counts: HashMap<char, u64> = HashMap::new();
505    for c in protein_seq.chars() {
506       *counts.entry(c).or_insert(0) +=1;
507       }
508    counts
509}
510
511#[allow(unused_mut)]
512#[allow(unused_variables)]
513#[allow(unused_assignments)]
514#[allow(dead_code)]
515pub fn amino_percentage(protein_seq: &str) -> HashMap<char, f64> {
516    let mut percentages: HashMap<char, f64> = HashMap::new();
517    let counts = amino_counts(protein_seq);
518    let seq_len: f64 = (protein_seq.len() as f64) as f64;
519    percentages = counts.iter().map(|(k, &value)| {
520        let percentage = (value as f64 / seq_len) * 100.0;
521	(k.clone(), percentage)
522	}).collect();
523    percentages
524}
525
526    
527#[cfg(test)]
528mod tests {
529    use super::*;
530
531   #[test]
532   #[allow(unused_mut)]
533   #[allow(dead_code)]
534   #[allow(unused_variables)]
535   pub fn suggest_transmembrane_domains() -> Result<(), anyhow::Error> {
536            let file_gbk = File::open("test_output.gbk")?;
537            let mut reader = Reader::new(file_gbk);
538            let mut records = reader.records();
539            loop {  
540                match records.next() {  
541                    Some(Ok(mut record)) => {
542                       //println!("next record");
543                       //println!("Record id: {:?}", record.id);
544                       for (k, v) in &record.cds.attributes {
545                           match record.seq_features.get_sequence_faa(&k) {
546                                     Some(value) => { let seq_faa = value.to_string();
547				                      println!("{:?}", &seq_faa);
548				                      let hydro_values = hydrophobicity(&seq_faa, 21);
549						      let mut result = String::new();
550						      for hydro in hydro_values {
551						           if hydro > 1.6 {
552						               println!("possible transmembrane region - score {}",&hydro);  
553							       }
554						           else {
555						               ()
556							   }
557						      }
558                                                  },
559                                     _ => (),
560                                     };
561                       
562                           }
563                    },
564                    Some(Err(e)) => { println!("theres an err {:?}", e); },
565                    None => {
566                       println!("finished iteration");
567                             break; },
568                    }
569               }
570            return Ok(());
571   }
572   
573   #[test]
574   #[allow(unused_mut)]
575   #[allow(dead_code)]
576   #[allow(unused_variables)]
577   #[allow(unused_assignments)]
578   pub fn collect_molecular_weight() -> Result<(), anyhow::Error> {
579            let file_gbk = File::open("test_output.gbk")?;
580            let mut reader = Reader::new(file_gbk);
581            let mut records = reader.records();
582	    let mut molecular_weight_total: f64 = 0.0;
583            loop {  
584                match records.next() {  
585                    Some(Ok(mut record)) => {
586                       //println!("next record");
587                       //println!("Record id: {:?}", record.id);
588                       for (k, v) in &record.cds.attributes {
589                           match record.seq_features.get_sequence_faa(&k) {
590                                     Some(value) => { let seq_faa = value.to_string();
591				                      println!("id: {:?}", &k);
592				                      molecular_weight_total = molecular_weight(&seq_faa);
593                                                      println!(">{}|{}\n{}", &record.id, &k, molecular_weight_total);
594                                                      },
595                                     _ => (),
596                                     };
597                       
598                           }
599                    },
600                    Some(Err(e)) => { println!("theres an err {:?}", e); },
601                    None => {
602                       println!("finished iteration");
603                             break; },
604                    }
605               }
606            return Ok(());
607   }
608
609   #[test]
610   #[allow(unused_mut)]
611   #[allow(unused_variables)]
612   #[allow(unused_assignments)]
613   pub fn count_aminos() -> Result<(), anyhow::Error> {
614            let file_gbk = File::open("test_output.gbk")?;
615            let mut reader = Reader::new(file_gbk);
616            let mut records = reader.records();
617	    let mut results: HashMap<char, u64> = HashMap::new();
618            loop {  
619                match records.next() {  
620                    Some(Ok(record)) => {
621                       for (k, _v) in &record.cds.attributes {
622                           match record.seq_features.get_sequence_faa(&k) {
623                                     Some(value) => { let seq_faa = value.to_string();
624				                      println!("id: {:?}", &k);
625				                      results = amino_counts(&seq_faa);
626                                                      println!(">{}|{}\n{:?}", &record.id, &k, results);
627                                                      },
628                                     _ => (),
629                                     };
630                       
631                           }
632                    },
633                    Some(Err(e)) => { println!("theres an err {:?}", e); },
634                    None => {
635                       println!("finished iteration");
636                             break; },
637                    }
638               }
639            return Ok(());
640   }
641   
642   #[test]
643   #[allow(dead_code)]
644   #[allow(unused_mut)]
645   #[allow(unused_variables)]
646   #[allow(unused_assignments)]
647   pub fn aromaticity() -> Result<(), anyhow::Error> {
648        // calculated as in biopython with aromaticity according to Lobry, 1994 as the relative freq of Phe+Trp+Tyr
649        let file_gbk = File::open("test_output.gbk")?;
650	let reader = Reader::new(file_gbk);
651	let mut records = reader.records();
652	let mut results: HashMap<char, f64> = HashMap::new();
653	loop {
654	   match records.next() {
655	      Some(Ok(record)) => {
656	          for (k, _v) in &record.cds.attributes {
657		     match record.seq_features.get_sequence_faa(&k) {
658		         Some(value) => {  let seq_faa = value.to_string();
659			                   results = amino_percentage(&seq_faa);
660					   let aromatic_aas = vec!['Y','W','F'];
661					   let aromaticity: f64 = aromatic_aas.iter()
662					       .filter_map(|&amino| results.get(&amino))
663					       .map(|&perc| perc / 100.0)
664					       .sum();
665					   println!("aromaticity for {} {} is {}",&record.id, &k, &aromaticity);
666					  },
667			_ => (),
668			};
669		   }
670	         },
671	    Some(Err(e)) => { println!("theres an error {:?}", e); },
672	    None => { println!("finished iteration");
673	              break;
674		    },
675	    }
676       }
677      return Ok(());
678   }
679   #[tokio::test]
680   pub async fn main() -> Result<(), anyhow::Error> {
681               let reader = fasta::Reader::new(File::open("test_hamming.aln")?);
682               let records: Vec<_> = reader.records().collect::<Result<_, _>>()?;
683               let sequences: Vec<String> = records
684                               .iter()
685                               .map(|rec| String::from_utf8_lossy(rec.seq()).to_string())
686                               .collect();
687               let ids: Vec<String> = records
688                               .iter()
689                               .map(|rec| rec.id().to_string())
690                               .collect();
691         let distances = hamming_matrix(&sequences).await?;
692         let _ = write_distances_csv(ids, distances, "hamming_dists.csv");
693         Ok(())
694         }
695}