ppgg/data_structures/InternalRep/
sequence_tape.rs

1use std::collections::HashMap; 
2use std::path::Path; 
3use std::{fs, panic}; 
4use std::io::Write;
/// An abstraction for a sequence tape, where more than one sequence is stored in a
/// head-to-tail fashion in a single string, together with a hash map that stores, for each
/// sequence name, its boundaries on the tape, i.e. the `(start, end)` byte offsets of the
/// sequence, where `end` is exclusive.
#[derive(Debug, Clone)]
pub struct SequenceTape {
    seq_str: String,
    annotations: HashMap<String, (usize, usize)>,
}

impl SequenceTape {
    /// Create a new sequence tape from a tape string and an annotation hash map.
    ///
    /// ## Errors
    /// Returns an error message when the largest end index found in `annotations` is bigger
    /// than the length of `seq_str`. Only this upper bound is checked here; a range that is
    /// otherwise unusable is reported by [`SequenceTape::get_seq`] when it is looked up.
    ///
    /// ## Example
    ///```
    /// use ppgg_rust::data_structures::InternalRep::sequence_tape::SequenceTape;
    /// use std::collections::HashMap;
    /// let code_string="SEQ1_SEQ2_SEQ3_SEQ4_SEQ5_SEQ6".to_string();
    /// let mut res_map:HashMap<String,(usize,usize)>=HashMap::new();
    /// res_map.insert("1".to_string(), (0,4));
    /// res_map.insert("2".to_string(), (5,9));
    /// res_map.insert("3".to_string(), (10,14));
    /// let seq_tape=SequenceTape::new(code_string, res_map).unwrap(); // this panics in case of a length mismatch
    /// // check the correct mapping between the annotations
    /// assert_eq!("SEQ1",seq_tape.get_seq(&"1".to_string()).unwrap());
    /// assert_eq!("SEQ2",seq_tape.get_seq(&"2".to_string()).unwrap());
    /// assert_eq!("SEQ3",seq_tape.get_seq(&"3".to_string()).unwrap());
    ///```
    pub fn new(seq_str: String, annotations: HashMap<String, (usize, usize)>) -> Result<Self, String> {
        let max_index = SequenceTape::get_max_index(&annotations);
        if max_index > seq_str.len() {
            return Err(format!(
                "Bad Tape Encountered, the provided maximum index is {} while tape length is {} ",
                max_index,
                seq_str.len()
            ));
        }
        Ok(SequenceTape { seq_str, annotations })
    }

    /// Write the sequence tape to a FASTA file on disk, one record per annotated sequence.
    ///
    /// Records are written in tape order, i.e. sorted by their start offset (ties are broken
    /// by the sequence name), so the same tape always produces the same file.
    ///
    /// ## Errors
    /// Returns an error message when an annotation does not describe a valid range of the
    /// tape, when the file cannot be created, or when writing to it fails. All sequences are
    /// resolved before the file is created, so an invalid tape never touches the output file.
    ///
    /// ## Example
    ///```
    /// use std::path::Path;
    /// use ppgg_rust::data_structures::InternalRep::sequence_tape::SequenceTape;
    /// use std::collections::HashMap;
    /// let code_string="SEQ1_SEQ2_SEQ3_SEQ4_SEQ5_SEQ6".to_string();
    /// let mut res_map:HashMap<String,(usize,usize)>=HashMap::new();
    /// res_map.insert("1".to_string(), (0,4));
    /// res_map.insert("2".to_string(), (5,9));
    /// res_map.insert("3".to_string(), (10,14));
    /// let seq_tape=SequenceTape::new(code_string, res_map).unwrap(); // this panics in case of a length mismatch
    /// seq_tape.write_to_fasta(Path::new("test_data/test_file.fasta")).unwrap();
    ///```
    pub fn write_to_fasta(&self, output_file_name: &Path) -> Result<(), String> {
        // Resolve every record up front: a bad annotation is reported before any I/O happens.
        let mut records: Vec<(usize, &String, &str)> = Vec::with_capacity(self.annotations.len());
        for (name, &(start, _)) in self.annotations.iter() {
            records.push((start, name, self.get_seq(name)?));
        }
        // HashMap iteration order is arbitrary; sorting by (start, name) gives tape order.
        records.sort_unstable();

        let file_handle = fs::File::create(output_file_name).map_err(|err_msg| {
            format!("Could not create {} because {}", output_file_name.display(), err_msg)
        })?;
        let write_error = |err_msg: std::io::Error| {
            format!("Could not write to {} because {}", output_file_name.display(), err_msg)
        };

        let mut writer = std::io::BufWriter::new(file_handle);
        for (_, name, sequence) in records {
            write!(writer, ">{}\n{}\n", name, sequence).map_err(&write_error)?;
        }
        // Dropping a BufWriter swallows flush errors, so flush explicitly and report them.
        writer.flush().map_err(&write_error)?;
        Ok(())
    }

    /// ## Summary
    /// Return a reference to the annotation hash map, which maps each sequence name to its
    /// `(start, end)` boundaries on the tape.
    pub fn get_annotation(&self) -> &HashMap<String, (usize, usize)> {
        &self.annotations
    }

    /// ## Summary
    /// Return the sequence corresponding to the provided sequence name.
    ///
    /// ## Errors
    /// Returns an error message when `seq_name` is not annotated on this tape, or when its
    /// annotation is not a valid range of the tape (start after end, end past the tape, or an
    /// offset that does not fall on a character boundary).
    pub fn get_seq(&self, seq_name: &str) -> Result<&str, String> {
        let &(start, end) = self.annotations.get(seq_name).ok_or_else(|| {
            format!(
                "The provided sequence name: {}, is not defined in the current table",
                seq_name
            )
        })?;
        // `str::get` is the checked form of slicing: it yields `None` where `[start..end]` would panic.
        self.seq_str.get(start..end).ok_or_else(|| {
            format!(
                "The annotation of the sequence name: {}, covers {}..{} which is not a valid range of the tape, the tape length is {}",
                seq_name,
                start,
                end,
                self.seq_str.len()
            )
        })
    }

    /// Return the maximum end index found in the provided annotation hash map, or 0 when the
    /// hash map is empty.
    /// ## Example
    ///```
    /// use std::collections::HashMap;
    /// use ppgg_rust::data_structures::InternalRep::sequence_tape::SequenceTape;
    /// let code_string="SEQ1_SEQ2_SEQ3_SEQ4_SEQ5_SEQ6".to_string();
    /// let mut res_map:HashMap<String,(usize,usize)>=HashMap::new();
    /// res_map.insert("1".to_string(), (0,4));
    /// res_map.insert("2".to_string(), (5,9));
    /// res_map.insert("3".to_string(), (10,14));
    /// res_map.insert("4".to_string(), (15,19));
    /// res_map.insert("5".to_string(), (20,24));
    /// res_map.insert("6".to_string(), (25,29));
    /// let seq_tape=SequenceTape::new(code_string, res_map).unwrap();
    /// assert_eq!(SequenceTape::get_max_index(seq_tape.get_annotation()),29);
    ///```
    pub fn get_max_index(annotation: &HashMap<String, (usize, usize)>) -> usize {
        annotation.values().map(|&(_, end)| end).max().unwrap_or(0)
    }
}
#[cfg(test)]
pub mod test_sequence_tape_module {
    use super::*;

    /// Build the six-sequence tape shared by the tests below: "SEQ1" to "SEQ6" joined by
    /// underscores and annotated under the names "1" to "6".
    fn example_tape() -> SequenceTape {
        let code_string = "SEQ1_SEQ2_SEQ3_SEQ4_SEQ5_SEQ6".to_string();
        let mut res_map: HashMap<String, (usize, usize)> = HashMap::new();
        res_map.insert("1".to_string(), (0, 4));
        res_map.insert("2".to_string(), (5, 9));
        res_map.insert("3".to_string(), (10, 14));
        res_map.insert("4".to_string(), (15, 19));
        res_map.insert("5".to_string(), (20, 24));
        res_map.insert("6".to_string(), (25, 29));
        SequenceTape::new(code_string, res_map).unwrap()
    }

    /// Every annotated name must map back to its own slice of the tape.
    #[test]
    pub fn test_sequence_tape() {
        let seq_tape = example_tape();
        // check the correct mapping between the annotations
        assert_eq!("SEQ1", seq_tape.get_seq(&"1".to_string()).unwrap());
        assert_eq!("SEQ2", seq_tape.get_seq(&"2".to_string()).unwrap());
        assert_eq!("SEQ3", seq_tape.get_seq(&"3".to_string()).unwrap());
        assert_eq!("SEQ4", seq_tape.get_seq(&"4".to_string()).unwrap());
        assert_eq!("SEQ5", seq_tape.get_seq(&"5".to_string()).unwrap());
        assert_eq!("SEQ6", seq_tape.get_seq(&"6".to_string()).unwrap());
    }

    /// An annotation that ends past the tape must be rejected by the constructor.
    #[test]
    pub fn test_sequence_tape2() {
        let code_string = "SEQ1_SEQ2_SEQ3".to_string();
        let mut res_map: HashMap<String, (usize, usize)> = HashMap::new();
        res_map.insert("1".to_string(), (0, 4));
        res_map.insert("2".to_string(), (5, 9));
        res_map.insert("3".to_string(), (10, code_string.len() + 1));

        // Assert the error directly instead of relying on `unwrap` panicking.
        assert!(SequenceTape::new(code_string, res_map).is_err());
    }

    /// The FASTA file must contain exactly one header/sequence pair per annotation.
    #[test]
    pub fn test_fasta_write() {
        let seq_tape = example_tape();
        // Write into the temp directory so the test does not depend on the working directory.
        let output_path = std::env::temp_dir()
            .join(format!("sequence_tape_test_{}.fasta", std::process::id()));
        seq_tape.write_to_fasta(&output_path).unwrap();
        let written = std::fs::read_to_string(&output_path).unwrap();
        // Best-effort cleanup; a leftover file must not fail the test.
        let _ = std::fs::remove_file(&output_path);

        let lines: Vec<&str> = written.lines().collect();
        assert_eq!(12, lines.len());
        // Compare the records sorted, so the check does not depend on the order they were written in.
        let mut records: Vec<(&str, &str)> = lines.chunks(2).map(|pair| (pair[0], pair[1])).collect();
        records.sort();
        assert_eq!(
            vec![
                (">1", "SEQ1"),
                (">2", "SEQ2"),
                (">3", "SEQ3"),
                (">4", "SEQ4"),
                (">5", "SEQ5"),
                (">6", "SEQ6"),
            ],
            records
        );
    }
}