microBioRust/
gbk.rs

1//! # A Genbank to GFF parser
2//!
3//!
4//! You are able to parse genbank and save as a GFF (gff3) format as well as extracting DNA sequences, gene DNA sequences (ffn) and protein fasta sequences (faa)
5//!
6//! You can also create new records and save as a genbank (gbk) format
7//!
8//! ## Detailed Explanation
9//!
10//!
11//! The Genbank parser contains:
12//!
13//! Records - a top level structure which consists of either one record (single genbank) or multiple instances of record (multi-genbank).
14//!
15//! Each Record contains:
16//!
17//! 1. A source, ```SourceAttributes```, construct(enum) of counter (source name), start, stop [of source or contig], organism, mol_type, strain, type_material, db_xref
18//! 2. Features, ```FeatureAttributes```, construct(enum) of counter (locus tag), gene (if present), product, codon start, strand, start, stop [of cds/gene]
19//! 3. Sequence features, ```SequenceAttributes```, construct(enum) of counter (locus tag), sequence_ffn (DNA gene sequence) sequence_faa (protein translation), strand, codon start, start, stop [cds/gene]
20//! 4. The DNA sequence of the whole record (or contig)
21//!
22//!  Example to extract and print all the protein sequence fasta, example using getters or get_ functionality
23//!
24//!
25//!```rust
26//! use clap::Parser;
27//! use std::fs::File;
28//! use microBioRust::gbk::Reader;
29//! use std::io;
30//!
31//! #[derive(Parser, Debug)]
32//! #[clap(author, version, about)]
33//! struct Arguments {
34//! #[clap(short, long)]
35//! filename: String,
36//! }
37//!
38//! pub fn genbank_to_faa() -> Result<(), anyhow::Error> {
39//!            let args = Arguments::parse();
40//!            let file_gbk = File::open(args.filename)?;
41//!            let mut reader = Reader::new(file_gbk);
42//!            let mut records = reader.records();
43//!            loop {
44//!                //collect from each record advancing on a next record basis, count cds records
45//!                match records.next() {	
46//!                    Some(Ok(mut record)) => {
47//!		                     for (k, v) in &record.cds.attributes {
48//!		                         match record.seq_features.get_sequence_faa(&k) {
49//!		                             Some(value) => { let seq_faa = value.to_string();
50//!				                              println!(">{}|{}\n{}", &record.id, &k, seq_faa);
51//!						              },
52//!				             _ => (),
53//!				             };
54//!		                         }
55//!                                      },
56//!	               Some(Err(e)) => { println!("Error encountered - an err {:?}", e); },
57//!	               None => break,
58//!	               }
59//!                 }
60//!            return Ok(());
61//!  }
62//!```
63//!
64//!  Example to extract the protein sequences with simplified genbank! macro use
65//!
66//!```rust
67//! use clap::Parser;
68//! use std::fs::File;
69//! use microBioRust::gbk::Reader;
70//! use std::io;
71//! use microBioRust::genbank;
72//!
73//!
74//! #[derive(Parser, Debug)]
75//! #[clap(author, version, about)]
76//! struct Arguments {
77//! #[clap(short, long)]
78//! filename: String,
79//! }
80//!
81//! pub fn genbank_to_faa() -> Result<(), anyhow::Error> {
82//!            let args = Arguments::parse();
83//!            let records = genbank!(&args.filename);
84//!            for record in records {
85//!	          for (k, v) in &record.cds.attributes {
86//!                  if let Some(seq) = record.seq_features.get_sequence_faa(k) {
87//!		        println!(">{}|{}\n{}", &record.id, &k, seq);
88//!                     }
89//!                  }
90//!            }
91//!            return Ok(());
92//!  }
93//!
94//!```
95//!  Example to save a provided multi- or single genbank file as a GFF file (by joining any multi-genbank)
96//!
97//! ```rust
98//!    use microBioRust::gbk::gff_write;
99//!    use microBioRust::gbk::Reader;
100//!    use microBioRust::gbk::Record;
101//!    use std::collections::BTreeMap;
102//!    use std::fs::File;
103//!    use clap::Parser;
104//!    use std::io;
105//!
106//!   #[derive(Parser, Debug)]
107//!   #[clap(author, version, about)]
108//!   struct Arguments {
109//!   #[clap(short, long)]
110//!   filename: String,
111//!   }
112//!
113//!    pub fn genbank_to_gff() -> io::Result<()> {
114//!        let args = Arguments::parse();
115//!        let file_gbk = File::open(&args.filename)?;
116//!        let prev_start: u32 = 0;
117//!        let mut prev_end: u32 = 0;
118//!        let mut reader = Reader::new(file_gbk);
119//!        let mut records = reader.records();
120//!        let mut read_counter: u32 = 0;
121//!        let mut seq_region: BTreeMap<String, (u32,u32)> = BTreeMap::new();
122//!        let mut record_vec: Vec<Record> = Vec::new();
123//!        loop {  
124//!            match records.next() {	
125//!                Some(Ok(mut record)) => {
126//!	               println!("next record");
127//!                    println!("Record id: {:?}", record.id);
128//!		       let source = record.source_map.source_name.clone().expect("issue collecting source name");
129//!		       let beginning = match record.source_map.get_start(&source) {
130//!		                        Some(value) => value.get_value(),
131//!				        _ => 0,
132//!					};
133//!		       let ending = match record.source_map.get_stop(&source) {
134//!		                        Some(value) => value.get_value(),
135//!					_ => 0,
136//!					};
137//!		       if ending + prev_end < beginning + prev_end {
138//!		          println!("debug: end value smaller is than the start {:?}", beginning);
139//!		          }
140//!		       seq_region.insert(source, (beginning + prev_end, ending + prev_end));
141//!		       record_vec.push(record);
142//!                    // Add additional fields to print if needed
143//!		       read_counter+=1;
144//!		       prev_end+=ending; // create the joined record if there are multiple
145//!                    },
146//!	            Some(Err(e)) => { println!("theres an err {:?}", e); },
147//!	            None => {
148//!	               println!("finished iteration");
149//!	                     break; },
150//!	            }
151//!            }
152//!        let output_file = format!("{}.gff", &args.filename);
153//!        if std::path::Path::new(&output_file).exists() {
154//!           println!("Deleting existing file: {}", &output_file);
155//!           std::fs::remove_file(&output_file).expect("NOOO");
156//!           }
157//!        gff_write(seq_region.clone(), record_vec, &output_file, true);
158//!        println!("Total records processed: {}", read_counter);
159//!        return Ok(());
160//!    }
161//!```
162//! Example to create a completely new record, use of setters or set_ functionality
163//!
164//! To write into GFF format requires gff_write(seq_region, record_vec, filename, true or false)
165//!
166//! The seq_region is the region of interest to save with name and DNA coordinates such as ``` seqregion.entry("source_1".to_string(), (1,897))```
167//! This makes it possible to save the whole file or to subset it 
168//!
169//! record_vec is a list of the records.  If there is only one record, include this as a vec using ``` vec![record] ```
170//!
171//! The boolean true/false describes whether the DNA sequence should be included in the GFF3 file
172//!
173//! To write into genbank format requires gbk_write(seq_region, record_vec, filename), no true or false since genbank format will include the DNA sequence
174//!
175//!
176//! ```rust
177//!    use microBioRust::gbk::gff_write;
178//!    use microBioRust::gbk::RangeValue;
179//!    use microBioRust::gbk::Record;
180//!    use std::fs::File;
181//!    use std::collections::BTreeMap;
182//!
183//!     pub fn create_new_record() -> Result<(), anyhow::Error> {
184//!         let filename = format!("new_record.gff");
185//!         if std::path::Path::new(&filename).exists() {
186//!           std::fs::remove_file(&filename)?;
187//!           }
188//!	    let mut record = Record::new();
189//!	    let mut seq_region: BTreeMap<String, (u32,u32)> = BTreeMap::new();
190//!         //example from E.coli K12
191//!	    seq_region.insert("source_1".to_string(), (1,897));
192//!         //Add the source into SourceAttributes
193//!         record.source_map
194//!	         .set_counter("source_1".to_string())
195//!	         .set_start(RangeValue::Exact(1))
196//!	         .set_stop(RangeValue::Exact(897))
197//!	         .set_organism("Escherichia coli".to_string())
198//!	         .set_mol_type("DNA".to_string())
199//!	         .set_strain("K-12 substr. MG1655".to_string())
200//!		 .set_type_material("type strain of Escherichia coli K12".to_string())
201//!	         .set_db_xref("PRJNA57779".to_string());
202//!         //Add the features into FeatureAttributes, here we are setting two features, i.e. coding sequences or genes
203//!	    record.cds
204//!                  .set_counter("b3304".to_string())
205//!                  .set_start(RangeValue::Exact(1))
206//!                  .set_stop(RangeValue::Exact(354))
207//!                  .set_gene("rplR".to_string())
208//!                  .set_product("50S ribosomal subunit protein L18".to_string())
209//!                  .set_codon_start(1)
210//!                  .set_strand(-1);
211//!	    record.cds
212//!                  .set_counter("b3305".to_string())
213//!                  .set_start(RangeValue::Exact(364))
214//!                  .set_stop(RangeValue::Exact(897))
215//!                  .set_gene("rplF".to_string())
216//!                  .set_product("50S ribosomal subunit protein L6".to_string())
217//!                  .set_codon_start(1)
218//!                  .set_strand(-1);
219//!         //Add the sequences for the coding sequence (CDS) into SequenceAttributes
220//!	    record.seq_features
221//!	         .set_counter("b3304".to_string())
222//!		 .set_start(RangeValue::Exact(1))
223//!                 .set_stop(RangeValue::Exact(354))
224//!                 .set_sequence_ffn("ATGGATAAGAAATCTGCTCGTATCCGTCGTGCGACCCGCGCACGCCGCAAGCTCCAGGAG
225//!CTGGGCGCAACTCGCCTGGTGGTACATCGTACCCCGCGTCACATTTACGCACAGGTAATT
226//!GCACCGAACGGTTCTGAAGTTCTGGTAGCTGCTTCTACTGTAGAAAAAGCTATCGCTGAA
227//!CAACTGAAGTACACCGGTAACAAAGACGCGGCTGCAGCTGTGGGTAAAGCTGTCGCTGAA
228//!CGCGCTCTGGAAAAAGGCATCAAAGATGTATCCTTTGACCGTTCCGGGTTCCAATATCAT
229//!GGTCGTGTCCAGGCACTGGCAGATGCTGCCCGTGAAGCTGGCCTTCAGTTCTAA".to_string())
230//!                 .set_sequence_faa("MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGSEVLVAASTVEKAIAE
231//!QLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQALADAAREAGLQF".to_string())
232//!                 .set_codon_start(1)
233//!                 .set_strand(-1);
234//!	    record.seq_features
235//!	         .set_counter("bb3305".to_string())
236//!		 .set_start(RangeValue::Exact(364))
237//!                 .set_stop(RangeValue::Exact(897))
238//!                 .set_sequence_ffn("ATGTCTCGTGTTGCTAAAGCACCGGTCGTTGTTCCTGCCGGCGTTGACGTAAAAATCAAC
239//!GGTCAGGTTATTACGATCAAAGGTAAAAACGGCGAGCTGACTCGTACTCTCAACGATGCT
240//!GTTGAAGTTAAACATGCAGATAATACCCTGACCTTCGGTCCGCGTGATGGTTACGCAGAC
241//!GGTTGGGCACAGGCTGGTACCGCGCGTGCCCTGCTGAACTCAATGGTTATCGGTGTTACC
242//!GAAGGCTTCACTAAGAAGCTGCAGCTGGTTGGTGTAGGTTACCGTGCAGCGGTTAAAGGC
243//!AATGTGATTAACCTGTCTCTGGGTTTCTCTCATCCTGTTGACCATCAGCTGCCTGCGGGT
244//!ATCACTGCTGAATGTCCGACTCAGACTGAAATCGTGCTGAAAGGCGCTGATAAGCAGGTG
245//!ATCGGCCAGGTTGCAGCGGATCTGCGCGCCTACCGTCGTCCTGAGCCTTATAAAGGCAAG
246//!GGTGTTCGTTACGCCGACGAAGTCGTGCGTACCAAAGAGGCTAAGAAGAAGTAA".to_string())
247//!                 .set_sequence_faa("MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKHADNTLTFGPRDGYAD
248//!GWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNVINLSLGFSHPVDHQLPAG
249//!ITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYADEVVRTKEAKKK".to_string())
250//!                 .set_codon_start(1)
251//!                 .set_strand(-1);
252//!         //Add the full sequence of the entire record into the record.sequence
253//!	    record.sequence = "TTAGAACTGAAGGCCAGCTTCACGGGCAGCATCTGCCAGTGCCTGGACACGACCATGATA
254//!TTGGAACCCGGAACGGTCAAAGGATACATCTTTGATGCCTTTTTCCAGAGCGCGTTCAGC
255//!GACAGCTTTACCCACAGCTGCAGCCGCGTCTTTGTTACCGGTGTACTTCAGTTGTTCAGC
256//!GATAGCTTTTTCTACAGTAGAAGCAGCTACCAGAACTTCAGAACCGTTCGGTGCAATTAC
257//!CTGTGCGTAAATGTGACGCGGGGTACGATGTACCACCAGGCGAGTTGCGCCCAGCTCCTG
258//!GAGCTTGCGGCGTGCGCGGGTCGCACGACGGATACGAGCAGATTTCTTATCCATAGTGTT
259//!ACCTTACTTCTTCTTAGCCTCTTTGGTACGCACGACTTCGTCGGCGTAACGAACACCCTT
260//!GCCTTTATAAGGCTCAGGACGACGGTAGGCGCGCAGATCCGCTGCAACCTGGCCGATCAC
261//!CTGCTTATCAGCGCCTTTCAGCACGATTTCAGTCTGAGTCGGACATTCAGCAGTGATACC
262//!CGCAGGCAGCTGATGGTCAACAGGATGAGAGAAACCCAGAGACAGGTTAATCACATTGCC
263//!TTTAACCGCTGCACGGTAACCTACACCAACCAGCTGCAGCTTCTTAGTGAAGCCTTCGGT
264//!AACACCGATAACCATTGAGTTCAGCAGGGCACGCGCGGTACCAGCCTGTGCCCAACCGTC
265//!TGCGTAACCATCACGCGGACCGAAGGTCAGGGTATTATCTGCATGTTTAACTTCAACAGC
266//!ATCGTTGAGAGTACGAGTCAGCTCGCCGTTTTTACCTTTGATCGTAATAACCTGACCGTT
267//!GATTTTTACGTCAACGCCGGCAGGAACAACGACCGGTGCTTTAGCAACACGAGACAT".to_string();
268//!           gff_write(seq_region, vec![record], &filename, true);
269//!	   return Ok(());
270//!      }
271//!```
272//!
273
274use std::io::{self, Write};
275use std::fs;
276use regex::Regex;
277use itertools::Itertools;
278use std::vec::Vec;
279use std::str;
280use std::convert::AsRef;
281use protein_translate::translate;
282use std::path::Path;
283use bio::alphabets::dna::revcomp;
284use anyhow::anyhow;
285use std::collections::BTreeMap;
286use std::fs::{OpenOptions, File};
287use anyhow::Context;
288use std::collections::HashSet;
289use paste::paste;
290use std::convert::TryInto;
291use chrono::prelude::*;
292
293
294/// macro to create get_ functions for the values
295#[macro_export]
296macro_rules! create_getters {
297    // macro for creating get methods
298    ($struct_name:ident, $attributes:ident, $enum_name:ident, $( $field:ident { value: $type:ty } ),* ) => {
299		impl $struct_name {
300            $(
301	        // creates a get method for each of the fields in the SourceAttributes, FeatureAttributes and SequenceAttributes
302	        paste! {
303                  pub fn [<get_$field:snake>](&self, key: &str) -> Option<&$type> {
304                    // Get the HashSet for the key (e.g., "source_1")
305                    self.$attributes.get(key).and_then(|set| {
306                        // Iterate over the HashSet to find the correct SourceAttributes value
307                        set.iter().find_map(|attr| {
308                            if let $enum_name::$field { value } = attr {
309                                Some(value)
310                            } else {
311                                None
312                            }
313                        })
314                    })
315                }
316	      }
317            )*
318        }
319    };
320}
321
322/// macro to create the set_ functions for the values in a Builder format
323#[macro_export] 
324macro_rules! create_builder {
325    // Macro for creating attribute builders for SourceAttributes, FeatureAttributes and SequenceAttributes
326    ($builder_name:ident, $attributes:ident, $enum_name:ident, $counter_name:ident, $( $field:ident { value: $type:ty } ),* ) => {
327        impl $builder_name {
328            pub fn new() -> Self {
329                $builder_name {
330                    $attributes: BTreeMap::new(),
331                    $counter_name: None,
332                }
333            }
334            //sets the key for the BTreeMap 
335            pub fn set_counter(&mut self, counter: String) -> &mut Self {
336                self.$counter_name = Some(counter);
337		self
338            }    
339            //function to insert the fields from the enum into the attributes
340            pub fn insert_to(&mut self, value: $enum_name) {
341	        if let Some(counter) = &self.$counter_name {
342		    self.$attributes
343		        .entry(counter.to_string())
344                        .or_insert_with(HashSet::new)
345                        .insert(value);
346		    }
347		else {
348		    panic!("Counter key not set"); // Needs better error handling
349		    }
350            }
351            // function to set each of the alternative fields in the builder
352            $(
353	      paste! { 
354	        pub fn [<set_$field:snake>](&mut self, value: $type) -> &mut Self {
355	           self.insert_to($enum_name::$field { value });
356		   self
357	           }
358		}
359	    )*
360	    // build function to the attributes
361	    pub fn build(self) -> BTreeMap<String, HashSet<$enum_name>> {
362	        self.$attributes
363            }
364	    // function to iterate immutably through the BTreeMap as required
365	    pub fn iter_sorted(&self) -> std::collections::btree_map::Iter<String, HashSet<$enum_name>> {
366	        self.$attributes.iter()
367	    }
368	    //default function
369	    pub fn default() -> Self {
370	        $builder_name {
371		    $attributes: BTreeMap::new(),
372		    $counter_name: None,
373		    }
374		}
375            }
376     };
377}
378
379#[macro_export]
380macro_rules! genbank {
381    ($filename:expr) => {{
382        use std::fs::File;
383        use std::io::BufReader;
384        let file = File::open($filename)
385            .unwrap_or_else(|e| panic!("Could not open file {}: {}", $filename, e));
386        let mut reader = $crate::gbk::Reader::new(file);
387        let mut vec = Vec::new();
388        for rec in reader.records() {
389            match rec {
390                Ok(r) => { println!("this is r {:?}", &r);
391		           vec.push(r);
392			   }
393                Err(e) => panic!("Error reading record: {:?}", e),
394            }
395        }
396        vec
397    }};
398}
399
400
401//const MAX_GBK_BUFFER_SIZE: usize = 512;
402/// A Gbk reader.
403
404#[derive(Debug)]
405#[allow(unused_mut)]
406pub struct Records<B>
407where
408    B: io::BufRead,
409{
410    reader: Reader<B>,
411    error_has_occurred: bool,
412}
413
414impl<B> Records<B>
415where
416    B: io::BufRead,
417{
418    #[allow(unused_mut)]
419    pub fn new(mut reader: Reader<B>) -> Self {
420        Records {
421            reader: reader,
422            error_has_occurred: false,
423        }
424    }
425}
426
427impl<B> Iterator for Records<B>
428where
429    B: io::BufRead,
430{
431    type Item = Result<Record, anyhow::Error>;
432
433    fn next(&mut self) -> Option<Result<Record, anyhow::Error>> {
434            if self.error_has_occurred {
435	        println!("error was encountered in iteration");
436		None
437	        } else {
438                let mut record = Record::new();
439                match self.reader.read(&mut record) {
440	            Ok(_) => { if record.is_empty() {
441		       None }
442		       else {
443		           Some(Ok(record))
444			   }
445		        }
446		    Err(err) => {
447		        //println!("we encountered an error {:?}", &err);
448		        self.error_has_occurred = true;
449		        Some(Err(anyhow!("next record read error {:?}",err)))
450		        }
451                    }
452                }
453     }
454}
455
456pub trait GbkRead {
457    fn read(&mut self, record: &mut Record) -> Result<Record, anyhow::Error>;
458}
459
460///per line reader for the file 
461#[derive(Debug, Default)]
462pub struct Reader<B> {
463    reader: B,
464    line_buffer: String,
465}
466
467impl Reader<io::BufReader<fs::File>> {
468    /// Read Gbk from given file path in given format.
469    pub fn from_file<P: AsRef<Path> + std::fmt::Debug>(path: P) -> anyhow::Result<Self> {
470        fs::File::open(&path)
471            .map(Reader::new)
472            .with_context(|| format!("Failed to read Gbk from {:#?}", path))
473    }
474}
475
476impl<R> Reader<io::BufReader<R>>
477where
478     R: io::Read,
479{
480    //// Create a new Gbk reader given an instance of `io::Read` in given format
481    pub fn new(reader: R) -> Self {
482        Reader {
483            reader: io::BufReader::new(reader),
484	    line_buffer: String::new(),
485        }
486    }
487}
488
489impl<B> Reader<B>
490where
491    B: io::BufRead,
492{
493    pub fn from_bufread(bufreader: B) -> Self {
494        Reader {
495	     reader: bufreader,
496	     line_buffer: String::new(),
497	}
498    }
499    //return an iterator over the records of the genbank file
500    pub fn records(self) -> Records<B> {
501          Records {
502	   reader: self,
503	   error_has_occurred: false,
504	}
505    }
506}
507
508///main gbk parser 
509impl<'a, B> GbkRead for Reader<B>
510where
511    B: io::BufRead,
512{
513    #[allow(unused_mut)]
514    #[allow(unused_variables)]
515    #[allow(unused_assignments)]
516    fn read(&mut self, record: &mut Record) -> Result<Record, anyhow::Error> {
517        record.rec_clear();
518	//println!("reading new record");
519	//initialise variables
520	let mut sequences = String::new();
521	let mut source_map = SourceAttributeBuilder::new();
522        let mut cds = FeatureAttributeBuilder::new();
523        let mut seq_features = SequenceAttributeBuilder::new();
524	let mut cds_counter: i32 = 0;
525	let mut source_counter: i32 = 0;
526	let mut prev_end: u32 = 0;
527	let mut organism = String::new();
528	let mut mol_type = String::new();
529	let mut strain = String::new();
530	let mut source_name = String::new();
531	let mut type_material = String::new();
532	let mut theend: u32 = 0;
533	let mut thestart: u32 = 0;
534	let mut db_xref = String::new();
535	//check if there are any more lines, if not return the record as is
536    	if self.line_buffer.is_empty() {
537	    self.reader.read_line(&mut self.line_buffer)?;
538	    if self.line_buffer.is_empty() {
539	        return Ok(record.to_owned());
540	        }
541            }
542	//main loop to populate the attributes and iterate through the file
543	'outer: while !self.line_buffer.is_empty() {
544	    //println!("is line buffer {:?}", &self.line_buffer);
545	    //collect the header fields
546	    if self.line_buffer.starts_with("LOCUS") {
547			record.rec_clear();
548	            	let mut header_fields: Vec<&str> = self.line_buffer.split_whitespace().collect();
549	                let mut header_iter = header_fields.iter();
550	                header_iter.next();
551	                record.id = header_iter.next()
552						.ok_or_else(|| anyhow::anyhow!("missing record id"))? // Get &str or error
553						.to_string();
554	                let lens = header_iter.next()
555						.ok_or_else(|| anyhow::anyhow!("missing record length"))? // Get &str or error
556						.to_string();
557	                record.length = lens.trim().parse::<u32>()?;
558			self.line_buffer.clear();
559			}
560	    //collect the source fields and populate the source_map and source_attributes
561	    if self.line_buffer.starts_with("     source") {
562	        let re = Regex::new(r"([0-9]+)[[:punct:]]+([0-9]+)")?;
563		let location = re.captures(&self.line_buffer).ok_or_else(|| anyhow::anyhow!("missing location"))?;
564		let start = &location[1];
565		let end = &location[2];
566		thestart = start.trim().parse::<u32>()?;
567		source_counter+=1;
568		source_name = format!("source_{}_{}",record.id,source_counter).to_string();
569		thestart += prev_end;
570		theend = end.trim().parse::<u32>()? + prev_end;
571		//println!("so the start and end are {:?} {:?}", &thestart, &theend);
572		loop {
573		    self.line_buffer.clear();
574		    self.reader.read_line(&mut self.line_buffer)?;
575		    if self.line_buffer.starts_with("     CDS") {
576		            //println!("this source name {:?} start {:?} end {:?} organism {:?} mol_type {:?} strain {:?} type_material {:?} db_xref {:?}", &source_name,&thestart, &theend, &organism, &mol_type, &strain, &type_material, &db_xref);
577		            record.source_map
578			       .set_counter(source_name.to_string())
579			       .set_start(RangeValue::Exact(thestart))
580			       .set_stop(RangeValue::Exact(theend))
581			       .set_organism(organism.clone())
582			       .set_mol_type(mol_type.clone())
583			       .set_strain(strain.clone())
584			      // culture_collection.clone()
585			       .set_type_material(type_material.clone())
586			       .set_db_xref(db_xref.clone());
587		            continue 'outer;
588			}
589		    if self.line_buffer.contains("/organism") {
590		        let org: Vec<&str> = self.line_buffer.split('\"').collect();
591			organism = org[1].to_string();
592			}
593		    if self.line_buffer.contains("/mol_type") {
594		    	let mol: Vec<&str> = self.line_buffer.split('\"').collect();
595			mol_type = mol[1].to_string();
596			}
597		    if self.line_buffer.contains("/strain") {
598		    	let stra: Vec<&str> = self.line_buffer.split('\"').collect();
599			strain = stra[1].to_string();
600			}
601		//    if self.line_buffer.contains("/culture_collection") {
602		//        let cc: Vec<&str> = self.line_buffer.split('\"').collect();
603	//		culture_collection = cc[1].to_string();
604	//		}
605		    if self.line_buffer.contains("/type_material") {
606		        let mat: Vec<&str> = self.line_buffer.split('\"').collect();
607			type_material = mat[1].to_string();
608			}
609		    if self.line_buffer.contains("/db_xref") {
610		        let db: Vec<&str> = self.line_buffer.split('\"').collect();
611			db_xref = db[1].to_string();
612			}
613		    }
614		}
615	    //populate the FeatureAttributes and the coding sequence annotation
616	    if self.line_buffer.starts_with("     CDS") {
617	        let mut startiter: Vec<_> = Vec::new();
618		let mut enditer: Vec<_> = Vec::new();
619		let mut thestart: u32 = 0;
620		let mut thend: u32 = 0;
621		let mut joined: bool = false;
622	        //gather the feature coordinates
623		let joined = if self.line_buffer.contains("join") { true } else { false };
624	        let re = Regex::new(r"([0-9]+)[[:punct:]]+([0-9]+)")?;
625		//let matches: Vec<&regex::Captures> = re.captures_iter(&self.line_buffer).collect();
626		for cap in re.captures_iter(&self.line_buffer) {
627		   cds_counter+=1;
628		   thestart = cap[1].parse().expect("failed to match and parse numerical start");
629		   theend = cap[2].parse().expect("failed to match and parse numerical end");
630		   startiter.push(thestart);
631		   enditer.push(theend);
632		   }
633		let mut gene = String::new();
634		let mut product = String::new();
635		let strand: i8 = if self.line_buffer.contains("complement") {-1} else {1};
636                let mut locus_tag = String::new();
637                let mut codon_start: u8 = 1;
638		//loop to populate the feature attributes, when complete it calls to the outer loop directly to prevent reading a new line into self.line_buffer
639		loop {
640		        self.line_buffer.clear();
641			self.reader.read_line(&mut self.line_buffer)?;
642                        if self.line_buffer.contains("/locus_tag=") {
643                            let loctag: Vec<&str> = self.line_buffer.split('\"').collect();
644                            locus_tag = loctag[1].to_string();
645			    //println!("designated locus tag {:?}", &locus_tag);
646                            }
647                        if self.line_buffer.contains("/codon_start") {
648                            let codstart: Vec<&str> = self.line_buffer.split('=').collect();
649                            let valstart = codstart[1].trim().parse::<u8>()?;
650                            codon_start = valstart;
651			    //println!("designated codon start {:?} {:?}", &codon_start, &locus_tag);
652                            }
653                        if self.line_buffer.contains("/gene=") {
654		            let gen: Vec<&str> = self.line_buffer.split('\"').collect();
655			    gene = gen[1].to_string();
656			    //println!("gene designated {:?} {:?}", &gene, &locus_tag);
657			    }
658			if self.line_buffer.contains("/product") {
659		            let prod: Vec<&str> = self.line_buffer.split('\"').collect();
660			    product = substitute_odd_punctuation(prod[1].to_string())?;
661			    //println!("designated product {:?} {:?}", &product, &locus_tag);
662			    }
663			if self.line_buffer.starts_with("     CDS") || self.line_buffer.starts_with("ORIGIN") || self.line_buffer.starts_with("     gene") || self.line_buffer.starts_with("     misc_feature") {
664			    if locus_tag.is_empty() {
665			         locus_tag = format!("CDS_{}",cds_counter).to_string();
666				 }
667			    if joined {
668				 //println!("currently the start is {:?} and the stop is {:?}", &startiter, &enditer);
669				 for (i, m) in startiter.iter().enumerate() {
670				      let loc_tag = format!("{}_{}",locus_tag.clone(),i);
671                                      //check we may need to add or subtract one to m
672				      record.cds
673				          .set_counter(loc_tag)
674					  .set_start(RangeValue::Exact(*m))
675					  .set_stop(RangeValue::Exact(enditer[i]))
676					  .set_gene(gene.to_string())
677					  .set_product(product.to_string())
678					  .set_codon_start(codon_start)
679					  .set_strand(strand);
680				      }
681				 continue 'outer;
682			         }
683			    else {
684			         record.cds
685			             .set_counter(locus_tag.clone())
686				     .set_start(RangeValue::Exact(thestart))
687				     .set_stop(RangeValue::Exact(theend))
688				     .set_gene(gene.to_string())
689				     .set_product(product.to_string())
690				     .set_codon_start(codon_start)
691				     .set_strand(strand);
692			    continue 'outer;
693			    }
694			}
695	     }     }
696	    //check if we have reached the DNA sequence section and populate the record sequences field if so.  Returns the record on finding end of record mark
697	    if self.line_buffer.starts_with("ORIGIN") {
698	        let mut sequences = String::new();
699	        let result_seq = loop {  
700		     self.line_buffer.clear();
701		     self.reader.read_line(&mut self.line_buffer)?;
702                     if self.line_buffer.starts_with("//") {
703		         break sequences;
704                     } else {
705	                 let s: Vec<&str> = self.line_buffer.split_whitespace().collect();
706		         let s = &s[1..];
707		         let sequence = s.iter().join("");
708		         sequences.push_str(&sequence);
709                         }     
710	             };
711		record.sequence = result_seq.to_string();
712		let mut iterablecount: u32 = 0;
713		//Fields are completed and populated for the FeatureAttributes, collect and populate the SequenceAttributes fields
714	        for (key,val) in record.cds.iter_sorted() {
715	              let (mut a, mut b, mut c, mut d): (Option<u32>, Option<u32>, Option<i8>, Option<u8>) = (None, None, None, None);
716	              for value in val {
717		          //println!("this is key {:?} value {:?}", &key, &value);
718	                  match value {
719		               FeatureAttributes::Start { value } => a = match value {
720		                   RangeValue::Exact(v) => Some(*v),
721                                   RangeValue::LessThan(v) => Some(*v), // Assign the value even if it's <value
722                                   RangeValue::GreaterThan(v) => Some(*v), //Assign the value even it's > value
723                                   },
724		               FeatureAttributes::Stop { value } => b = match value {
725		                   RangeValue::Exact(v) => Some(*v),
726                                   RangeValue::LessThan(v) => Some(*v), // Assign the value even if it's <value
727                                   RangeValue::GreaterThan(v) => Some(*v), //Assign the value even if it's > value
728                                   },
729		               FeatureAttributes::Strand { value } => c = match value {
730		                  value => Some(*value),
731			          },
732		               FeatureAttributes::CodonStart { value } => d = match value {
733		                  value => Some(value.clone()),
734			          },
735		               _ => (),
736		               }
737	                 }
738	                 let sta = a.map(|o| o as usize)
739						 .ok_or(anyhow!("No value for start"))?;
740	                 let sto = b.map(|t| t as usize)
741						 .ok_or(anyhow!("No value for stop"))? - 1;
742	                 let stra = c.map(|u| u as i8)
743						 .ok_or(anyhow!("No value for strand"))?;
744	                 let cod = d.map(|v| v as usize - 1)
745						 .ok_or(anyhow!("No value for strand"))?;
746
747	                 let star = sta.try_into()?;
748	                 let stow = sto.try_into()?;
749	                 let codd = cod.try_into()?;
750	                 let mut sliced_sequence: &str = "";
751	                 //collects the DNA sequence and translations on the correct strand
752	                 if stra == -1 {
753	                    if cod > 1 {
754			       //println!("reverse strand coding start more than one {:?}", &iterablecount);
755			       if sto + 1 <= record.sequence.len() {
756		                   sliced_sequence = &record.sequence[sta+cod..sto+1];
757				   }
758				   else {
759				   sliced_sequence = &record.sequence[sta+cod..sto];
760				   }
761		               }
762		            else {
763			       //println!("record sta {:?} sto {:?} cod {:?} stra {:?} record.seq length {:?}", &sta, &sto, &cod, &stra, &record.sequence.len());
764			       //println!("sliced sta {:?} sliced sto {:?} record.id {:?}", sta, sto, &record.id);
765			       //println!("iterable count is {:?} reverse strand codon start one", &iterablecount);
766			       if sto + 1 <= record.sequence.len() {
767	                                        sliced_sequence = &record.sequence[sta..sto+1];
768				                  }
769			       else {
770			           sliced_sequence = &record.sequence[sta..sto];
771				   }
772			       //println!("iterable count after is {:?}", &iterablecount);
773		               }
774	                 let cds_char = sliced_sequence;
775		         let prot_seq =  translate(&revcomp(cds_char.as_bytes()));
776		         let parts: Vec<&str> = prot_seq.split('*').collect();
777		         record.seq_features
778			             .set_counter(key.to_string())
779				     .set_start(RangeValue::Exact(star))
780                                     .set_stop(RangeValue::Exact(stow))
781                                     .set_sequence_ffn(cds_char.to_string())
782                                     .set_sequence_faa(parts[0].to_string())
783                                     .set_codon_start(codd)
784                                     .set_strand(stra);
785	                 } else {
786	                      if cod > 1 {
787			          //println!("forward strand codon value more than one cnt {:?}", &iterablecount);
788		                  sliced_sequence = &record.sequence[sta+cod-1..sto];
789		                  }
790		              else {
791			          //println!("forward strand codon value one cnt {:?}", &iterablecount);
792		                  sliced_sequence = &record.sequence[sta-1..sto];
793		                  }
794		         let cds_char = sliced_sequence;
795		         let prot_seq = translate(cds_char.as_bytes());
796		         let parts: Vec<&str> = prot_seq.split('*').collect();
797		         record.seq_features
798			             .set_counter(key.to_string())
799				     .set_start(RangeValue::Exact(star))
800                                     .set_stop(RangeValue::Exact(stow))
801                                     .set_sequence_ffn(cds_char.to_string())
802                                     .set_sequence_faa(parts[0].to_string())
803                                     .set_codon_start(codd)
804                                     .set_strand(stra);
805		                      }
806                              }
807                 	//return the record when completed
808		        return Ok(record.to_owned());
809                        }
810	 //clear the line buffer and read the next to continue back to the outer loop
811	 self.line_buffer.clear();
812	 self.reader.read_line(&mut self.line_buffer)?;
813        }
814       Ok(record.to_owned())
815     }
816}
817
818///stores a value for start or stop (end) which can be denoted as a < value or > value.
819#[derive(Debug, Hash, PartialEq, Eq, Clone)]
820pub enum RangeValue {
821    Exact(u32),
822    LessThan(u32),
823    GreaterThan(u32),
824}
825
826//trait for rangevalue
827impl RangeValue {
828    pub fn get_value(&self) -> u32 {
829        match self {
830            RangeValue::Exact(value) => *value,
831            RangeValue::LessThan(value) => *value,
832            RangeValue::GreaterThan(value) => *value,
833        }
834    }
835}
836
837//stores the details of the source features in genbank (contigs)
838#[derive(Debug, Eq, PartialEq, Hash, Clone)]
839pub enum SourceAttributes {
840    Start { value: RangeValue },
841    Stop { value: RangeValue },
842    Organism { value: String },
843    MolType { value: String},
844    Strain { value: String},
845    CultureCollection { value: String},
846    TypeMaterial { value: String},
847    DbXref { value:String}
848}
849
850//macro for creating the getters
851create_getters!(
852    SourceAttributeBuilder,
853    source_attributes,
854    SourceAttributes,
855    Start { value: RangeValue },
856    Stop { value: RangeValue },
857    Organism { value: String },
858    MolType { value: String},
859    Strain { value: String},
860    // CultureCollection { value: String},
861    TypeMaterial { value: String},
862    DbXref { value:String}
863);
864
865///builder for the source information on a per record basis
866#[derive(Debug, Default, Clone)]
867pub struct SourceAttributeBuilder {
868    pub source_attributes: BTreeMap<String, HashSet<SourceAttributes>>,
869    pub source_name: Option<String>,
870}
871
872impl SourceAttributeBuilder {
873    // Method to set source name
874    pub fn set_source_name(&mut self, name: String) {
875        self.source_name = Some(name);
876    }
877
878    // Method to get source name
879    pub fn get_source_name(&self) -> Option<&String> {
880        self.source_name.as_ref()
881    }
882
883    // Method to add source attributes
884    pub fn add_source_attribute(&mut self, key: String, attribute: SourceAttributes) {
885        self.source_attributes
886            .entry(key)
887            .or_insert_with(HashSet::new)
888            .insert(attribute);
889    }
890
891    // Method to retrieve source attributes for a given key
892    pub fn get_source_attributes(&self, key: &str) -> Option<&HashSet<SourceAttributes>> {
893        self.source_attributes.get(key)
894    }
895}
896
897
898create_builder!(
899    SourceAttributeBuilder,
900    source_attributes,
901    SourceAttributes,
902    source_name,
903    Start { value: RangeValue },
904    Stop { value: RangeValue },
905    Organism { value: String },
906    MolType { value: String},
907    Strain { value: String},
908    // CultureCollection { value: String},
909    TypeMaterial { value: String},
910    DbXref { value:String}
911);
912
913///attributes for each feature, cds or gene
914#[derive(Debug, Eq, Hash, PartialEq, Clone)]
915pub enum FeatureAttributes {
916    Start { value: RangeValue },
917    Stop { value: RangeValue },
918    Gene { value: String },
919    Product { value: String },
920    CodonStart { value: u8 },
921    Strand { value: i8 },
922 //   ec_number { value: String }
923}
924
925
926create_getters!(
927    FeatureAttributeBuilder,
928    attributes,
929    FeatureAttributes,
930    Start { value: RangeValue },
931    Stop { value: RangeValue },
932    Gene { value: String },
933    Product { value: String },
934    CodonStart { value: u8 },
935    Strand { value: i8 }
936);
937
938///builder for the feature information on a per coding sequence (CDS) basis
939#[derive(Debug, Default, Clone)]
940pub struct FeatureAttributeBuilder {
941    pub attributes: BTreeMap<String, HashSet<FeatureAttributes>>,
942    locus_tag: Option<String>,
943}
944
945create_builder!(
946    FeatureAttributeBuilder,
947    attributes,
948    FeatureAttributes,
949    locus_tag,
950    Start { value: RangeValue },
951    Stop { value: RangeValue },
952    Gene { value: String },
953    Product { value: String },
954    CodonStart { value: u8 },
955    Strand { value: i8 }
956);
957
958///stores the sequences of the coding sequences (genes) and proteins. Also stores start, stop, codon_start and strand information
959#[derive(Debug, Eq, PartialEq, Hash, Clone)]
960pub enum SequenceAttributes {
961    Start { value: RangeValue },
962    Stop { value: RangeValue },
963    SequenceFfn { value: String },
964    SequenceFaa { value: String },
965    CodonStart { value: u8 },
966    Strand { value: i8 },
967}
968
969create_getters!(
970    SequenceAttributeBuilder,
971    seq_attributes,
972    SequenceAttributes,
973    Start { value: RangeValue },
974    Stop { value: RangeValue },
975    SequenceFfn { value: String},
976    SequenceFaa { value: String},
977    CodonStart { value: u8},
978    Strand { value: i8}
979);
980
981///builder for the sequence information on a per coding sequence (CDS) basis
982#[derive(Debug, Default, Clone)]
983pub struct SequenceAttributeBuilder {
984    pub seq_attributes: BTreeMap<String, HashSet<SequenceAttributes>>,
985    pub locus_tag: Option<String>,
986}
987
988create_builder!(
989    SequenceAttributeBuilder,
990    seq_attributes,
991    SequenceAttributes,
992    locus_tag,
993    Start { value: RangeValue },
994    Stop { value: RangeValue },
995    SequenceFfn { value: String},
996    SequenceFaa { value: String},
997    CodonStart { value: u8 },
998    Strand { value: i8 }
999);
1000
1001///product lines can contain difficult to parse punctuation such as biochemical symbols like unclosed single quotes, superscripts, single and double brackets etc.
1002///here we substitute these for an underscore
1003pub fn substitute_odd_punctuation(input: String) -> Result<String, anyhow::Error> {
1004    let re = Regex::new(r"[/?()',`]|[α-ωΑ-Ω]")?;
1005
1006	// Strip either \r\n or \n more elegantly
1007	let cleaned = input.trim_end_matches(&['\r', '\n'][..]);
1008
1009	Ok(re.replace_all(cleaned, "_").to_string())
1010}
1011
1012///GFF3 field9 construct
1013#[derive(Debug)]
1014pub struct GFFInner {
1015    pub id: String,
1016    pub name: String,
1017    pub locus_tag: String,
1018    pub gene: String,
1019   // Inference: String,
1020   // Parent: String,
1021 //   db_xref: String,
1022    pub product: String,
1023   // is_circular: bool,
1024}
1025
1026impl GFFInner {
1027    pub fn new(
1028      id: String,
1029      name: String,
1030      locus_tag: String,
1031      gene: String,
1032   //   Inference: String,
1033  //    Parent: String,
1034  //    db_xref: String,
1035      product: String,
1036    ) -> Self {
1037       GFFInner {
1038          id, name, locus_tag, gene, product,
1039          }
1040    }
1041}
1042
1043///The main GFF3 construct
1044#[derive(Debug)]
1045pub struct GFFOuter<'a> {
1046    pub seqid: String,
1047    pub source: String,
1048    pub type_val: String,
1049    pub start: u32,
1050    pub end: u32,
1051    pub score: f64,
1052    pub strand: String,
1053    pub phase: u8,
1054    pub attributes: &'a GFFInner,
1055}
1056
1057impl<'a> GFFOuter<'a> {
1058    pub fn new(
1059       seqid: String,
1060       source: String,
1061       type_val: String,
1062       start: u32,
1063       end: u32,
1064       score: f64,
1065       strand: String,
1066       phase: u8,
1067       attributes: &'a GFFInner
1068    ) -> Self {
1069       GFFOuter {
1070          seqid, source, type_val, start, end, score, strand, phase, attributes,
1071          }
1072    }
1073    pub fn field9_attributes_build(&self) -> String {
1074       let mut full_field9 = Vec::new();
1075       if !self.attributes.id.is_empty() {
1076          full_field9.push(format!("id={}",self.attributes.id));
1077	  }
1078       if !self.attributes.name.is_empty() {
1079          full_field9.push(format!("name={}", self.attributes.name));
1080	  }
1081       if !self.attributes.gene.is_empty() {
1082          full_field9.push(format!("gene={}",self.attributes.gene));
1083	  }
1084   //    if !self.attributes.Inference.is_empty() {
1085   //       full_field9.push(format!("inference={}",self.attributes.Inference));
1086//	  }
1087       if !self.attributes.locus_tag.is_empty() {
1088          full_field9.push(format!("locus_tag={}",self.attributes.locus_tag));
1089	  }
1090       if !self.attributes.product.is_empty() {
1091          full_field9.push(format!("product={}",self.attributes.product));
1092	  }
1093   //    if !self.attributes.Parent.is_empty() {
1094   //       full_field9.push(format!("Parent={}",self.attributes.Parent));
1095//	  }
1096//       if !self.attributes.db_xref.is_empty() {
1097//          full_field9.push(format!("db_xref={}",self.attributes.db_xref));
1098//	  }
1099       full_field9.join(";")
1100       }
1101}
1102
1103///formats the translation string which can be multiple lines, for gbk
1104pub fn format_translation(translation: &str) -> String {
1105	       //create method to add the protein sequence into the translation qualifier with correct line lengths
1106	       let mut formatted = String::new();
1107	       let cleaned_translation = translation.replace("\n", "");
1108	       formatted.push_str("                     /translation=\"");
1109	       let line_length: usize = 60;
1110	       let final_num = line_length - 15;
1111	       formatted.push_str(&format!("{}\n",&cleaned_translation[0..final_num]));
1112	       for i in (47..translation.len()).step_by(60) {
1113	             let end = i+60 -1;
1114		     let valid_end = if end >= translation.len() { &cleaned_translation.len() -1 } else { end };
1115                     formatted.push_str(&format!("                     {}",&cleaned_translation[i..valid_end]));
1116		     println!("cleaned translation leng is {:?}", &cleaned_translation[i..valid_end].len());
1117		     if *&cleaned_translation[i..valid_end].len() < 59 {
1118		        formatted.push('\"');
1119			}
1120		     else {
1121		        formatted.push('\n');
1122			}
1123		}
1124	       formatted
1125}
1126
1127///writes the DNA sequence in gbk format with numbering
1128pub fn write_gbk_format_sequence(sequence: &str,file: &mut File) -> io::Result<()> {
1129       //function to write gbk format sequence
1130       writeln!(file, "ORIGIN")?;
1131       let mut formatted = String::new();
1132       let cleaned_input = sequence.replace("\n", "");
1133       let mut index = 1;
1134       for (_i, chunk) in cleaned_input.as_bytes().chunks(60).enumerate() {
1135           formatted.push_str(&format!("{:>5} ", index));
1136	   for (j, sub_chunk) in chunk.chunks(10).enumerate() {
1137	      if j > 0 {
1138	          formatted.push(' ');
1139		  }
1140	      formatted.push_str(&String::from_utf8_lossy(sub_chunk));
1141	    }
1142	    formatted.push('\n');
1143	    index+=60;
1144       }
1145       writeln!(file, "{:>6}", &formatted)?;
1146       writeln!(file, "//")?;
1147   Ok(())
1148}
1149
1150///saves the parsed data in genbank format
1151//writes a genbank or multi-genbank file
1152pub fn gbk_write(seq_region: BTreeMap<String, (u32,u32)>, record_vec: Vec<Record>, filename: &str) -> io::Result<()> {
1153       let now = Local::now();
1154       let formatted_date = now.format("%d-%b-%Y").to_string().to_uppercase();
1155       let mut file = OpenOptions::new()
1156           .write(true)     // Allow writing to the file
1157           .append(true)    // Enable appending to the file
1158           .create(true)    // Create the file if it doesn't exist
1159           .open(filename)?;
1160       for (i, (key, _val)) in seq_region.iter().enumerate() {
1161	   let strain  = match &record_vec[i].source_map.get_strain(key) {
1162	          Some(value) => value.to_string(),
1163	          None => "Unknown".to_string(),
1164	          };
1165	   //write lines for the header
1166	   let organism  = match &record_vec[i].source_map.get_organism(key) {
1167	          Some(value) => value.to_string(),
1168	          None => "Unknown".to_string(),
1169	          };
1170           let mol_type  = match &record_vec[i].source_map.get_mol_type(key) {
1171	          Some(value) => value.to_string(),
1172	          None => "Unknown".to_string(),
1173	          };
1174	   let type_material  = match &record_vec[i].source_map.get_type_material(&key) {
1175	          Some(value) => value.to_string(),
1176	          None => "Unknown".to_string(),
1177                  };
1178	   let db_xref = match &record_vec[i].source_map.get_db_xref(key) {
1179	          Some(value) => value.to_string(),
1180	          None => "Unknown".to_string(),
1181	          };
1182	   let source_stop  = match &record_vec[i].source_map.get_stop(key) {
1183	          Some(value) => value.get_value(),
1184	          None => { println!("stop value not found");
1185	                   None }.expect("stop value not received")
1186	          };
1187           writeln!(file, "LOCUS       {}             {} bp    DNA     linear CON {}", &key,&record_vec[i].sequence.len(),&formatted_date)?;
1188	   writeln!(file, "DEFINITION  {} {}.", &organism, &strain)?;
1189	   writeln!(file, "ACCESSION   {}", &key)?;
1190	   writeln!(file, "KEYWORDS    .")?;
1191	   writeln!(file, "SOURCE      {} {}", &organism,&strain)?;
1192	   writeln!(file, "  ORGANISM  {} {}", &organism,&strain)?;
1193	   //write lines for the source
1194	   writeln!(file, "FEATURES             Location/Qualifiers")?;
1195	   writeln!(file, "     source          1..{}", &source_stop)?;
1196	   writeln!(file, "                     /organism=\"{}\"",&strain)?;
1197	   writeln!(file, "                     /mol_type=\"{}\"",&mol_type)?;
1198	   writeln!(file, "                     /strain=\"{}\"",&strain)?;
1199	   if type_material != *"Unknown".to_string() {
1200	       writeln!(file, "                     /type_material=\"{}\"",&type_material)?;
1201	       }
1202	   writeln!(file, "                     /db_xref=\"{}\"",&db_xref)?;
1203	   //write lines for each CDS
1204	   for (locus_tag, _value) in &record_vec[i].cds.attributes {
1205	      let start  = match &record_vec[i].cds.get_start(locus_tag) {
1206	          Some(value) => value.get_value(),
1207	          None => { println!("start value not found");
1208	                   None }.expect("start value not received")
1209	          };
1210	      let stop  = match &record_vec[i].cds.get_stop(locus_tag) {
1211	          Some(value) => value.get_value(),
1212	          None => { println!("stop value not found");
1213	                   None }.expect("stop value not received")
1214	          };
1215	      let product  = match &record_vec[i].cds.get_product(locus_tag) {
1216	          Some(value) => value.to_string(),
1217	          None => "unknown product".to_string(),
1218	          };
1219	       let strand = match &record_vec[i].cds.get_strand(locus_tag) {
1220	          Some(value) => **value,
1221		  None => 0,
1222		  };
1223	       let codon_start = match &record_vec[i].cds.get_codon_start(locus_tag) {
1224	          Some(value) => **value,
1225		  None => 0,
1226		  };
1227	       let gene  = match &record_vec[i].cds.get_gene(locus_tag) {
1228	                         Some(value) => value.to_string(),
1229	                         None => "unknown".to_string(),
1230	                         };
1231	       let translation = match &record_vec[i].seq_features.get_sequence_faa(locus_tag) {
1232	                         Some(value) => value.to_string(),
1233				 None => "unknown".to_string(),
1234				 };
1235	       if strand == 1 {
1236	           writeln!(file, "     gene            {}..{}",&start,&stop)?;
1237                   } else {
1238	           writeln!(file, "     gene            complement({}..{})",&start,&stop)?;
1239		   }
1240	       writeln!(file, "                     /locus_tag=\"{}\"",&locus_tag)?;
1241	       if strand == 1 {
1242	           writeln!(file, "     CDS             {}..{}",&start,&stop)?;
1243		   }
1244	       else {
1245	           writeln!(file, "     CDS             complement({}..{})",&start,&stop)?;
1246		   }
1247	       writeln!(file, "                     /locus_tag=\"{}\"",&locus_tag)?;
1248	       writeln!(file, "                     /codon_start=\"{}\"", &codon_start)?;
1249	       if gene != "unknown" {
1250	           writeln!(file, "                     /gene=\"{}\"", &gene)?;
1251		   }
1252	       if translation != "unknown" {
1253  	           let formatted_translation = format_translation(&translation);
1254		   writeln!(file, "{}", &formatted_translation)?;
1255		   }
1256	       writeln!(file, "                     /product=\"{}\"",&product)?;
1257	       }
1258	  write_gbk_format_sequence(&record_vec[i].sequence, &mut file)?;
1259	  }
1260	  Ok(())
1261}
1262
1263///saves the parsed data in gff3 format
1264//writes a gff3 file from a genbank
1265#[allow(unused_assignments)]
1266#[allow(unused_variables)]
1267pub fn gff_write(seq_region: BTreeMap<String, (u32, u32)>, mut record_vec: Vec<Record>, filename: &str, dna: bool) -> io::Result<()> {
1268       let mut file = OpenOptions::new()
1269           //.write(true)     // Allow writing to the file
1270           .append(true)    // Enable appending to the file
1271           .create(true)    // Create the file if it doesn't exist
1272           .open(filename)?;
1273       if file.metadata()?.len() == 0 {
1274           writeln!(file, "##gff-version 3")?;
1275	   }
1276       let mut full_seq = String::new();
1277       let mut prev_end: u32 = 0;
1278       //println!("this is the full seq_region {:?}", &seq_region);
1279       for (k, v) in seq_region.iter() {
1280          writeln!(file, "##sequence-region\t{}\t{}\t{}", &k, v.0, v.1)?;
1281	  }
1282       for ((source_name, (seq_start, seq_end)), record) in seq_region.iter().zip(record_vec.drain(..)) {
1283	  if dna == true {
1284	     full_seq.push_str(&record.sequence);
1285             }
1286           for (locus_tag, _valu) in &record.cds.attributes {
1287               let start  = match record.cds.get_start(locus_tag) {
1288	          Some(value) => value.get_value(),
1289	          None => { println!("start value not found");
1290	                   None }.expect("start value not received")
1291	          };
1292	      let stop  = match record.cds.get_stop(locus_tag) {
1293	          Some(value) => value.get_value(),
1294	          None => { println!("stop value not found");
1295	                   None }.expect("stop value not received")
1296	          };
1297	      let gene  = match record.cds.get_gene(locus_tag) {
1298	          Some(value) => value.to_string(),
1299	          None => "unknown".to_string(),
1300	          };
1301	      let product  = match record.cds.get_product(locus_tag) {
1302	          Some(value) => value.to_string(),
1303	          None => "unknown product".to_string(),
1304	          };
1305	      let strand  = match record.cds.get_strand(locus_tag) {
1306	          Some(valu) => {
1307	             match valu {
1308		        1 => "+".to_string(),
1309		        -1 => "-".to_string(),
1310		        _ => { println!("unexpected strand value {} for locus_tag {}", valu, locus_tag);
1311		            "unknownstrand".to_string() }
1312		     }
1313	          },
1314	          None => "unknownvalue".to_string(),
1315	       };
1316	       let phase = match record.cds.get_codon_start(locus_tag) {
1317	          Some(valuer) => {
1318	             match valuer {
1319		        1 => 0,
1320		        2 => 1,
1321		        3 => 2,
1322		        _ => { println!("unexpected phase value {} in the bagging area for locus_tag {}", valuer, locus_tag);
1323		            1 }
1324		     }
1325	          },
1326	          None => 1,
1327	       };
1328              let gff_inner = GFFInner::new(
1329                 locus_tag.to_string(),
1330	         source_name.clone(),
1331	         locus_tag.to_string(),
1332	         gene,
1333	    //  &record.cds.get_Inference(&locus_tag),
1334	    //  &record.cds.get_Parent(&locus_tag),
1335	   //   db_xref,
1336	         product,
1337	         );
1338              let gff_outer = GFFOuter::new(
1339                 source_name.clone(),
1340                 ".".to_string(),
1341                 "CDS".to_string(),
1342                 start + prev_end,
1343                 stop + prev_end,
1344                 0.0,
1345                 strand,
1346                 phase,
1347                 &gff_inner,
1348	         );
1349	      let field9_attributes = gff_outer.field9_attributes_build();
1350	      //println!("{}\t{}\t{}\t{:?}\t{:?}\t{}\t{}\t{}\t{}", gff_outer.seqid, gff_outer.source, gff_outer.type_val, gff_outer.start, gff_outer.end, gff_outer.score, gff_outer.strand, gff_outer.phase, field9_attributes);
1351              writeln!(file, "{}\t{}\t{}\t{:?}\t{:?}\t{}\t{}\t{}\t{}", gff_outer.seqid, gff_outer.source, gff_outer.type_val, gff_outer.start, gff_outer.end, gff_outer.score, gff_outer.strand, gff_outer.phase, field9_attributes)?;
1352          
1353	  }
1354	  prev_end = *seq_end;
1355	  }
1356          if dna {
1357             writeln!(file, "##FASTA")?;
1358	     //writeln!(file, ">{}\n",&filename.to_string())?;
1359             writeln!(file, "{}", full_seq)?;
1360	     }
1361          Ok(())
1362}
1363	       	       
1364///saves the parsed data in gff3 format
1365//writes a gff3 file from a genbank
1366#[allow(unused_assignments)]
1367pub fn orig_gff_write(seq_region: BTreeMap<String, (u32, u32)>, record_vec: Vec<Record>, filename: &str, dna: bool) -> io::Result<()> {
1368       let mut file = OpenOptions::new()
1369           //.write(true)     // Allow writing to the file
1370           .append(true)    // Enable appending to the file
1371           .create(true)    // Create the file if it doesn't exist
1372           .open(filename)?;
1373       if file.metadata()?.len() == 0 {
1374           writeln!(file, "##gff-version 3")?;
1375	   }
1376       let mut source_name = String::new();
1377       let mut full_seq = String::new();
1378       let mut prev_end: u32 = 0;
1379       //println!("this is the full seq_region {:?}", &seq_region);
1380       for (k, v) in seq_region.iter() {
1381          writeln!(file, "##sequence-region\t{}\t{}\t{}", &k, v.0, v.1)?;
1382	  }
1383       for (i, (key, val)) in seq_region.iter().enumerate() {
1384	  source_name = key.to_string();
1385	  if dna == true {
1386	     full_seq.push_str(&record_vec[i].sequence);
1387             }
1388           for (locus_tag, _valu) in &record_vec[i].cds.attributes {
1389               let start  = match record_vec[i].cds.get_start(locus_tag) {
1390	          Some(value) => value.get_value(),
1391	          None => { println!("start value not found");
1392	                   None }.expect("start value not received")
1393	          };
1394	      let stop  = match record_vec[i].cds.get_stop(locus_tag) {
1395	          Some(value) => value.get_value(),
1396	          None => { println!("stop value not found");
1397	                   None }.expect("stop value not received")
1398	          };
1399	      let gene  = match record_vec[i].cds.get_gene(locus_tag) {
1400	          Some(value) => value.to_string(),
1401	          None => "unknown".to_string(),
1402	          };
1403	      let product  = match record_vec[i].cds.get_product(locus_tag) {
1404	          Some(value) => value.to_string(),
1405	          None => "unknown product".to_string(),
1406	          };
1407	      let strand  = match record_vec[i].cds.get_strand(locus_tag) {
1408	          Some(valu) => {
1409	             match valu {
1410		        1 => "+".to_string(),
1411		        -1 => "-".to_string(),
1412		        _ => { println!("unexpected strand value {} for locus_tag {}", valu, locus_tag);
1413		            "unknownstrand".to_string() }
1414		     }
1415	          },
1416	          None => "unknownvalue".to_string(),
1417	       };
1418	       let phase = match record_vec[i].cds.get_codon_start(locus_tag) {
1419	          Some(valuer) => {
1420	             match valuer {
1421		        1 => 0,
1422		        2 => 1,
1423		        3 => 2,
1424		        _ => { println!("unexpected phase value {} in the bagging area for locus_tag {}", valuer, locus_tag);
1425		            1 }
1426		     }
1427	          },
1428	          None => 1,
1429	       };
1430              let gff_inner = GFFInner::new(
1431                 locus_tag.to_string(),
1432	         source_name.clone(),
1433	         locus_tag.to_string(),
1434	         gene,
1435	    //  &record.cds.get_Inference(&locus_tag),
1436	    //  &record.cds.get_Parent(&locus_tag),
1437	   //   db_xref,
1438	         product,
1439	         );
1440              let gff_outer = GFFOuter::new(
1441                 source_name.clone(),
1442                 ".".to_string(),
1443                 "CDS".to_string(),
1444                 start + prev_end,
1445                 stop + prev_end,
1446                 0.0,
1447                 strand,
1448                 phase,
1449                 &gff_inner,
1450	         );
1451	      let field9_attributes = gff_outer.field9_attributes_build();
1452	      //println!("{}\t{}\t{}\t{:?}\t{:?}\t{}\t{}\t{}\t{}", gff_outer.seqid, gff_outer.source, gff_outer.type_val, gff_outer.start, gff_outer.end, gff_outer.score, gff_outer.strand, gff_outer.phase, field9_attributes);
1453              writeln!(file, "{}\t{}\t{}\t{:?}\t{:?}\t{}\t{}\t{}\t{}", gff_outer.seqid, gff_outer.source, gff_outer.type_val, gff_outer.start, gff_outer.end, gff_outer.score, gff_outer.strand, gff_outer.phase, field9_attributes)?;
1454          
1455	  }
1456	  prev_end = val.1;
1457	  }
1458          if dna {
1459             writeln!(file, "##FASTA")?;
1460	     //writeln!(file, ">{}\n",&filename.to_string())?;
1461             writeln!(file, "{}", full_seq)?;
1462	     }
1463          Ok(())
1464}
1465
1466///internal record containing data from a single source or contig.  Has multiple features.
1467//sets up a record
1468#[derive(Debug, Clone)]
1469pub struct Record {
1470    pub id: String,
1471    pub length: u32,
1472    pub sequence: String,
1473    pub start: usize,
1474    pub end: usize,
1475    pub strand: i32,
1476    pub cds: FeatureAttributeBuilder,
1477    pub source_map: SourceAttributeBuilder,
1478    pub seq_features: SequenceAttributeBuilder,
1479}
1480  
1481impl Record {
1482    /// Create a new instance.
1483    pub fn new() -> Self {
1484        Record {
1485            id: "".to_owned(),
1486            length: 0,
1487            sequence: "".to_owned(),
1488	    start: 0,
1489	    end: 0,
1490	    strand: 0,
1491	    source_map: SourceAttributeBuilder::new(),
1492	    cds: FeatureAttributeBuilder::new(),
1493	    seq_features: SequenceAttributeBuilder::new(),
1494        }
1495    }
1496    pub fn is_empty(&mut self) -> bool {
1497        self.id.is_empty() && self.length == 0
1498    }
1499    pub fn check(&mut self) -> Result<(), &str> {
1500        if self.id().is_empty() {
1501            return Err("Expecting id for Gbk record.");
1502        }
1503        Ok(())
1504    }
1505    pub fn id(&mut self) -> &str {
1506        &self.id
1507    }
1508    pub fn length(&mut self) -> u32 {
1509        self.length
1510    }   
1511    pub fn sequence(&mut self) -> &str {
1512        &self.sequence
1513    }
1514    pub fn start(&mut self) -> u32 {
1515        self.start.try_into().unwrap()
1516    }
1517    pub fn end(&mut self) -> u32 {
1518        self.end.try_into().unwrap()
1519    }
1520    pub fn strand(&mut self) -> i32 {
1521        self.strand
1522    }
1523    pub fn cds(&mut self) -> FeatureAttributeBuilder {
1524        self.cds.clone()
1525    }
1526    pub fn source_map(&mut self) -> SourceAttributeBuilder {
1527        self.source_map.clone()
1528    }
1529    pub fn seq_features(&mut self) -> SequenceAttributeBuilder {
1530        self.seq_features.clone()
1531    }
1532    fn rec_clear(&mut self) {
1533        self.id.clear();
1534	self.length = 0;
1535	self.sequence.clear();
1536	self.start = 0;
1537	self.end = 0;
1538	self.strand = 0;
1539	self.source_map = SourceAttributeBuilder::new();
1540	self.cds = FeatureAttributeBuilder::new();
1541	self.seq_features = SequenceAttributeBuilder::new();
1542    }
1543}
1544
1545impl Default for Record {
1546     fn default() -> Self {
1547         Self::new()
1548     }
1549}
1550
1551#[allow(dead_code)]
1552pub struct Config {
1553    filename: String,
1554}
1555
1556impl Config {
1557    pub fn new(args: &[String]) -> Result<Config, &str> {
1558    if args.len() < 2 {
1559        panic!("not enough arguments, please provide filename");
1560    }
1561    let filename = args[1].clone();
1562
1563    Ok(Config { filename })
1564    }
1565}
1566
1567#[cfg(test)]
1568mod tests {
1569    use super::*;
1570    
1571    #[test]
1572    #[allow(unused_mut)]
1573    #[allow(unused_variables)]
1574    #[allow(dead_code)]
1575    #[allow(unused_assignments)]
1576    pub fn genbank_to_gff() -> io::Result<()> {
1577        let file_gbk = fs::File::open("test_output.gbk")?;
1578        let prev_start: u32 = 0;
1579        let mut prev_end: u32 = 0;
1580        let mut reader = Reader::new(file_gbk);
1581        let mut records = reader.records();
1582        let mut read_counter: u32 = 0;
1583        let mut seq_region: BTreeMap<String, (u32,u32)> = BTreeMap::new();
1584        let mut record_vec: Vec<Record> = Vec::new();
1585        loop {  
1586            match records.next() {	
1587                Some(Ok(mut record)) => {
1588	           //println!("next record");
1589                   //println!("Record id: {:?}", record.id);
1590		   let sour = record.source_map.source_name.clone().expect("issue collecting source name");
1591		   let beginning = match record.source_map.get_start(&sour) {
1592		                        Some(value) => value.get_value(),
1593				        _ => 0,
1594					};
1595		   let ending = match record.source_map.get_stop(&sour) {
1596		                        Some(value) => value.get_value(),
1597					_ => 0,
1598					};
1599		   if ending + prev_end < beginning + prev_end {
1600		      println!("debug since the end value smaller is than the start {:?}", beginning);
1601		      }
1602		   seq_region.insert(sour, (beginning + prev_end, ending + prev_end));
1603		   record_vec.push(record);
1604                   // Add additional fields to print if needed
1605		   read_counter+=1;
1606		   prev_end+=ending;
1607                },
1608	        Some(Err(e)) => { println!("theres an err {:?}", e); },
1609	        None => {
1610	           println!("finished iteration");
1611	                 break; },
1612	        }
1613           }
1614        let output_file = format!("test_output.gff");
1615        gff_write(seq_region.clone(), record_vec, &output_file, true)?;
1616        println!("Total records processed: {}", read_counter);
1617        return Ok(());
1618    }
1619    #[test]
1620    #[allow(unused_mut)]
1621    #[allow(unused_variables)]
1622    #[allow(unused_assignments)]
1623    pub fn genbank_to_faa() -> Result<(), anyhow::Error> {
1624            let file_gbk = fs::File::open("test_output.gbk")?;
1625            let mut reader = Reader::new(file_gbk);
1626            let mut records = reader.records();
1627            let mut read_counter: u32 = 0;
1628            loop {  
1629                match records.next() {	
1630                    Some(Ok(mut record)) => {
1631	               //println!("next record");
1632                       //println!("Record id: {:?}", record.id);
1633		       for (k, _v) in &record.cds.attributes {
1634		           match record.seq_features.get_sequence_faa(&k) {
1635		                     Some(value) => { let seq_faa = value.to_string();
1636				                      println!(">{}|{}\n{}", &record.id, &k, seq_faa);
1637						      },
1638				     _ => (),
1639				     };
1640		       
1641		           }
1642		       read_counter+=1;
1643                    },
1644	            Some(Err(e)) => { println!("theres an err {:?}", e); },
1645	            None => {
1646	               println!("finished iteration");
1647	                     break; },
1648	            }
1649               }
1650            println!("Total records processed: {}", read_counter);
1651            return Ok(());
1652	    }
1653     #[test]
1654     #[allow(unused_mut)]
1655     #[allow(unused_assignments)]
1656     #[allow(unused_variables)]
1657     pub fn genbank_to_ffn() -> Result<(), anyhow::Error> {
1658            let file_gbk = fs::File::open("test_output.gbk")?;
1659            let mut reader = Reader::new(file_gbk);
1660            let mut records = reader.records();
1661            let mut read_counter: u32 = 0;
1662            loop {  
1663                match records.next() {	
1664                    Some(Ok(mut record)) => {
1665	               //println!("next record");
1666                       //println!("Record id: {:?}", record.id);
1667		       for (k, v) in &record.cds.attributes {
1668		           match record.seq_features.get_sequence_ffn(&k) {
1669		                     Some(value) => { let seq_ffn = value.to_string();
1670				                      println!(">{}|{}\n{}", &record.id, &k, seq_ffn);
1671						      },
1672				     _ => (),
1673				     };
1674		       
1675		           }
1676		       read_counter+=1;
1677                    },
1678	            Some(Err(e)) => { println!("theres an err {:?}", e); },
1679	            None => {
1680	               println!("finished iteration");
1681	                     break; },
1682	            }
1683               }
1684            println!("Total records processed: {}", read_counter);
1685            return Ok(());
1686	    }
1687     #[test]
1688     /// Test to create a new record
1689     /// We require a source, features, sequence features and a sequence
1690     /// The source is top level, a single genbank file has one source, multi-genbank has one per contig
1691     /// The SourceAttributes construct has a name (counter), start, stop, organism, moltype, strain, type material and db_xref
1692     /// The FeatureAttributes construct has a locus tag (counter), gene, product, start, stop, codon start, strand
1693     /// SourceAttribute start and stop are the coordinates of the source feature or per contig, FeatureAttributes start and stop are per coding sequence (CDS)
1694     /// The SequenceAttributes construct has a locus tag (counter), start, stop, sequence_ffn, sequence_faa, codon start, and strand
1695     /// SequenceAttribute start and stop, codon start and strand are duplicates of those in the FeatureAttributes
1696     /// To add an entry requires using the set_ values such as set_start, set_stop, set_counter, set_strand
1697     /// To write in GFF format requires gff_write(seq_region, record_vec, filename and true/false
1698     /// The seq_region is the region of interest with name and DNA coordinates such as ``` "source_1".to_string(), (1,897) ```
1699     /// record_vec is a list of the records.  If there is only one record ``` vec![record] ``` will suffice
1700     /// filename is the required filename string, true/false is whether the DNA sequence should be included in the GFF3 file
1701     /// Some GFF3 files have the DNA sequence, whilst others do not.  Some tools require the DNA sequence included.
1702     #[allow(unused_mut)]
1703     #[allow(unused_assignments)]
1704     #[allow(unused_variables)]
1705     pub fn create_new_record() -> Result<(), anyhow::Error> {
1706            let filename = format!("new_record.gff");
1707	    let mut record = Record::new();
1708	    let mut seq_region: BTreeMap<String, (u32,u32)> = BTreeMap::new();
1709	    seq_region.insert("source_1".to_string(), (1,910));
1710            record.source_map
1711	         .set_counter("source_1".to_string())
1712	         .set_start(RangeValue::Exact(1))
1713	         .set_stop(RangeValue::Exact(910))
1714	         .set_organism("Escherichia coli".to_string())
1715	         .set_mol_type("DNA".to_string())
1716	         .set_strain("K-12 substr. MG1655".to_string())
1717	         // culture_collection.clone()
1718		 .set_type_material("type strain of Escherichia coli K12".to_string())
1719	         .set_db_xref("PRJNA57779".to_string());
1720	    record.cds
1721                  .set_counter("b3304".to_string())
1722                  .set_start(RangeValue::Exact(1))
1723                  .set_stop(RangeValue::Exact(354))
1724                  .set_gene("rplR".to_string())
1725                  .set_product("50S ribosomal subunit protein L18".to_string())
1726                  .set_codon_start(1)
1727                  .set_strand(-1);
1728	   record.cds
1729                  .set_counter("b3305".to_string())
1730                  .set_start(RangeValue::Exact(364))
1731                  .set_stop(RangeValue::Exact(897))
1732                  .set_gene("rplF".to_string())
1733                  .set_product("50S ribosomal subunit protein L6".to_string())
1734                  .set_codon_start(1)
1735                  .set_strand(-1);
1736	   record.seq_features
1737	         .set_counter("b3304".to_string())
1738		 .set_start(RangeValue::Exact(1))
1739                 .set_stop(RangeValue::Exact(354))
1740                 .set_sequence_ffn("ATGGATAAGAAATCTGCTCGTATCCGTCGTGCGACCCGCGCACGCCGCAAGCTCCAGGAG
1741CTGGGCGCAACTCGCCTGGTGGTACATCGTACCCCGCGTCACATTTACGCACAGGTAATT
1742GCACCGAACGGTTCTGAAGTTCTGGTAGCTGCTTCTACTGTAGAAAAAGCTATCGCTGAA
1743CAACTGAAGTACACCGGTAACAAAGACGCGGCTGCAGCTGTGGGTAAAGCTGTCGCTGAA
1744CGCGCTCTGGAAAAAGGCATCAAAGATGTATCCTTTGACCGTTCCGGGTTCCAATATCAT
1745GGTCGTGTCCAGGCACTGGCAGATGCTGCCCGTGAAGCTGGCCTTCAGTTCTAA".to_string())
1746                 .set_sequence_faa("MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGSEVLVAASTVEKAIAE
1747QLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQALADAAREAGLQF".to_string())
1748                 .set_codon_start(1)
1749                 .set_strand(-1);
1750	    record.seq_features
1751	         .set_counter("b3305".to_string())
1752		 .set_start(RangeValue::Exact(364))
1753                 .set_stop(RangeValue::Exact(897))
1754                 .set_sequence_ffn("ATGTCTCGTGTTGCTAAAGCACCGGTCGTTGTTCCTGCCGGCGTTGACGTAAAAATCAAC
1755GGTCAGGTTATTACGATCAAAGGTAAAAACGGCGAGCTGACTCGTACTCTCAACGATGCT
1756GTTGAAGTTAAACATGCAGATAATACCCTGACCTTCGGTCCGCGTGATGGTTACGCAGAC
1757GGTTGGGCACAGGCTGGTACCGCGCGTGCCCTGCTGAACTCAATGGTTATCGGTGTTACC
1758GAAGGCTTCACTAAGAAGCTGCAGCTGGTTGGTGTAGGTTACCGTGCAGCGGTTAAAGGC
1759AATGTGATTAACCTGTCTCTGGGTTTCTCTCATCCTGTTGACCATCAGCTGCCTGCGGGT
1760ATCACTGCTGAATGTCCGACTCAGACTGAAATCGTGCTGAAAGGCGCTGATAAGCAGGTG
1761ATCGGCCAGGTTGCAGCGGATCTGCGCGCCTACCGTCGTCCTGAGCCTTATAAAGGCAAG
1762GGTGTTCGTTACGCCGACGAAGTCGTGCGTACCAAAGAGGCTAAGAAGAAGTAA".to_string())
1763                 .set_sequence_faa("MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKHADNTLTFGPRDGYAD
1764GWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNVINLSLGFSHPVDHQLPAG
1765ITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYADEVVRTKEAKKK".to_string())
1766                 .set_codon_start(1)
1767                 .set_strand(-1);
1768	    record.sequence = "TTAGAACTGAAGGCCAGCTTCACGGGCAGCATCTGCCAGTGCCTGGACACGACCATGATA
1769TTGGAACCCGGAACGGTCAAAGGATACATCTTTGATGCCTTTTTCCAGAGCGCGTTCAGC
1770GACAGCTTTACCCACAGCTGCAGCCGCGTCTTTGTTACCGGTGTACTTCAGTTGTTCAGC
1771GATAGCTTTTTCTACAGTAGAAGCAGCTACCAGAACTTCAGAACCGTTCGGTGCAATTAC
1772CTGTGCGTAAATGTGACGCGGGGTACGATGTACCACCAGGCGAGTTGCGCCCAGCTCCTG
1773GAGCTTGCGGCGTGCGCGGGTCGCACGACGGATACGAGCAGATTTCTTATCCATAGTGTT
1774ACCTTACTTCTTCTTAGCCTCTTTGGTACGCACGACTTCGTCGGCGTAACGAACACCCTT
1775GCCTTTATAAGGCTCAGGACGACGGTAGGCGCGCAGATCCGCTGCAACCTGGCCGATCAC
1776CTGCTTATCAGCGCCTTTCAGCACGATTTCAGTCTGAGTCGGACATTCAGCAGTGATACC
1777CGCAGGCAGCTGATGGTCAACAGGATGAGAGAAACCCAGAGACAGGTTAATCACATTGCC
1778TTTAACCGCTGCACGGTAACCTACACCAACCAGCTGCAGCTTCTTAGTGAAGCCTTCGGT
1779AACACCGATAACCATTGAGTTCAGCAGGGCACGCGCGGTACCAGCCTGTGCCCAACCGTC
1780TGCGTAACCATCACGCGGACCGAAGGTCAGGGTATTATCTGCATGTTTAACTTCAACAGC
1781ATCGTTGAGAGTACGAGTCAGCTCGCCGTTTTTACCTTTGATCGTAATAACCTGACCGTT
1782GATTTTTACGTCAACGCCGGCAGGAACAACGACCGGTGCTTTAGCAACACGAGACA".to_string();
1783           gff_write(seq_region.clone(), vec![record.clone()], "new_output.gff", true)?;
1784	   gbk_write(seq_region, vec![record], "new_output.gbk")?;
1785	   return Ok(());
1786      }
1787}