1use std::io::{self, Write};
275use std::fs;
276use regex::Regex;
277use itertools::Itertools;
278use std::vec::Vec;
279use std::str;
280use std::convert::AsRef;
281use protein_translate::translate;
282use std::path::Path;
283use bio::alphabets::dna::revcomp;
284use anyhow::anyhow;
285use std::collections::BTreeMap;
286use std::fs::{OpenOptions, File};
287use anyhow::Context;
288use std::collections::HashSet;
289use paste::paste;
290use std::convert::TryInto;
291use chrono::prelude::*;
292
293
294#[macro_export]
296macro_rules! create_getters {
297 ($struct_name:ident, $attributes:ident, $enum_name:ident, $( $field:ident { value: $type:ty } ),* ) => {
299 impl $struct_name {
300 $(
301 paste! {
303 pub fn [<get_$field:snake>](&self, key: &str) -> Option<&$type> {
304 self.$attributes.get(key).and_then(|set| {
306 set.iter().find_map(|attr| {
308 if let $enum_name::$field { value } = attr {
309 Some(value)
310 } else {
311 None
312 }
313 })
314 })
315 }
316 }
317 )*
318 }
319 };
320}
321
322#[macro_export]
324macro_rules! create_builder {
325 ($builder_name:ident, $attributes:ident, $enum_name:ident, $counter_name:ident, $( $field:ident { value: $type:ty } ),* ) => {
327 impl $builder_name {
328 pub fn new() -> Self {
329 $builder_name {
330 $attributes: BTreeMap::new(),
331 $counter_name: None,
332 }
333 }
334 pub fn set_counter(&mut self, counter: String) -> &mut Self {
336 self.$counter_name = Some(counter);
337 self
338 }
339 pub fn insert_to(&mut self, value: $enum_name) {
341 if let Some(counter) = &self.$counter_name {
342 self.$attributes
343 .entry(counter.to_string())
344 .or_insert_with(HashSet::new)
345 .insert(value);
346 }
347 else {
348 panic!("Counter key not set"); }
350 }
351 $(
353 paste! {
354 pub fn [<set_$field:snake>](&mut self, value: $type) -> &mut Self {
355 self.insert_to($enum_name::$field { value });
356 self
357 }
358 }
359 )*
360 pub fn build(self) -> BTreeMap<String, HashSet<$enum_name>> {
362 self.$attributes
363 }
364 pub fn iter_sorted(&self) -> std::collections::btree_map::Iter<String, HashSet<$enum_name>> {
366 self.$attributes.iter()
367 }
368 pub fn default() -> Self {
370 $builder_name {
371 $attributes: BTreeMap::new(),
372 $counter_name: None,
373 }
374 }
375 }
376 };
377}
378
379#[macro_export]
380macro_rules! genbank {
381 ($filename:expr) => {{
382 use std::fs::File;
383 use std::io::BufReader;
384 let file = File::open($filename)
385 .unwrap_or_else(|e| panic!("Could not open file {}: {}", $filename, e));
386 let mut reader = $crate::gbk::Reader::new(file);
387 let mut vec = Vec::new();
388 for rec in reader.records() {
389 match rec {
390 Ok(r) => { println!("this is r {:?}", &r);
391 vec.push(r);
392 }
393 Err(e) => panic!("Error reading record: {:?}", e),
394 }
395 }
396 vec
397 }};
398}
399
400
401#[derive(Debug)]
405#[allow(unused_mut)]
406pub struct Records<B>
407where
408 B: io::BufRead,
409{
410 reader: Reader<B>,
411 error_has_occurred: bool,
412}
413
414impl<B> Records<B>
415where
416 B: io::BufRead,
417{
418 #[allow(unused_mut)]
419 pub fn new(mut reader: Reader<B>) -> Self {
420 Records {
421 reader: reader,
422 error_has_occurred: false,
423 }
424 }
425}
426
427impl<B> Iterator for Records<B>
428where
429 B: io::BufRead,
430{
431 type Item = Result<Record, anyhow::Error>;
432
433 fn next(&mut self) -> Option<Result<Record, anyhow::Error>> {
434 if self.error_has_occurred {
435 println!("error was encountered in iteration");
436 None
437 } else {
438 let mut record = Record::new();
439 match self.reader.read(&mut record) {
440 Ok(_) => { if record.is_empty() {
441 None }
442 else {
443 Some(Ok(record))
444 }
445 }
446 Err(err) => {
447 self.error_has_occurred = true;
449 Some(Err(anyhow!("next record read error {:?}",err)))
450 }
451 }
452 }
453 }
454}
455
456pub trait GbkRead {
457 fn read(&mut self, record: &mut Record) -> Result<Record, anyhow::Error>;
458}
459
460#[derive(Debug, Default)]
462pub struct Reader<B> {
463 reader: B,
464 line_buffer: String,
465}
466
467impl Reader<io::BufReader<fs::File>> {
468 pub fn from_file<P: AsRef<Path> + std::fmt::Debug>(path: P) -> anyhow::Result<Self> {
470 fs::File::open(&path)
471 .map(Reader::new)
472 .with_context(|| format!("Failed to read Gbk from {:#?}", path))
473 }
474}
475
476impl<R> Reader<io::BufReader<R>>
477where
478 R: io::Read,
479{
480 pub fn new(reader: R) -> Self {
482 Reader {
483 reader: io::BufReader::new(reader),
484 line_buffer: String::new(),
485 }
486 }
487}
488
489impl<B> Reader<B>
490where
491 B: io::BufRead,
492{
493 pub fn from_bufread(bufreader: B) -> Self {
494 Reader {
495 reader: bufreader,
496 line_buffer: String::new(),
497 }
498 }
499 pub fn records(self) -> Records<B> {
501 Records {
502 reader: self,
503 error_has_occurred: false,
504 }
505 }
506}
507
508impl<'a, B> GbkRead for Reader<B>
510where
511 B: io::BufRead,
512{
513 #[allow(unused_mut)]
514 #[allow(unused_variables)]
515 #[allow(unused_assignments)]
516 fn read(&mut self, record: &mut Record) -> Result<Record, anyhow::Error> {
517 record.rec_clear();
518 let mut sequences = String::new();
521 let mut source_map = SourceAttributeBuilder::new();
522 let mut cds = FeatureAttributeBuilder::new();
523 let mut seq_features = SequenceAttributeBuilder::new();
524 let mut cds_counter: i32 = 0;
525 let mut source_counter: i32 = 0;
526 let mut prev_end: u32 = 0;
527 let mut organism = String::new();
528 let mut mol_type = String::new();
529 let mut strain = String::new();
530 let mut source_name = String::new();
531 let mut type_material = String::new();
532 let mut theend: u32 = 0;
533 let mut thestart: u32 = 0;
534 let mut db_xref = String::new();
535 if self.line_buffer.is_empty() {
537 self.reader.read_line(&mut self.line_buffer)?;
538 if self.line_buffer.is_empty() {
539 return Ok(record.to_owned());
540 }
541 }
542 'outer: while !self.line_buffer.is_empty() {
544 if self.line_buffer.starts_with("LOCUS") {
547 record.rec_clear();
548 let mut header_fields: Vec<&str> = self.line_buffer.split_whitespace().collect();
549 let mut header_iter = header_fields.iter();
550 header_iter.next();
551 record.id = header_iter.next()
552 .ok_or_else(|| anyhow::anyhow!("missing record id"))? .to_string();
554 let lens = header_iter.next()
555 .ok_or_else(|| anyhow::anyhow!("missing record length"))? .to_string();
557 record.length = lens.trim().parse::<u32>()?;
558 self.line_buffer.clear();
559 }
560 if self.line_buffer.starts_with(" source") {
562 let re = Regex::new(r"([0-9]+)[[:punct:]]+([0-9]+)")?;
563 let location = re.captures(&self.line_buffer).ok_or_else(|| anyhow::anyhow!("missing location"))?;
564 let start = &location[1];
565 let end = &location[2];
566 thestart = start.trim().parse::<u32>()?;
567 source_counter+=1;
568 source_name = format!("source_{}_{}",record.id,source_counter).to_string();
569 thestart += prev_end;
570 theend = end.trim().parse::<u32>()? + prev_end;
571 loop {
573 self.line_buffer.clear();
574 self.reader.read_line(&mut self.line_buffer)?;
575 if self.line_buffer.starts_with(" CDS") {
576 record.source_map
578 .set_counter(source_name.to_string())
579 .set_start(RangeValue::Exact(thestart))
580 .set_stop(RangeValue::Exact(theend))
581 .set_organism(organism.clone())
582 .set_mol_type(mol_type.clone())
583 .set_strain(strain.clone())
584 .set_type_material(type_material.clone())
586 .set_db_xref(db_xref.clone());
587 continue 'outer;
588 }
589 if self.line_buffer.contains("/organism") {
590 let org: Vec<&str> = self.line_buffer.split('\"').collect();
591 organism = org[1].to_string();
592 }
593 if self.line_buffer.contains("/mol_type") {
594 let mol: Vec<&str> = self.line_buffer.split('\"').collect();
595 mol_type = mol[1].to_string();
596 }
597 if self.line_buffer.contains("/strain") {
598 let stra: Vec<&str> = self.line_buffer.split('\"').collect();
599 strain = stra[1].to_string();
600 }
601 if self.line_buffer.contains("/type_material") {
606 let mat: Vec<&str> = self.line_buffer.split('\"').collect();
607 type_material = mat[1].to_string();
608 }
609 if self.line_buffer.contains("/db_xref") {
610 let db: Vec<&str> = self.line_buffer.split('\"').collect();
611 db_xref = db[1].to_string();
612 }
613 }
614 }
615 if self.line_buffer.starts_with(" CDS") {
617 let mut startiter: Vec<_> = Vec::new();
618 let mut enditer: Vec<_> = Vec::new();
619 let mut thestart: u32 = 0;
620 let mut thend: u32 = 0;
621 let mut joined: bool = false;
622 let joined = if self.line_buffer.contains("join") { true } else { false };
624 let re = Regex::new(r"([0-9]+)[[:punct:]]+([0-9]+)")?;
625 for cap in re.captures_iter(&self.line_buffer) {
627 cds_counter+=1;
628 thestart = cap[1].parse().expect("failed to match and parse numerical start");
629 theend = cap[2].parse().expect("failed to match and parse numerical end");
630 startiter.push(thestart);
631 enditer.push(theend);
632 }
633 let mut gene = String::new();
634 let mut product = String::new();
635 let strand: i8 = if self.line_buffer.contains("complement") {-1} else {1};
636 let mut locus_tag = String::new();
637 let mut codon_start: u8 = 1;
638 loop {
640 self.line_buffer.clear();
641 self.reader.read_line(&mut self.line_buffer)?;
642 if self.line_buffer.contains("/locus_tag=") {
643 let loctag: Vec<&str> = self.line_buffer.split('\"').collect();
644 locus_tag = loctag[1].to_string();
645 }
647 if self.line_buffer.contains("/codon_start") {
648 let codstart: Vec<&str> = self.line_buffer.split('=').collect();
649 let valstart = codstart[1].trim().parse::<u8>()?;
650 codon_start = valstart;
651 }
653 if self.line_buffer.contains("/gene=") {
654 let gen: Vec<&str> = self.line_buffer.split('\"').collect();
655 gene = gen[1].to_string();
656 }
658 if self.line_buffer.contains("/product") {
659 let prod: Vec<&str> = self.line_buffer.split('\"').collect();
660 product = substitute_odd_punctuation(prod[1].to_string())?;
661 }
663 if self.line_buffer.starts_with(" CDS") || self.line_buffer.starts_with("ORIGIN") || self.line_buffer.starts_with(" gene") || self.line_buffer.starts_with(" misc_feature") {
664 if locus_tag.is_empty() {
665 locus_tag = format!("CDS_{}",cds_counter).to_string();
666 }
667 if joined {
668 for (i, m) in startiter.iter().enumerate() {
670 let loc_tag = format!("{}_{}",locus_tag.clone(),i);
671 record.cds
673 .set_counter(loc_tag)
674 .set_start(RangeValue::Exact(*m))
675 .set_stop(RangeValue::Exact(enditer[i]))
676 .set_gene(gene.to_string())
677 .set_product(product.to_string())
678 .set_codon_start(codon_start)
679 .set_strand(strand);
680 }
681 continue 'outer;
682 }
683 else {
684 record.cds
685 .set_counter(locus_tag.clone())
686 .set_start(RangeValue::Exact(thestart))
687 .set_stop(RangeValue::Exact(theend))
688 .set_gene(gene.to_string())
689 .set_product(product.to_string())
690 .set_codon_start(codon_start)
691 .set_strand(strand);
692 continue 'outer;
693 }
694 }
695 } }
696 if self.line_buffer.starts_with("ORIGIN") {
698 let mut sequences = String::new();
699 let result_seq = loop {
700 self.line_buffer.clear();
701 self.reader.read_line(&mut self.line_buffer)?;
702 if self.line_buffer.starts_with("//") {
703 break sequences;
704 } else {
705 let s: Vec<&str> = self.line_buffer.split_whitespace().collect();
706 let s = &s[1..];
707 let sequence = s.iter().join("");
708 sequences.push_str(&sequence);
709 }
710 };
711 record.sequence = result_seq.to_string();
712 let mut iterablecount: u32 = 0;
713 for (key,val) in record.cds.iter_sorted() {
715 let (mut a, mut b, mut c, mut d): (Option<u32>, Option<u32>, Option<i8>, Option<u8>) = (None, None, None, None);
716 for value in val {
717 match value {
719 FeatureAttributes::Start { value } => a = match value {
720 RangeValue::Exact(v) => Some(*v),
721 RangeValue::LessThan(v) => Some(*v), RangeValue::GreaterThan(v) => Some(*v), },
724 FeatureAttributes::Stop { value } => b = match value {
725 RangeValue::Exact(v) => Some(*v),
726 RangeValue::LessThan(v) => Some(*v), RangeValue::GreaterThan(v) => Some(*v), },
729 FeatureAttributes::Strand { value } => c = match value {
730 value => Some(*value),
731 },
732 FeatureAttributes::CodonStart { value } => d = match value {
733 value => Some(value.clone()),
734 },
735 _ => (),
736 }
737 }
738 let sta = a.map(|o| o as usize)
739 .ok_or(anyhow!("No value for start"))?;
740 let sto = b.map(|t| t as usize)
741 .ok_or(anyhow!("No value for stop"))? - 1;
742 let stra = c.map(|u| u as i8)
743 .ok_or(anyhow!("No value for strand"))?;
744 let cod = d.map(|v| v as usize - 1)
745 .ok_or(anyhow!("No value for strand"))?;
746
747 let star = sta.try_into()?;
748 let stow = sto.try_into()?;
749 let codd = cod.try_into()?;
750 let mut sliced_sequence: &str = "";
751 if stra == -1 {
753 if cod > 1 {
754 if sto + 1 <= record.sequence.len() {
756 sliced_sequence = &record.sequence[sta+cod..sto+1];
757 }
758 else {
759 sliced_sequence = &record.sequence[sta+cod..sto];
760 }
761 }
762 else {
763 if sto + 1 <= record.sequence.len() {
767 sliced_sequence = &record.sequence[sta..sto+1];
768 }
769 else {
770 sliced_sequence = &record.sequence[sta..sto];
771 }
772 }
774 let cds_char = sliced_sequence;
775 let prot_seq = translate(&revcomp(cds_char.as_bytes()));
776 let parts: Vec<&str> = prot_seq.split('*').collect();
777 record.seq_features
778 .set_counter(key.to_string())
779 .set_start(RangeValue::Exact(star))
780 .set_stop(RangeValue::Exact(stow))
781 .set_sequence_ffn(cds_char.to_string())
782 .set_sequence_faa(parts[0].to_string())
783 .set_codon_start(codd)
784 .set_strand(stra);
785 } else {
786 if cod > 1 {
787 sliced_sequence = &record.sequence[sta+cod-1..sto];
789 }
790 else {
791 sliced_sequence = &record.sequence[sta-1..sto];
793 }
794 let cds_char = sliced_sequence;
795 let prot_seq = translate(cds_char.as_bytes());
796 let parts: Vec<&str> = prot_seq.split('*').collect();
797 record.seq_features
798 .set_counter(key.to_string())
799 .set_start(RangeValue::Exact(star))
800 .set_stop(RangeValue::Exact(stow))
801 .set_sequence_ffn(cds_char.to_string())
802 .set_sequence_faa(parts[0].to_string())
803 .set_codon_start(codd)
804 .set_strand(stra);
805 }
806 }
807 return Ok(record.to_owned());
809 }
810 self.line_buffer.clear();
812 self.reader.read_line(&mut self.line_buffer)?;
813 }
814 Ok(record.to_owned())
815 }
816}
817
818#[derive(Debug, Hash, PartialEq, Eq, Clone)]
820pub enum RangeValue {
821 Exact(u32),
822 LessThan(u32),
823 GreaterThan(u32),
824}
825
826impl RangeValue {
828 pub fn get_value(&self) -> u32 {
829 match self {
830 RangeValue::Exact(value) => *value,
831 RangeValue::LessThan(value) => *value,
832 RangeValue::GreaterThan(value) => *value,
833 }
834 }
835}
836
837#[derive(Debug, Eq, PartialEq, Hash, Clone)]
839pub enum SourceAttributes {
840 Start { value: RangeValue },
841 Stop { value: RangeValue },
842 Organism { value: String },
843 MolType { value: String},
844 Strain { value: String},
845 CultureCollection { value: String},
846 TypeMaterial { value: String},
847 DbXref { value:String}
848}
849
850create_getters!(
852 SourceAttributeBuilder,
853 source_attributes,
854 SourceAttributes,
855 Start { value: RangeValue },
856 Stop { value: RangeValue },
857 Organism { value: String },
858 MolType { value: String},
859 Strain { value: String},
860 TypeMaterial { value: String},
862 DbXref { value:String}
863);
864
865#[derive(Debug, Default, Clone)]
867pub struct SourceAttributeBuilder {
868 pub source_attributes: BTreeMap<String, HashSet<SourceAttributes>>,
869 pub source_name: Option<String>,
870}
871
872impl SourceAttributeBuilder {
873 pub fn set_source_name(&mut self, name: String) {
875 self.source_name = Some(name);
876 }
877
878 pub fn get_source_name(&self) -> Option<&String> {
880 self.source_name.as_ref()
881 }
882
883 pub fn add_source_attribute(&mut self, key: String, attribute: SourceAttributes) {
885 self.source_attributes
886 .entry(key)
887 .or_insert_with(HashSet::new)
888 .insert(attribute);
889 }
890
891 pub fn get_source_attributes(&self, key: &str) -> Option<&HashSet<SourceAttributes>> {
893 self.source_attributes.get(key)
894 }
895}
896
897
898create_builder!(
899 SourceAttributeBuilder,
900 source_attributes,
901 SourceAttributes,
902 source_name,
903 Start { value: RangeValue },
904 Stop { value: RangeValue },
905 Organism { value: String },
906 MolType { value: String},
907 Strain { value: String},
908 TypeMaterial { value: String},
910 DbXref { value:String}
911);
912
913#[derive(Debug, Eq, Hash, PartialEq, Clone)]
915pub enum FeatureAttributes {
916 Start { value: RangeValue },
917 Stop { value: RangeValue },
918 Gene { value: String },
919 Product { value: String },
920 CodonStart { value: u8 },
921 Strand { value: i8 },
922 }
924
925
926create_getters!(
927 FeatureAttributeBuilder,
928 attributes,
929 FeatureAttributes,
930 Start { value: RangeValue },
931 Stop { value: RangeValue },
932 Gene { value: String },
933 Product { value: String },
934 CodonStart { value: u8 },
935 Strand { value: i8 }
936);
937
938#[derive(Debug, Default, Clone)]
940pub struct FeatureAttributeBuilder {
941 pub attributes: BTreeMap<String, HashSet<FeatureAttributes>>,
942 locus_tag: Option<String>,
943}
944
945create_builder!(
946 FeatureAttributeBuilder,
947 attributes,
948 FeatureAttributes,
949 locus_tag,
950 Start { value: RangeValue },
951 Stop { value: RangeValue },
952 Gene { value: String },
953 Product { value: String },
954 CodonStart { value: u8 },
955 Strand { value: i8 }
956);
957
958#[derive(Debug, Eq, PartialEq, Hash, Clone)]
960pub enum SequenceAttributes {
961 Start { value: RangeValue },
962 Stop { value: RangeValue },
963 SequenceFfn { value: String },
964 SequenceFaa { value: String },
965 CodonStart { value: u8 },
966 Strand { value: i8 },
967}
968
969create_getters!(
970 SequenceAttributeBuilder,
971 seq_attributes,
972 SequenceAttributes,
973 Start { value: RangeValue },
974 Stop { value: RangeValue },
975 SequenceFfn { value: String},
976 SequenceFaa { value: String},
977 CodonStart { value: u8},
978 Strand { value: i8}
979);
980
981#[derive(Debug, Default, Clone)]
983pub struct SequenceAttributeBuilder {
984 pub seq_attributes: BTreeMap<String, HashSet<SequenceAttributes>>,
985 pub locus_tag: Option<String>,
986}
987
988create_builder!(
989 SequenceAttributeBuilder,
990 seq_attributes,
991 SequenceAttributes,
992 locus_tag,
993 Start { value: RangeValue },
994 Stop { value: RangeValue },
995 SequenceFfn { value: String},
996 SequenceFaa { value: String},
997 CodonStart { value: u8 },
998 Strand { value: i8 }
999);
1000
1001pub fn substitute_odd_punctuation(input: String) -> Result<String, anyhow::Error> {
1004 let re = Regex::new(r"[/?()',`]|[α-ωΑ-Ω]")?;
1005
1006 let cleaned = input.trim_end_matches(&['\r', '\n'][..]);
1008
1009 Ok(re.replace_all(cleaned, "_").to_string())
1010}
1011
1012#[derive(Debug)]
1014pub struct GFFInner {
1015 pub id: String,
1016 pub name: String,
1017 pub locus_tag: String,
1018 pub gene: String,
1019 pub product: String,
1023 }
1025
1026impl GFFInner {
1027 pub fn new(
1028 id: String,
1029 name: String,
1030 locus_tag: String,
1031 gene: String,
1032 product: String,
1036 ) -> Self {
1037 GFFInner {
1038 id, name, locus_tag, gene, product,
1039 }
1040 }
1041}
1042
1043#[derive(Debug)]
1045pub struct GFFOuter<'a> {
1046 pub seqid: String,
1047 pub source: String,
1048 pub type_val: String,
1049 pub start: u32,
1050 pub end: u32,
1051 pub score: f64,
1052 pub strand: String,
1053 pub phase: u8,
1054 pub attributes: &'a GFFInner,
1055}
1056
1057impl<'a> GFFOuter<'a> {
1058 pub fn new(
1059 seqid: String,
1060 source: String,
1061 type_val: String,
1062 start: u32,
1063 end: u32,
1064 score: f64,
1065 strand: String,
1066 phase: u8,
1067 attributes: &'a GFFInner
1068 ) -> Self {
1069 GFFOuter {
1070 seqid, source, type_val, start, end, score, strand, phase, attributes,
1071 }
1072 }
1073 pub fn field9_attributes_build(&self) -> String {
1074 let mut full_field9 = Vec::new();
1075 if !self.attributes.id.is_empty() {
1076 full_field9.push(format!("id={}",self.attributes.id));
1077 }
1078 if !self.attributes.name.is_empty() {
1079 full_field9.push(format!("name={}", self.attributes.name));
1080 }
1081 if !self.attributes.gene.is_empty() {
1082 full_field9.push(format!("gene={}",self.attributes.gene));
1083 }
1084 if !self.attributes.locus_tag.is_empty() {
1088 full_field9.push(format!("locus_tag={}",self.attributes.locus_tag));
1089 }
1090 if !self.attributes.product.is_empty() {
1091 full_field9.push(format!("product={}",self.attributes.product));
1092 }
1093 full_field9.join(";")
1100 }
1101}
1102
1103pub fn format_translation(translation: &str) -> String {
1105 let mut formatted = String::new();
1107 let cleaned_translation = translation.replace("\n", "");
1108 formatted.push_str(" /translation=\"");
1109 let line_length: usize = 60;
1110 let final_num = line_length - 15;
1111 formatted.push_str(&format!("{}\n",&cleaned_translation[0..final_num]));
1112 for i in (47..translation.len()).step_by(60) {
1113 let end = i+60 -1;
1114 let valid_end = if end >= translation.len() { &cleaned_translation.len() -1 } else { end };
1115 formatted.push_str(&format!(" {}",&cleaned_translation[i..valid_end]));
1116 println!("cleaned translation leng is {:?}", &cleaned_translation[i..valid_end].len());
1117 if *&cleaned_translation[i..valid_end].len() < 59 {
1118 formatted.push('\"');
1119 }
1120 else {
1121 formatted.push('\n');
1122 }
1123 }
1124 formatted
1125}
1126
1127pub fn write_gbk_format_sequence(sequence: &str,file: &mut File) -> io::Result<()> {
1129 writeln!(file, "ORIGIN")?;
1131 let mut formatted = String::new();
1132 let cleaned_input = sequence.replace("\n", "");
1133 let mut index = 1;
1134 for (_i, chunk) in cleaned_input.as_bytes().chunks(60).enumerate() {
1135 formatted.push_str(&format!("{:>5} ", index));
1136 for (j, sub_chunk) in chunk.chunks(10).enumerate() {
1137 if j > 0 {
1138 formatted.push(' ');
1139 }
1140 formatted.push_str(&String::from_utf8_lossy(sub_chunk));
1141 }
1142 formatted.push('\n');
1143 index+=60;
1144 }
1145 writeln!(file, "{:>6}", &formatted)?;
1146 writeln!(file, "//")?;
1147 Ok(())
1148}
1149
1150pub fn gbk_write(seq_region: BTreeMap<String, (u32,u32)>, record_vec: Vec<Record>, filename: &str) -> io::Result<()> {
1153 let now = Local::now();
1154 let formatted_date = now.format("%d-%b-%Y").to_string().to_uppercase();
1155 let mut file = OpenOptions::new()
1156 .write(true) .append(true) .create(true) .open(filename)?;
1160 for (i, (key, _val)) in seq_region.iter().enumerate() {
1161 let strain = match &record_vec[i].source_map.get_strain(key) {
1162 Some(value) => value.to_string(),
1163 None => "Unknown".to_string(),
1164 };
1165 let organism = match &record_vec[i].source_map.get_organism(key) {
1167 Some(value) => value.to_string(),
1168 None => "Unknown".to_string(),
1169 };
1170 let mol_type = match &record_vec[i].source_map.get_mol_type(key) {
1171 Some(value) => value.to_string(),
1172 None => "Unknown".to_string(),
1173 };
1174 let type_material = match &record_vec[i].source_map.get_type_material(&key) {
1175 Some(value) => value.to_string(),
1176 None => "Unknown".to_string(),
1177 };
1178 let db_xref = match &record_vec[i].source_map.get_db_xref(key) {
1179 Some(value) => value.to_string(),
1180 None => "Unknown".to_string(),
1181 };
1182 let source_stop = match &record_vec[i].source_map.get_stop(key) {
1183 Some(value) => value.get_value(),
1184 None => { println!("stop value not found");
1185 None }.expect("stop value not received")
1186 };
1187 writeln!(file, "LOCUS {} {} bp DNA linear CON {}", &key,&record_vec[i].sequence.len(),&formatted_date)?;
1188 writeln!(file, "DEFINITION {} {}.", &organism, &strain)?;
1189 writeln!(file, "ACCESSION {}", &key)?;
1190 writeln!(file, "KEYWORDS .")?;
1191 writeln!(file, "SOURCE {} {}", &organism,&strain)?;
1192 writeln!(file, " ORGANISM {} {}", &organism,&strain)?;
1193 writeln!(file, "FEATURES Location/Qualifiers")?;
1195 writeln!(file, " source 1..{}", &source_stop)?;
1196 writeln!(file, " /organism=\"{}\"",&strain)?;
1197 writeln!(file, " /mol_type=\"{}\"",&mol_type)?;
1198 writeln!(file, " /strain=\"{}\"",&strain)?;
1199 if type_material != *"Unknown".to_string() {
1200 writeln!(file, " /type_material=\"{}\"",&type_material)?;
1201 }
1202 writeln!(file, " /db_xref=\"{}\"",&db_xref)?;
1203 for (locus_tag, _value) in &record_vec[i].cds.attributes {
1205 let start = match &record_vec[i].cds.get_start(locus_tag) {
1206 Some(value) => value.get_value(),
1207 None => { println!("start value not found");
1208 None }.expect("start value not received")
1209 };
1210 let stop = match &record_vec[i].cds.get_stop(locus_tag) {
1211 Some(value) => value.get_value(),
1212 None => { println!("stop value not found");
1213 None }.expect("stop value not received")
1214 };
1215 let product = match &record_vec[i].cds.get_product(locus_tag) {
1216 Some(value) => value.to_string(),
1217 None => "unknown product".to_string(),
1218 };
1219 let strand = match &record_vec[i].cds.get_strand(locus_tag) {
1220 Some(value) => **value,
1221 None => 0,
1222 };
1223 let codon_start = match &record_vec[i].cds.get_codon_start(locus_tag) {
1224 Some(value) => **value,
1225 None => 0,
1226 };
1227 let gene = match &record_vec[i].cds.get_gene(locus_tag) {
1228 Some(value) => value.to_string(),
1229 None => "unknown".to_string(),
1230 };
1231 let translation = match &record_vec[i].seq_features.get_sequence_faa(locus_tag) {
1232 Some(value) => value.to_string(),
1233 None => "unknown".to_string(),
1234 };
1235 if strand == 1 {
1236 writeln!(file, " gene {}..{}",&start,&stop)?;
1237 } else {
1238 writeln!(file, " gene complement({}..{})",&start,&stop)?;
1239 }
1240 writeln!(file, " /locus_tag=\"{}\"",&locus_tag)?;
1241 if strand == 1 {
1242 writeln!(file, " CDS {}..{}",&start,&stop)?;
1243 }
1244 else {
1245 writeln!(file, " CDS complement({}..{})",&start,&stop)?;
1246 }
1247 writeln!(file, " /locus_tag=\"{}\"",&locus_tag)?;
1248 writeln!(file, " /codon_start=\"{}\"", &codon_start)?;
1249 if gene != "unknown" {
1250 writeln!(file, " /gene=\"{}\"", &gene)?;
1251 }
1252 if translation != "unknown" {
1253 let formatted_translation = format_translation(&translation);
1254 writeln!(file, "{}", &formatted_translation)?;
1255 }
1256 writeln!(file, " /product=\"{}\"",&product)?;
1257 }
1258 write_gbk_format_sequence(&record_vec[i].sequence, &mut file)?;
1259 }
1260 Ok(())
1261}
1262
1263#[allow(unused_assignments)]
1266#[allow(unused_variables)]
1267pub fn gff_write(seq_region: BTreeMap<String, (u32, u32)>, mut record_vec: Vec<Record>, filename: &str, dna: bool) -> io::Result<()> {
1268 let mut file = OpenOptions::new()
1269 .append(true) .create(true) .open(filename)?;
1273 if file.metadata()?.len() == 0 {
1274 writeln!(file, "##gff-version 3")?;
1275 }
1276 let mut full_seq = String::new();
1277 let mut prev_end: u32 = 0;
1278 for (k, v) in seq_region.iter() {
1280 writeln!(file, "##sequence-region\t{}\t{}\t{}", &k, v.0, v.1)?;
1281 }
1282 for ((source_name, (seq_start, seq_end)), record) in seq_region.iter().zip(record_vec.drain(..)) {
1283 if dna == true {
1284 full_seq.push_str(&record.sequence);
1285 }
1286 for (locus_tag, _valu) in &record.cds.attributes {
1287 let start = match record.cds.get_start(locus_tag) {
1288 Some(value) => value.get_value(),
1289 None => { println!("start value not found");
1290 None }.expect("start value not received")
1291 };
1292 let stop = match record.cds.get_stop(locus_tag) {
1293 Some(value) => value.get_value(),
1294 None => { println!("stop value not found");
1295 None }.expect("stop value not received")
1296 };
1297 let gene = match record.cds.get_gene(locus_tag) {
1298 Some(value) => value.to_string(),
1299 None => "unknown".to_string(),
1300 };
1301 let product = match record.cds.get_product(locus_tag) {
1302 Some(value) => value.to_string(),
1303 None => "unknown product".to_string(),
1304 };
1305 let strand = match record.cds.get_strand(locus_tag) {
1306 Some(valu) => {
1307 match valu {
1308 1 => "+".to_string(),
1309 -1 => "-".to_string(),
1310 _ => { println!("unexpected strand value {} for locus_tag {}", valu, locus_tag);
1311 "unknownstrand".to_string() }
1312 }
1313 },
1314 None => "unknownvalue".to_string(),
1315 };
1316 let phase = match record.cds.get_codon_start(locus_tag) {
1317 Some(valuer) => {
1318 match valuer {
1319 1 => 0,
1320 2 => 1,
1321 3 => 2,
1322 _ => { println!("unexpected phase value {} in the bagging area for locus_tag {}", valuer, locus_tag);
1323 1 }
1324 }
1325 },
1326 None => 1,
1327 };
1328 let gff_inner = GFFInner::new(
1329 locus_tag.to_string(),
1330 source_name.clone(),
1331 locus_tag.to_string(),
1332 gene,
1333 product,
1337 );
1338 let gff_outer = GFFOuter::new(
1339 source_name.clone(),
1340 ".".to_string(),
1341 "CDS".to_string(),
1342 start + prev_end,
1343 stop + prev_end,
1344 0.0,
1345 strand,
1346 phase,
1347 &gff_inner,
1348 );
1349 let field9_attributes = gff_outer.field9_attributes_build();
1350 writeln!(file, "{}\t{}\t{}\t{:?}\t{:?}\t{}\t{}\t{}\t{}", gff_outer.seqid, gff_outer.source, gff_outer.type_val, gff_outer.start, gff_outer.end, gff_outer.score, gff_outer.strand, gff_outer.phase, field9_attributes)?;
1352
1353 }
1354 prev_end = *seq_end;
1355 }
1356 if dna {
1357 writeln!(file, "##FASTA")?;
1358 writeln!(file, "{}", full_seq)?;
1360 }
1361 Ok(())
1362}
1363
1364#[allow(unused_assignments)]
1367pub fn orig_gff_write(seq_region: BTreeMap<String, (u32, u32)>, record_vec: Vec<Record>, filename: &str, dna: bool) -> io::Result<()> {
1368 let mut file = OpenOptions::new()
1369 .append(true) .create(true) .open(filename)?;
1373 if file.metadata()?.len() == 0 {
1374 writeln!(file, "##gff-version 3")?;
1375 }
1376 let mut source_name = String::new();
1377 let mut full_seq = String::new();
1378 let mut prev_end: u32 = 0;
1379 for (k, v) in seq_region.iter() {
1381 writeln!(file, "##sequence-region\t{}\t{}\t{}", &k, v.0, v.1)?;
1382 }
1383 for (i, (key, val)) in seq_region.iter().enumerate() {
1384 source_name = key.to_string();
1385 if dna == true {
1386 full_seq.push_str(&record_vec[i].sequence);
1387 }
1388 for (locus_tag, _valu) in &record_vec[i].cds.attributes {
1389 let start = match record_vec[i].cds.get_start(locus_tag) {
1390 Some(value) => value.get_value(),
1391 None => { println!("start value not found");
1392 None }.expect("start value not received")
1393 };
1394 let stop = match record_vec[i].cds.get_stop(locus_tag) {
1395 Some(value) => value.get_value(),
1396 None => { println!("stop value not found");
1397 None }.expect("stop value not received")
1398 };
1399 let gene = match record_vec[i].cds.get_gene(locus_tag) {
1400 Some(value) => value.to_string(),
1401 None => "unknown".to_string(),
1402 };
1403 let product = match record_vec[i].cds.get_product(locus_tag) {
1404 Some(value) => value.to_string(),
1405 None => "unknown product".to_string(),
1406 };
1407 let strand = match record_vec[i].cds.get_strand(locus_tag) {
1408 Some(valu) => {
1409 match valu {
1410 1 => "+".to_string(),
1411 -1 => "-".to_string(),
1412 _ => { println!("unexpected strand value {} for locus_tag {}", valu, locus_tag);
1413 "unknownstrand".to_string() }
1414 }
1415 },
1416 None => "unknownvalue".to_string(),
1417 };
1418 let phase = match record_vec[i].cds.get_codon_start(locus_tag) {
1419 Some(valuer) => {
1420 match valuer {
1421 1 => 0,
1422 2 => 1,
1423 3 => 2,
1424 _ => { println!("unexpected phase value {} in the bagging area for locus_tag {}", valuer, locus_tag);
1425 1 }
1426 }
1427 },
1428 None => 1,
1429 };
1430 let gff_inner = GFFInner::new(
1431 locus_tag.to_string(),
1432 source_name.clone(),
1433 locus_tag.to_string(),
1434 gene,
1435 product,
1439 );
1440 let gff_outer = GFFOuter::new(
1441 source_name.clone(),
1442 ".".to_string(),
1443 "CDS".to_string(),
1444 start + prev_end,
1445 stop + prev_end,
1446 0.0,
1447 strand,
1448 phase,
1449 &gff_inner,
1450 );
1451 let field9_attributes = gff_outer.field9_attributes_build();
1452 writeln!(file, "{}\t{}\t{}\t{:?}\t{:?}\t{}\t{}\t{}\t{}", gff_outer.seqid, gff_outer.source, gff_outer.type_val, gff_outer.start, gff_outer.end, gff_outer.score, gff_outer.strand, gff_outer.phase, field9_attributes)?;
1454
1455 }
1456 prev_end = val.1;
1457 }
1458 if dna {
1459 writeln!(file, "##FASTA")?;
1460 writeln!(file, "{}", full_seq)?;
1462 }
1463 Ok(())
1464}
1465
1466#[derive(Debug, Clone)]
1469pub struct Record {
1470 pub id: String,
1471 pub length: u32,
1472 pub sequence: String,
1473 pub start: usize,
1474 pub end: usize,
1475 pub strand: i32,
1476 pub cds: FeatureAttributeBuilder,
1477 pub source_map: SourceAttributeBuilder,
1478 pub seq_features: SequenceAttributeBuilder,
1479}
1480
1481impl Record {
1482 pub fn new() -> Self {
1484 Record {
1485 id: "".to_owned(),
1486 length: 0,
1487 sequence: "".to_owned(),
1488 start: 0,
1489 end: 0,
1490 strand: 0,
1491 source_map: SourceAttributeBuilder::new(),
1492 cds: FeatureAttributeBuilder::new(),
1493 seq_features: SequenceAttributeBuilder::new(),
1494 }
1495 }
1496 pub fn is_empty(&mut self) -> bool {
1497 self.id.is_empty() && self.length == 0
1498 }
1499 pub fn check(&mut self) -> Result<(), &str> {
1500 if self.id().is_empty() {
1501 return Err("Expecting id for Gbk record.");
1502 }
1503 Ok(())
1504 }
1505 pub fn id(&mut self) -> &str {
1506 &self.id
1507 }
1508 pub fn length(&mut self) -> u32 {
1509 self.length
1510 }
1511 pub fn sequence(&mut self) -> &str {
1512 &self.sequence
1513 }
1514 pub fn start(&mut self) -> u32 {
1515 self.start.try_into().unwrap()
1516 }
1517 pub fn end(&mut self) -> u32 {
1518 self.end.try_into().unwrap()
1519 }
1520 pub fn strand(&mut self) -> i32 {
1521 self.strand
1522 }
1523 pub fn cds(&mut self) -> FeatureAttributeBuilder {
1524 self.cds.clone()
1525 }
1526 pub fn source_map(&mut self) -> SourceAttributeBuilder {
1527 self.source_map.clone()
1528 }
1529 pub fn seq_features(&mut self) -> SequenceAttributeBuilder {
1530 self.seq_features.clone()
1531 }
1532 fn rec_clear(&mut self) {
1533 self.id.clear();
1534 self.length = 0;
1535 self.sequence.clear();
1536 self.start = 0;
1537 self.end = 0;
1538 self.strand = 0;
1539 self.source_map = SourceAttributeBuilder::new();
1540 self.cds = FeatureAttributeBuilder::new();
1541 self.seq_features = SequenceAttributeBuilder::new();
1542 }
1543}
1544
1545impl Default for Record {
1546 fn default() -> Self {
1547 Self::new()
1548 }
1549}
1550
1551#[allow(dead_code)]
1552pub struct Config {
1553 filename: String,
1554}
1555
1556impl Config {
1557 pub fn new(args: &[String]) -> Result<Config, &str> {
1558 if args.len() < 2 {
1559 panic!("not enough arguments, please provide filename");
1560 }
1561 let filename = args[1].clone();
1562
1563 Ok(Config { filename })
1564 }
1565}
1566
1567#[cfg(test)]
1568mod tests {
1569 use super::*;
1570
1571 #[test]
1572 #[allow(unused_mut)]
1573 #[allow(unused_variables)]
1574 #[allow(dead_code)]
1575 #[allow(unused_assignments)]
1576 pub fn genbank_to_gff() -> io::Result<()> {
1577 let file_gbk = fs::File::open("test_output.gbk")?;
1578 let prev_start: u32 = 0;
1579 let mut prev_end: u32 = 0;
1580 let mut reader = Reader::new(file_gbk);
1581 let mut records = reader.records();
1582 let mut read_counter: u32 = 0;
1583 let mut seq_region: BTreeMap<String, (u32,u32)> = BTreeMap::new();
1584 let mut record_vec: Vec<Record> = Vec::new();
1585 loop {
1586 match records.next() {
1587 Some(Ok(mut record)) => {
1588 let sour = record.source_map.source_name.clone().expect("issue collecting source name");
1591 let beginning = match record.source_map.get_start(&sour) {
1592 Some(value) => value.get_value(),
1593 _ => 0,
1594 };
1595 let ending = match record.source_map.get_stop(&sour) {
1596 Some(value) => value.get_value(),
1597 _ => 0,
1598 };
1599 if ending + prev_end < beginning + prev_end {
1600 println!("debug since the end value smaller is than the start {:?}", beginning);
1601 }
1602 seq_region.insert(sour, (beginning + prev_end, ending + prev_end));
1603 record_vec.push(record);
1604 read_counter+=1;
1606 prev_end+=ending;
1607 },
1608 Some(Err(e)) => { println!("theres an err {:?}", e); },
1609 None => {
1610 println!("finished iteration");
1611 break; },
1612 }
1613 }
1614 let output_file = format!("test_output.gff");
1615 gff_write(seq_region.clone(), record_vec, &output_file, true)?;
1616 println!("Total records processed: {}", read_counter);
1617 return Ok(());
1618 }
1619 #[test]
1620 #[allow(unused_mut)]
1621 #[allow(unused_variables)]
1622 #[allow(unused_assignments)]
1623 pub fn genbank_to_faa() -> Result<(), anyhow::Error> {
1624 let file_gbk = fs::File::open("test_output.gbk")?;
1625 let mut reader = Reader::new(file_gbk);
1626 let mut records = reader.records();
1627 let mut read_counter: u32 = 0;
1628 loop {
1629 match records.next() {
1630 Some(Ok(mut record)) => {
1631 for (k, _v) in &record.cds.attributes {
1634 match record.seq_features.get_sequence_faa(&k) {
1635 Some(value) => { let seq_faa = value.to_string();
1636 println!(">{}|{}\n{}", &record.id, &k, seq_faa);
1637 },
1638 _ => (),
1639 };
1640
1641 }
1642 read_counter+=1;
1643 },
1644 Some(Err(e)) => { println!("theres an err {:?}", e); },
1645 None => {
1646 println!("finished iteration");
1647 break; },
1648 }
1649 }
1650 println!("Total records processed: {}", read_counter);
1651 return Ok(());
1652 }
1653 #[test]
1654 #[allow(unused_mut)]
1655 #[allow(unused_assignments)]
1656 #[allow(unused_variables)]
1657 pub fn genbank_to_ffn() -> Result<(), anyhow::Error> {
1658 let file_gbk = fs::File::open("test_output.gbk")?;
1659 let mut reader = Reader::new(file_gbk);
1660 let mut records = reader.records();
1661 let mut read_counter: u32 = 0;
1662 loop {
1663 match records.next() {
1664 Some(Ok(mut record)) => {
1665 for (k, v) in &record.cds.attributes {
1668 match record.seq_features.get_sequence_ffn(&k) {
1669 Some(value) => { let seq_ffn = value.to_string();
1670 println!(">{}|{}\n{}", &record.id, &k, seq_ffn);
1671 },
1672 _ => (),
1673 };
1674
1675 }
1676 read_counter+=1;
1677 },
1678 Some(Err(e)) => { println!("theres an err {:?}", e); },
1679 None => {
1680 println!("finished iteration");
1681 break; },
1682 }
1683 }
1684 println!("Total records processed: {}", read_counter);
1685 return Ok(());
1686 }
1687 #[test]
1688 #[allow(unused_mut)]
1703 #[allow(unused_assignments)]
1704 #[allow(unused_variables)]
1705 pub fn create_new_record() -> Result<(), anyhow::Error> {
1706 let filename = format!("new_record.gff");
1707 let mut record = Record::new();
1708 let mut seq_region: BTreeMap<String, (u32,u32)> = BTreeMap::new();
1709 seq_region.insert("source_1".to_string(), (1,910));
1710 record.source_map
1711 .set_counter("source_1".to_string())
1712 .set_start(RangeValue::Exact(1))
1713 .set_stop(RangeValue::Exact(910))
1714 .set_organism("Escherichia coli".to_string())
1715 .set_mol_type("DNA".to_string())
1716 .set_strain("K-12 substr. MG1655".to_string())
1717 .set_type_material("type strain of Escherichia coli K12".to_string())
1719 .set_db_xref("PRJNA57779".to_string());
1720 record.cds
1721 .set_counter("b3304".to_string())
1722 .set_start(RangeValue::Exact(1))
1723 .set_stop(RangeValue::Exact(354))
1724 .set_gene("rplR".to_string())
1725 .set_product("50S ribosomal subunit protein L18".to_string())
1726 .set_codon_start(1)
1727 .set_strand(-1);
1728 record.cds
1729 .set_counter("b3305".to_string())
1730 .set_start(RangeValue::Exact(364))
1731 .set_stop(RangeValue::Exact(897))
1732 .set_gene("rplF".to_string())
1733 .set_product("50S ribosomal subunit protein L6".to_string())
1734 .set_codon_start(1)
1735 .set_strand(-1);
1736 record.seq_features
1737 .set_counter("b3304".to_string())
1738 .set_start(RangeValue::Exact(1))
1739 .set_stop(RangeValue::Exact(354))
1740 .set_sequence_ffn("ATGGATAAGAAATCTGCTCGTATCCGTCGTGCGACCCGCGCACGCCGCAAGCTCCAGGAG
1741CTGGGCGCAACTCGCCTGGTGGTACATCGTACCCCGCGTCACATTTACGCACAGGTAATT
1742GCACCGAACGGTTCTGAAGTTCTGGTAGCTGCTTCTACTGTAGAAAAAGCTATCGCTGAA
1743CAACTGAAGTACACCGGTAACAAAGACGCGGCTGCAGCTGTGGGTAAAGCTGTCGCTGAA
1744CGCGCTCTGGAAAAAGGCATCAAAGATGTATCCTTTGACCGTTCCGGGTTCCAATATCAT
1745GGTCGTGTCCAGGCACTGGCAGATGCTGCCCGTGAAGCTGGCCTTCAGTTCTAA".to_string())
1746 .set_sequence_faa("MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGSEVLVAASTVEKAIAE
1747QLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQALADAAREAGLQF".to_string())
1748 .set_codon_start(1)
1749 .set_strand(-1);
1750 record.seq_features
1751 .set_counter("b3305".to_string())
1752 .set_start(RangeValue::Exact(364))
1753 .set_stop(RangeValue::Exact(897))
1754 .set_sequence_ffn("ATGTCTCGTGTTGCTAAAGCACCGGTCGTTGTTCCTGCCGGCGTTGACGTAAAAATCAAC
1755GGTCAGGTTATTACGATCAAAGGTAAAAACGGCGAGCTGACTCGTACTCTCAACGATGCT
1756GTTGAAGTTAAACATGCAGATAATACCCTGACCTTCGGTCCGCGTGATGGTTACGCAGAC
1757GGTTGGGCACAGGCTGGTACCGCGCGTGCCCTGCTGAACTCAATGGTTATCGGTGTTACC
1758GAAGGCTTCACTAAGAAGCTGCAGCTGGTTGGTGTAGGTTACCGTGCAGCGGTTAAAGGC
1759AATGTGATTAACCTGTCTCTGGGTTTCTCTCATCCTGTTGACCATCAGCTGCCTGCGGGT
1760ATCACTGCTGAATGTCCGACTCAGACTGAAATCGTGCTGAAAGGCGCTGATAAGCAGGTG
1761ATCGGCCAGGTTGCAGCGGATCTGCGCGCCTACCGTCGTCCTGAGCCTTATAAAGGCAAG
1762GGTGTTCGTTACGCCGACGAAGTCGTGCGTACCAAAGAGGCTAAGAAGAAGTAA".to_string())
1763 .set_sequence_faa("MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKHADNTLTFGPRDGYAD
1764GWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNVINLSLGFSHPVDHQLPAG
1765ITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYADEVVRTKEAKKK".to_string())
1766 .set_codon_start(1)
1767 .set_strand(-1);
1768 record.sequence = "TTAGAACTGAAGGCCAGCTTCACGGGCAGCATCTGCCAGTGCCTGGACACGACCATGATA
1769TTGGAACCCGGAACGGTCAAAGGATACATCTTTGATGCCTTTTTCCAGAGCGCGTTCAGC
1770GACAGCTTTACCCACAGCTGCAGCCGCGTCTTTGTTACCGGTGTACTTCAGTTGTTCAGC
1771GATAGCTTTTTCTACAGTAGAAGCAGCTACCAGAACTTCAGAACCGTTCGGTGCAATTAC
1772CTGTGCGTAAATGTGACGCGGGGTACGATGTACCACCAGGCGAGTTGCGCCCAGCTCCTG
1773GAGCTTGCGGCGTGCGCGGGTCGCACGACGGATACGAGCAGATTTCTTATCCATAGTGTT
1774ACCTTACTTCTTCTTAGCCTCTTTGGTACGCACGACTTCGTCGGCGTAACGAACACCCTT
1775GCCTTTATAAGGCTCAGGACGACGGTAGGCGCGCAGATCCGCTGCAACCTGGCCGATCAC
1776CTGCTTATCAGCGCCTTTCAGCACGATTTCAGTCTGAGTCGGACATTCAGCAGTGATACC
1777CGCAGGCAGCTGATGGTCAACAGGATGAGAGAAACCCAGAGACAGGTTAATCACATTGCC
1778TTTAACCGCTGCACGGTAACCTACACCAACCAGCTGCAGCTTCTTAGTGAAGCCTTCGGT
1779AACACCGATAACCATTGAGTTCAGCAGGGCACGCGCGGTACCAGCCTGTGCCCAACCGTC
1780TGCGTAACCATCACGCGGACCGAAGGTCAGGGTATTATCTGCATGTTTAACTTCAACAGC
1781ATCGTTGAGAGTACGAGTCAGCTCGCCGTTTTTACCTTTGATCGTAATAACCTGACCGTT
1782GATTTTTACGTCAACGCCGGCAGGAACAACGACCGGTGCTTTAGCAACACGAGACA".to_string();
1783 gff_write(seq_region.clone(), vec![record.clone()], "new_output.gff", true)?;
1784 gbk_write(seq_region, vec![record], "new_output.gbk")?;
1785 return Ok(());
1786 }
1787}