bio/io/
gff.rs

1// Copyright 2016 Pierre Marijon.
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6//! [GFF3] format reading and writing. [GFF2] is currently not supported.
7//!
8//! [GFF2]: http://gmod.org/wiki/GFF2 (not supported)
9//! [GTF2]: http://mblab.wustl.edu/GTF2.html (not supported)
10//! [GFF3]: http://gmod.org/wiki/GFF3#GFF3_Format
11//!
12//! # Example
13//!
14//! ```no_run
15//! // import functions (at top of script)
16//! use bio::io::gff;
17//! use std::io;
18//! let mut reader = gff::Reader::new(io::stdin(), gff::GffType::GFF3);
19//! let mut writer = gff::Writer::new(vec![], gff::GffType::GFF3);
20//! for record in reader.records() {
21//!     let rec = record.ok().expect("Error reading record.");
22//!     println!("{}", rec.seqname());
23//!     writer.write(&rec).ok().expect("Error writing record.");
24//! }
25//! ```
26
27use anyhow::Context;
28use itertools::Itertools;
29use multimap::MultiMap;
30use regex::Regex;
31use std::convert::{AsRef, TryInto};
32use std::fs;
33use std::io;
34use std::path::Path;
35use std::str::FromStr;
36
37use bio_types::strand::Strand;
38use serde::{Deserialize, Deserializer, Serialize};
39
/// Flavour of the GFF format family.
///
/// The formats of the GFF family differ in the last field of a record (the
/// attribute column). Each variant maps to a key/value separator, a
/// key/value-pair separator, and a separator between multiple values of the
/// same key.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize)]
pub enum GffType {
    /// Attribute format is: key1=value; key2=value1,value2
    GFF3,
    /// Attribute format is: key1 value; key2 value1; key2 value2
    GFF2,
    /// Same attribute layout as GFF2; only the possible keywords and values differ.
    GTF2,
    /// Custom separators: the first field of the tuple separates a key from its value,
    /// the second field separates multiple key-value pairs, and
    /// the third field separates multiple values for the same key.
    Any(u8, u8, u8),
}
58
59impl FromStr for GffType {
60    type Err = String;
61
62    /// Create a GffType from a string.
63    ///
64    /// # Arguments
65    ///
66    /// * `src_str` - The source string to convert to the GffType.
67    fn from_str(src_str: &str) -> Result<Self, Self::Err> {
68        match src_str {
69            "gff3" => Ok(GffType::GFF3),
70            "gff2" => Ok(GffType::GFF2),
71            "gtf2" => Ok(GffType::GTF2),
72            _ => Err(format!(
73                "String '{}' is not a valid GFFType (GFF/GTF format version).",
74                src_str
75            )),
76        }
77    }
78}
79
80impl GffType {
81    #[inline]
82    /// First field is key value separator.
83    /// Second field terminates a key value pair.
84    /// Third field
85    fn separator(self) -> (u8, u8, u8) {
86        match self {
87            GffType::GFF3 => (b'=', b';', b','),
88            GffType::GFF2 => (b' ', b';', 0u8),
89            GffType::GTF2 => (b' ', b';', 0u8),
90            GffType::Any(x, y, z) => (x, y, z),
91        }
92    }
93}
94
/// A GFF reader.
///
/// Wraps a `csv::Reader` over the underlying input together with the
/// `GffType` that selects how the attribute column is split.
#[derive(Debug)]
pub struct Reader<R: io::Read> {
    // Underlying CSV reader that yields the raw columns of each line.
    inner: csv::Reader<R>,
    // Format flavour; provides the attribute separators used by `records`.
    gff_type: GffType,
}
101
102impl Reader<fs::File> {
103    /// Read GFF from given file path in given format.
104    pub fn from_file<P: AsRef<Path> + std::fmt::Debug>(
105        path: P,
106        fileformat: GffType,
107    ) -> anyhow::Result<Self> {
108        fs::File::open(&path)
109            .map(|f| Reader::new(f, fileformat))
110            .with_context(|| format!("Failed to read GFF from {:#?}", path))
111    }
112}
113
114impl<R: io::Read> Reader<R> {
115    /// Create a new GFF reader given an instance of `io::Read`, in given format.
116    pub fn new(reader: R, fileformat: GffType) -> Self {
117        Reader {
118            inner: csv::ReaderBuilder::new()
119                .delimiter(b'\t')
120                .has_headers(false)
121                .comment(Some(b'#'))
122                .from_reader(reader),
123            gff_type: fileformat,
124        }
125    }
126
127    /// Iterate over all records.
128    pub fn records(&mut self) -> Records<'_, R> {
129        let (delim, term, vdelim) = self.gff_type.separator();
130        let r = format!(
131            r" *(?P<key>[^{delim}{term}\t]+){delim}(?P<value>[^{delim}{term}\t]+){term}?",
132            delim = delim as char,
133            term = term as char
134        );
135        let attribute_re = Regex::new(&r).unwrap();
136        Records {
137            inner: self.inner.deserialize(),
138            attribute_re,
139            value_delim: vdelim as char,
140        }
141    }
142}
143
// Raw columns of one GFF line in file order, as deserialized from the
// underlying CSV reader before the attribute column is parsed. The names
// below follow the destructuring in `Records::next`.
type GffRecordInner = (
    String, // seqname
    String, // source
    String, // feature_type
    u64,    // start
    u64,    // end
    String, // score (kept as text)
    String, // strand (kept as text)
    Phase,  // phase
    String, // raw_attributes (unparsed attribute column)
);
155
/// Phase column of a GFF record.
///
/// When built through the `From` conversions or deserialization, the wrapped
/// value is `Some(0)`, `Some(1)` or `Some(2)`; any other input results in
/// `None`, which is serialized as `"."`.
#[derive(Debug, PartialEq, Eq, Clone, Default)]
pub struct Phase(Option<u8>);
158
159impl Phase {
160    fn validate<T: Into<u8>>(p: T) -> Option<u8> {
161        let p = p.into();
162        if p < 3 {
163            Some(p)
164        } else {
165            None
166        }
167    }
168}
169
170impl From<u8> for Phase {
171    /// Create a new Phase from a u8.
172    ///
173    /// # Example
174    /// ```
175    /// use bio::io::gff::Phase;
176    ///
177    /// let p = Phase::from(0);
178    /// let p = Phase::from(3); // This will create Phase(None)
179    /// ```
180    fn from(p: u8) -> Self {
181        Phase(Self::validate(p))
182    }
183}
184
185impl From<Option<u8>> for Phase {
186    /// Create a new Phase from an Option<u8>.
187    ///
188    /// # Example
189    /// ```
190    /// use bio::io::gff::Phase;
191    ///
192    /// let p = Phase::from(Some(0));
193    /// let p = Phase::from(None);
194    /// let p = Phase::from(Some(3)); // This will create Phase(None)
195    /// ```
196    fn from(p: Option<u8>) -> Self {
197        Phase(p.and_then(Self::validate))
198    }
199}
200
201impl TryInto<u8> for Phase {
202    type Error = ();
203
204    /// Try to convert a Phase into a u8.
205    ///
206    /// # Example
207    /// ```
208    /// use bio::io::gff::Phase;
209    /// use std::convert::TryInto;
210    ///
211    /// let p = Phase::from(0);
212    /// let u: u8 = p.try_into().unwrap();
213    /// assert_eq!(u, 0);
214    /// ```
215    fn try_into(self) -> Result<u8, Self::Error> {
216        match self.0 {
217            Some(p) => Ok(p),
218            None => Err(()),
219        }
220    }
221}
222
223impl TryInto<Option<u8>> for Phase {
224    type Error = ();
225
226    /// Try to convert a Phase into an Option<u8>.
227    ///
228    /// # Example
229    /// ```
230    /// use bio::io::gff::Phase;
231    /// use std::convert::TryInto;
232    ///
233    /// let p = Phase::from(Some(0));
234    /// let u: Option<u8> = p.try_into().unwrap();
235    /// assert_eq!(u, Some(0));
236    ///
237    /// let p = Phase::from(None);
238    /// let u: Option<u8> = p.try_into().unwrap();
239    /// assert_eq!(u, None);
240    /// ```
241    fn try_into(self) -> Result<Option<u8>, Self::Error> {
242        Ok(self.0)
243    }
244}
245
246impl<'de> Deserialize<'de> for Phase {
247    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
248    where
249        D: Deserializer<'de>,
250    {
251        let s = String::deserialize(deserializer)?;
252        match s.as_str() {
253            "." => Ok(Phase(None)),
254            _ => {
255                let p = u8::from_str(&s)
256                    .map_err(|_| serde::de::Error::custom("Phase must be \".\", 0, 1, or 2"))?;
257                Ok(Phase(Self::validate(p)))
258            }
259        }
260    }
261}
262
263impl Serialize for Phase {
264    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
265    where
266        S: serde::Serializer,
267    {
268        match self.0 {
269            Some(p) => serializer.serialize_u8(p),
270            None => serializer.serialize_str("."),
271        }
272    }
273}
274
/// An iterator over the records of a GFF file.
///
/// Created by `Reader::records`; yields one `csv::Result<Record>` per line.
pub struct Records<'a, R: io::Read> {
    // Deserializing iterator over the raw columns of each line.
    inner: csv::DeserializeRecordsIter<'a, R, GffRecordInner>,
    // Matches one key/value pair of the attribute column
    // (named capture groups `key` and `value`).
    attribute_re: Regex,
    // Separator between multiple values of the same attribute key.
    value_delim: char,
}
281
282impl<'a, R: io::Read> Iterator for Records<'a, R> {
283    type Item = csv::Result<Record>;
284
285    fn next(&mut self) -> Option<csv::Result<Record>> {
286        self.inner.next().map(|res| {
287            res.map(
288                |(
289                    seqname,
290                    source,
291                    feature_type,
292                    start,
293                    end,
294                    score,
295                    strand,
296                    phase,
297                    raw_attributes,
298                )| {
299                    let trim_quotes = |s: &str| s.trim_matches('\'').trim_matches('"').to_owned();
300                    let mut attributes = MultiMap::new();
301                    for caps in self.attribute_re.captures_iter(&raw_attributes) {
302                        for value in caps["value"].split(self.value_delim) {
303                            attributes.insert(trim_quotes(&caps["key"]), trim_quotes(value));
304                        }
305                    }
306                    Record {
307                        seqname,
308                        source,
309                        feature_type,
310                        start,
311                        end,
312                        score,
313                        strand,
314                        phase,
315                        attributes,
316                    }
317                },
318            )
319        })
320    }
321}
322
323/// A GFF writer.
324#[derive(Debug)]
325pub struct Writer<W: io::Write> {
326    inner: csv::Writer<W>,
327    delimiter: char,
328    terminator: String,
329}
330
331impl Writer<fs::File> {
332    /// Write to a given file path in given format.
333    #[allow(clippy::wrong_self_convention)]
334    pub fn to_file<P: AsRef<Path>>(path: P, fileformat: GffType) -> io::Result<Self> {
335        fs::File::create(path).map(|f| Writer::new(f, fileformat))
336    }
337}
338
339impl<W: io::Write> Writer<W> {
340    /// Write to a given writer.
341    pub fn new(writer: W, fileformat: GffType) -> Self {
342        let (delim, termi, _) = fileformat.separator();
343
344        Writer {
345            inner: csv::WriterBuilder::new()
346                .delimiter(b'\t')
347                .flexible(true)
348                .from_writer(writer),
349            delimiter: delim as char,
350            terminator: String::from_utf8(vec![termi]).unwrap(),
351        }
352    }
353
354    /// Write a given GFF record.
355    pub fn write(&mut self, record: &Record) -> csv::Result<()> {
356        let attributes = if !record.attributes.is_empty() {
357            record
358                .attributes
359                .iter()
360                .map(|(a, b)| format!("{}{}{}", a, self.delimiter, b))
361                .join(&self.terminator)
362        } else {
363            "".to_owned()
364        };
365
366        self.inner.serialize((
367            &record.seqname,
368            &record.source,
369            &record.feature_type,
370            record.start,
371            record.end,
372            &record.score,
373            &record.strand,
374            &record.phase,
375            attributes,
376        ))
377    }
378}
379
380/// A GFF record
381#[derive(Default, Clone, Eq, PartialEq, Debug, Serialize, Deserialize)]
382pub struct Record {
383    seqname: String,
384    source: String,
385    feature_type: String,
386    start: u64,
387    end: u64,
388    score: String,
389    strand: String,
390    phase: Phase,
391    attributes: MultiMap<String, String>,
392}
393
394impl Record {
395    /// Create a new GFF record.
396    pub fn new() -> Self {
397        Record {
398            seqname: "".to_owned(),
399            source: "".to_owned(),
400            feature_type: "".to_owned(),
401            start: 0,
402            end: 0,
403            score: ".".to_owned(),
404            strand: ".".to_owned(),
405            phase: Phase(None),
406            attributes: MultiMap::<String, String>::new(),
407        }
408    }
409
410    /// Sequence name of the feature.
411    pub fn seqname(&self) -> &str {
412        &self.seqname
413    }
414
415    /// Source of the feature.
416    pub fn source(&self) -> &str {
417        &self.source
418    }
419
420    /// Type of the feature.
421    pub fn feature_type(&self) -> &str {
422        &self.feature_type
423    }
424
425    /// Start position of feature (1-based).
426    pub fn start(&self) -> &u64 {
427        &self.start
428    }
429
430    /// End position of feature (1-based, not included).
431    pub fn end(&self) -> &u64 {
432        &self.end
433    }
434
435    /// Score of feature
436    pub fn score(&self) -> Option<u64> {
437        match self.score.as_ref() {
438            "." => None,
439            _ => self.score.parse::<u64>().ok(),
440        }
441    }
442
443    /// Strand of the feature.
444    pub fn strand(&self) -> Option<Strand> {
445        match self.strand.as_ref() {
446            "+" => Some(Strand::Forward),
447            "-" => Some(Strand::Reverse),
448            _ => None,
449        }
450    }
451
452    /// Phase of the feature. The phase is one of the integers 0, 1, or 2, indicating the number of bases that should be removed from the beginning of this feature to reach the first base of the next codon. `None` if not applicable (`"."` in GFF file).
453    pub fn phase(&self) -> &Phase {
454        &self.phase
455    }
456
457    /// Attribute of feature
458    pub fn attributes(&self) -> &MultiMap<String, String> {
459        &self.attributes
460    }
461
462    /// Get mutable reference on seqname of feature.
463    pub fn seqname_mut(&mut self) -> &mut String {
464        &mut self.seqname
465    }
466
467    /// Get mutable reference on source of feature.
468    pub fn source_mut(&mut self) -> &mut String {
469        &mut self.source
470    }
471
472    /// Get mutable reference on type of feature.
473    pub fn feature_type_mut(&mut self) -> &mut String {
474        &mut self.feature_type
475    }
476
477    /// Get mutable reference on start of feature.
478    pub fn start_mut(&mut self) -> &mut u64 {
479        &mut self.start
480    }
481
482    /// Get mutable reference on end of feature.
483    pub fn end_mut(&mut self) -> &mut u64 {
484        &mut self.end
485    }
486
487    /// Get mutable reference on score of feature.
488    pub fn score_mut(&mut self) -> &mut String {
489        &mut self.score
490    }
491
492    /// Get mutable reference on strand of feature.
493    pub fn strand_mut(&mut self) -> &mut String {
494        &mut self.strand
495    }
496
497    /// Get mutable reference on phase of feature.
498    pub fn phase_mut(&mut self) -> &mut Phase {
499        &mut self.phase
500    }
501
502    /// Get mutable reference on attributes of feature.
503    pub fn attributes_mut(&mut self) -> &mut MultiMap<String, String> {
504        &mut self.attributes
505    }
506}
507
#[cfg(test)]
mod tests {
    use super::*;
    use bio_types::strand::Strand;
    use multimap::MultiMap;

    const GFF_FILE: &[u8] = b"P0A7B8\tUniProtKB\tInitiator methionine\t1\t1\t.\t.\t.\t\
Note=Removed,Obsolete;ID=test
P0A7B8\tUniProtKB\tChain\t2\t176\t50\t+\t.\tNote=ATP-dependent protease subunit HslV;\
ID=PRO_0000148105";
    const GFF_FILE_WITH_COMMENT: &[u8] = b"#comment
P0A7B8\tUniProtKB\tInitiator methionine\t1\t1\t.\t.\t.\t\
Note=Removed,Obsolete;ID=test
#comment
P0A7B8\tUniProtKB\tChain\t2\t176\t50\t+\t.\tNote=ATP-dependent protease subunit HslV;\
ID=PRO_0000148105";
    // A single attribute per record is required here because MultiMap
    // iterates over its keys in arbitrary order.
    const GFF_FILE_ONE_ATTRIB: &[u8] =
        b"P0A7B8\tUniProtKB\tInitiator methionine\t1\t1\t.\t.\t.\tNote=Removed
P0A7B8\tUniProtKB\tChain\t2\t176\t50\t+\t.\tID=PRO_0000148105
";

    const GTF_FILE: &[u8] =
        b"P0A7B8\tUniProtKB\tInitiator methionine\t1\t1\t.\t.\t.\tNote Removed;ID test
P0A7B8\tUniProtKB\tChain\t2\t176\t50\t+\t.\tNote ATP-dependent;ID PRO_0000148105
";

    // Another variant of GTF file, modified from a published GENCODE GTF file.
    const GTF_FILE_2: &[u8] = b"chr1\tHAVANA\tgene\t11869\t14409\t.\t+\t.\t\
gene_id \"ENSG00000223972.5\"; gene_type \"transcribed_unprocessed_pseudogene\";
chr1\tHAVANA\ttranscript\t11869\t14409\t.\t+\t.\tgene_id \"ENSG00000223972.5\";\
transcript_id \"ENST00000456328.2\"; gene_type \"transcribed_unprocessed_pseudogene\"";

    // GTF file with duplicate attribute keys, taken from a published GENCODE GTF file.
    const GTF_FILE_DUP_ATTR_KEYS: &[u8] = b"chr1\tENSEMBL\ttranscript\t182393\t\
184158\t.\t+\t.\tgene_id \"ENSG00000279928.1\"; transcript_id \"ENST00000624431.1\";\
gene_type \"protein_coding\"; gene_status \"KNOWN\"; gene_name \"FO538757.2\";\
transcript_type \"protein_coding\"; transcript_status \"KNOWN\";\
transcript_name \"FO538757.2-201\"; level 3; protein_id \"ENSP00000485457.1\";\
transcript_support_level \"1\"; tag \"basic\"; tag \"appris_principal_1\";";

    // A single attribute per record is required here because MultiMap
    // iterates over its keys in arbitrary order.
    const GTF_FILE_ONE_ATTRIB: &[u8] =
        b"P0A7B8\tUniProtKB\tInitiator methionine\t1\t1\t.\t.\t.\tNote Removed
P0A7B8\tUniProtKB\tChain\t2\t176\t50\t+\t.\tID PRO_0000148105
";

    #[test]
    fn test_reader_gff3() {
        let seqname = ["P0A7B8", "P0A7B8"];
        let source = ["UniProtKB", "UniProtKB"];
        let feature_type = ["Initiator methionine", "Chain"];
        let starts = [1, 2];
        let ends = [1, 176];
        let scores = [None, Some(50)];
        let strand = [None, Some(Strand::Forward)];
        let phase = [Phase(None), Phase(None)];
        let mut attributes = [MultiMap::new(), MultiMap::new()];
        attributes[0].insert("ID".to_owned(), "test".to_owned());
        attributes[0].insert("Note".to_owned(), "Removed".to_owned());
        attributes[0].insert("Note".to_owned(), "Obsolete".to_owned());
        attributes[1].insert("ID".to_owned(), "PRO_0000148105".to_owned());
        attributes[1].insert(
            "Note".to_owned(),
            "ATP-dependent protease subunit HslV".to_owned(),
        );

        let mut reader = Reader::new(GFF_FILE, GffType::GFF3);
        let records: Vec<_> = reader.records().collect();
        // Guard against a vacuous pass: the loop below checks nothing if no
        // record was parsed.
        assert_eq!(records.len(), seqname.len());
        for (i, r) in records.into_iter().enumerate() {
            let record = r.unwrap();
            assert_eq!(record.seqname(), seqname[i]);
            assert_eq!(record.source(), source[i]);
            assert_eq!(record.feature_type(), feature_type[i]);
            assert_eq!(*record.start(), starts[i]);
            assert_eq!(*record.end(), ends[i]);
            assert_eq!(record.score(), scores[i]);
            assert_eq!(record.strand(), strand[i]);
            assert_eq!(*record.phase(), phase[i]);
            assert_eq!(record.attributes(), &attributes[i]);
        }

        let mut reader = Reader::new(GFF_FILE_WITH_COMMENT, GffType::GFF3);
        let records: Vec<_> = reader.records().collect();
        assert_eq!(records.len(), seqname.len());
        for (i, r) in records.into_iter().enumerate() {
            let record = r.unwrap();
            assert_eq!(record.seqname(), seqname[i]);
            assert_eq!(record.source(), source[i]);
            assert_eq!(record.feature_type(), feature_type[i]);
            assert_eq!(*record.start(), starts[i]);
            assert_eq!(*record.end(), ends[i]);
            assert_eq!(record.score(), scores[i]);
            assert_eq!(record.strand(), strand[i]);
            assert_eq!(*record.phase(), phase[i]);
            assert_eq!(record.attributes(), &attributes[i]);
        }
    }

    #[test]
    fn test_reader_from_file_path_doesnt_exist_returns_err() {
        let path = Path::new("/I/dont/exist.gff");
        let error = Reader::from_file(path, GffType::GFF3)
            .unwrap_err()
            .downcast::<String>()
            .unwrap();

        assert_eq!(&error, "Failed to read GFF from \"/I/dont/exist.gff\"")
    }

    #[test]
    fn test_gff_type_from_str() {
        let gff3 = GffType::from_str("gff3").expect("Error parsing");
        assert_eq!(gff3, GffType::GFF3);

        let gff2 = GffType::from_str("gff2").expect("Error parsing");
        assert_eq!(gff2, GffType::GFF2);

        let gtf2 = GffType::from_str("gtf2").expect("Error parsing");
        assert_eq!(gtf2, GffType::GTF2);

        let unk = GffType::from_str("unknown").unwrap_err();
        assert_eq!(
            unk,
            "String 'unknown' is not a valid GFFType (GFF/GTF format version)."
        )
    }

    #[test]
    fn test_reader_gtf2() {
        let seqname = ["P0A7B8", "P0A7B8"];
        let source = ["UniProtKB", "UniProtKB"];
        let feature_type = ["Initiator methionine", "Chain"];
        let starts = [1, 2];
        let ends = [1, 176];
        let scores = [None, Some(50)];
        let strand = [None, Some(Strand::Forward)];
        let phase = [Phase(None), Phase(None)];
        let mut attributes = [MultiMap::new(), MultiMap::new()];
        attributes[0].insert("ID".to_owned(), "test".to_owned());
        attributes[0].insert("Note".to_owned(), "Removed".to_owned());
        attributes[1].insert("ID".to_owned(), "PRO_0000148105".to_owned());
        attributes[1].insert("Note".to_owned(), "ATP-dependent".to_owned());

        let mut reader = Reader::new(GTF_FILE, GffType::GTF2);
        let records: Vec<_> = reader.records().collect();
        assert_eq!(records.len(), seqname.len());
        for (i, r) in records.into_iter().enumerate() {
            let record = r.unwrap();
            assert_eq!(record.seqname(), seqname[i]);
            assert_eq!(record.source(), source[i]);
            assert_eq!(record.feature_type(), feature_type[i]);
            assert_eq!(*record.start(), starts[i]);
            assert_eq!(*record.end(), ends[i]);
            assert_eq!(record.score(), scores[i]);
            assert_eq!(record.strand(), strand[i]);
            assert_eq!(*record.phase(), phase[i]);
            assert_eq!(record.attributes(), &attributes[i]);
        }
    }

    #[test]
    fn test_reader_gtf2_2() {
        let seqname = ["chr1", "chr1"];
        let source = ["HAVANA", "HAVANA"];
        let feature_type = ["gene", "transcript"];
        let starts = [11869, 11869];
        let ends = [14409, 14409];
        let scores = [None, None];
        let strand = [Some(Strand::Forward), Some(Strand::Forward)];
        let phase = [Phase(None), Phase(None)];
        let mut attributes = [MultiMap::new(), MultiMap::new()];
        attributes[0].insert("gene_id".to_owned(), "ENSG00000223972.5".to_owned());
        attributes[0].insert(
            "gene_type".to_owned(),
            "transcribed_unprocessed_pseudogene".to_owned(),
        );
        attributes[1].insert("gene_id".to_owned(), "ENSG00000223972.5".to_owned());
        attributes[1].insert("transcript_id".to_owned(), "ENST00000456328.2".to_owned());
        attributes[1].insert(
            "gene_type".to_owned(),
            "transcribed_unprocessed_pseudogene".to_owned(),
        );

        let mut reader = Reader::new(GTF_FILE_2, GffType::GTF2);
        let records: Vec<_> = reader.records().collect();
        assert_eq!(records.len(), seqname.len());
        for (i, r) in records.into_iter().enumerate() {
            let record = r.unwrap();
            assert_eq!(record.seqname(), seqname[i]);
            assert_eq!(record.source(), source[i]);
            assert_eq!(record.feature_type(), feature_type[i]);
            assert_eq!(*record.start(), starts[i]);
            assert_eq!(*record.end(), ends[i]);
            assert_eq!(record.score(), scores[i]);
            assert_eq!(record.strand(), strand[i]);
            assert_eq!(*record.phase(), phase[i]);
            assert_eq!(record.attributes(), &attributes[i]);
        }
    }

    #[test]
    fn test_reader_gtf2_dup_attr_keys() {
        let mut reader = Reader::new(GTF_FILE_DUP_ATTR_KEYS, GffType::GTF2);
        let mut records = reader.records().collect::<Vec<_>>();
        assert_eq!(records.len(), 1);
        let record = records.pop().unwrap().expect("expected one record");
        assert_eq!(record.attributes.get("tag"), Some(&"basic".to_owned()));
        assert_eq!(
            record.attributes.get_vec("tag"),
            Some(&vec!["basic".to_owned(), "appris_principal_1".to_owned()])
        );
    }

    #[test]
    fn test_writer_gff3() {
        let mut reader = Reader::new(GFF_FILE_ONE_ATTRIB, GffType::GFF3);
        let mut writer = Writer::new(vec![], GffType::GFF3);
        for r in reader.records() {
            writer
                .write(&r.expect("Error reading record"))
                .expect("Error writing record");
        }
        assert_eq!(writer.inner.into_inner().unwrap(), GFF_FILE_ONE_ATTRIB)
    }

    #[test]
    fn test_writer_gtf2() {
        let mut reader = Reader::new(GTF_FILE_ONE_ATTRIB, GffType::GTF2);
        let mut writer = Writer::new(vec![], GffType::GTF2);
        for r in reader.records() {
            writer
                .write(&r.expect("Error reading record"))
                .expect("Error writing record");
        }
        assert_eq!(writer.inner.into_inner().unwrap(), GTF_FILE_ONE_ATTRIB)
    }

    #[test]
    fn test_convert_gtf2_to_gff3() {
        let mut reader = Reader::new(GTF_FILE_ONE_ATTRIB, GffType::GTF2);
        let mut writer = Writer::new(vec![], GffType::GFF3);
        for r in reader.records() {
            writer
                .write(&r.expect("Error reading record"))
                .expect("Error writing record");
        }
        assert_eq!(writer.inner.into_inner().unwrap(), GFF_FILE_ONE_ATTRIB)
    }

    #[test]
    fn test_unknown_gff_type() {
        assert_eq!(
            GffType::from_str("xtf9"),
            Err("String 'xtf9' is not a valid GFFType (GFF/GTF format version).".to_string())
        )
    }

    #[test]
    fn test_from_u8_creates_phase_with_value() {
        let phase = Phase::from(1);
        assert_eq!(phase, Phase(Some(1)));
    }

    #[test]
    fn test_from_out_of_range_creates_empty_phase() {
        assert_eq!(Phase::from(3), Phase(None));
        assert_eq!(Phase::from(Some(3)), Phase(None));
    }

    #[test]
    fn test_try_into_u8_returns_value_for_phase_with_value() {
        let phase = Phase(Some(2));
        let result: Result<u8, ()> = phase.try_into();
        assert_eq!(result, Ok(2));
    }

    #[test]
    fn test_try_into_u8_returns_error_for_phase_with_none() {
        let phase = Phase(None);
        let result: Result<u8, ()> = phase.try_into();
        assert_eq!(result, Err(()));
    }
}