//! Borrowed view over a single tab-separated GTF record line.
use super::{AttributeRef, GtfRecord};
use crate::parse::parse_attributes;
use anyhow::Result;
use bstr::ByteSlice;
/// A GTF record whose fields borrow from the line they were read from.
///
/// The first eight fields are kept as raw, unparsed byte slices in the order
/// they appear in the tab-separated line; the ninth is held as an
/// [`AttributeRef`]. Use [`GtfRecordRef::to_owned`] to obtain a [`GtfRecord`]
/// that does not borrow from the input.
#[derive(Debug, Default)]
pub struct GtfRecordRef<'a> {
    /// Column 1 of the line.
    pub seqname: &'a [u8],
    /// Column 2 of the line.
    pub source: &'a [u8],
    /// Column 3 of the line.
    pub feature: &'a [u8],
    /// Column 4 of the line, kept as bytes (not converted to a number).
    pub start: &'a [u8],
    /// Column 5 of the line, kept as bytes (not converted to a number).
    pub end: &'a [u8],
    /// Column 6 of the line, kept as bytes.
    pub score: &'a [u8],
    /// Column 7 of the line, kept as bytes.
    pub strand: &'a [u8],
    /// Column 8 of the line, kept as bytes.
    pub frame: &'a [u8],
    /// Column 9 of the line, as produced by the attribute parser.
    pub attribute: AttributeRef<'a>,
}
impl<'a> GtfRecordRef<'a> {
pub fn to_owned(&self) -> GtfRecord {
let seqname = self.seqname.to_owned();
let source = self.source.to_owned();
let feature = self.feature.to_owned();
let start = self.start.to_owned();
let end = self.end.to_owned();
let score = self.score.to_owned();
let strand = self.strand.to_owned();
let frame = self.frame.to_owned();
let attribute = self.attribute.to_owned();
GtfRecord {
seqname,
source,
feature,
start,
end,
score,
strand,
frame,
attribute,
}
}
pub fn from_bytes(record: &'a [u8]) -> Result<Self> {
let mut it = record.split_str("\t");
let seqname = it.next().unwrap_or_default();
let source = it.next().unwrap_or_default();
let feature = it.next().unwrap_or_default();
let start = it.next().unwrap_or_default();
let end = it.next().unwrap_or_default();
let score = it.next().unwrap_or_default();
let strand = it.next().unwrap_or_default();
let frame = it.next().unwrap_or_default();
let attribute_bytes = it.next().unwrap_or_default();
let (_, attribute) = parse_attributes(attribute_bytes).expect(" ");
let gtf_record = Self {
seqname,
source,
feature,
start,
end,
score,
strand,
frame,
attribute,
};
Ok(gtf_record)
}
}
#[cfg(test)]
mod testing {
    use super::GtfRecordRef;

    // Parses one gene line and checks each column plus two attribute lookups.
    #[test]
    fn test_gtf_ref() {
        // Columns are separated with explicit `\t` escapes so the fixture
        // cannot silently lose its tabs to editor or formatter whitespace
        // normalization; `from_bytes` splits on tabs only.
        let line: &[u8] = b"1\tensembl_havana\tgene\t1471765\t1497848\t.\t+\t.\tgene_id \"ENSG00000160072\"; gene_version \"20\"; gene_name \"ATAD3B\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\";";
        let record = GtfRecordRef::from_bytes(line).unwrap();

        assert_eq!(record.seqname, b"1");
        assert_eq!(record.source, b"ensembl_havana");
        assert_eq!(record.feature, b"gene");
        assert_eq!(record.start, b"1471765");
        assert_eq!(record.end, b"1497848");
        assert_eq!(record.score, b".");
        assert_eq!(record.strand, b"+");
        assert_eq!(record.frame, b".");
        assert_eq!(record.attribute.gene_name, Some("ATAD3B".as_bytes()));
        assert_eq!(record.attribute.transcript_id, None);
    }
}