1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
use crate::bam::HeaderView;
use linear_map::LinearMap;
use regex::Regex;
use std::collections::HashMap;
/// An in-memory header assembled from raw byte records.
///
/// Records are kept in insertion order and joined with `\n` by `to_bytes`.
#[derive(Debug, Clone)]
pub struct Header {
// Raw record bytes. Entries added by `push_record` / `push_comment` hold one
// record each; a header built by `from_template` stores the whole template
// text (possibly many lines) as its first entry.
records: Vec<Vec<u8>>,
}
impl Default for Header {
fn default() -> Self {
Self::new()
}
}
impl Header {
    /// Creates an empty header containing no records.
    pub fn new() -> Self {
        Header {
            records: Vec::new(),
        }
    }

    /// Creates a header pre-populated with the text of an existing header view.
    ///
    /// The bytes returned by `header.as_bytes()` are stored as a single leading
    /// record. Trailing newline bytes are stripped so that joining with records
    /// pushed later does not produce blank lines. If nothing remains after
    /// stripping (an empty template), no record is stored at all; otherwise the
    /// serialized header would start with an empty line as soon as another
    /// record is pushed.
    pub fn from_template(header: &HeaderView) -> Self {
        let mut record = header.as_bytes().to_owned();
        // Keep everything up to and including the last non-newline byte.
        let trimmed_len = record
            .iter()
            .rposition(|&byte| byte != b'\n')
            .map_or(0, |last| last + 1);
        record.truncate(trimmed_len);
        let records = if record.is_empty() {
            Vec::new()
        } else {
            vec![record]
        };
        Header { records }
    }

    /// Appends a structured record and returns `self` for chaining.
    pub fn push_record(&mut self, record: &HeaderRecord<'_>) -> &mut Self {
        self.records.push(record.to_bytes());
        self
    }

    /// Appends a comment record of the form `@CO<TAB>comment` and returns
    /// `self` for chaining.
    pub fn push_comment(&mut self, comment: &[u8]) -> &mut Self {
        self.records.push([&b"@CO"[..], comment].join(&b'\t'));
        self
    }

    /// Serializes the header: all records joined by `\n`, with no trailing
    /// newline.
    pub fn to_bytes(&self) -> Vec<u8> {
        self.records.join(&b'\n')
    }

    /// Parses the serialized header into a map keyed by record type.
    ///
    /// Each key is the two-uppercase-letter code following `@` in the first
    /// field of a line. Each value holds one `LinearMap` of tag to value per
    /// record of that type, in order of appearance. A field contributes a tag
    /// when it contains a letter, then a letter or digit, then `:`, then at
    /// least one printable ASCII character. Both patterns are unanchored, so
    /// they match anywhere inside a field.
    ///
    /// Input that does not fit this shape is skipped instead of causing a
    /// panic:
    /// * lines that are empty, contain only tabs, or whose first field has no
    ///   recognisable record type are ignored;
    /// * fields without a `TAG:value` match are ignored. In particular, a
    ///   free-text comment added with `push_comment` yields a `CO` entry whose
    ///   map only contains whatever fragments happen to look like `TAG:value`
    ///   (usually none);
    /// * bytes that are not valid UTF-8 are decoded lossily.
    pub fn to_hashmap(&self) -> HashMap<String, Vec<LinearMap<String, String>>> {
        let rec_type_re =
            Regex::new(r"@([A-Z][A-Z])").expect("record type pattern is a valid regex");
        let tag_re = Regex::new(r"([A-Za-z][A-Za-z0-9]):([ -~]+)")
            .expect("tag pattern is a valid regex");

        let header_bytes = self.to_bytes();
        let header_string = String::from_utf8_lossy(&header_bytes);

        let mut header_map: HashMap<String, Vec<LinearMap<String, String>>> =
            HashMap::default();
        for line in header_string.split('\n') {
            let mut parts = line.split('\t').filter(|part| !part.is_empty());
            // The first non-empty field must carry the record type; otherwise
            // the line cannot be classified and is skipped.
            let record_type = match parts
                .next()
                .and_then(|first| rec_type_re.captures(first))
            {
                Some(cap) => cap[1].to_owned(),
                None => continue,
            };
            let mut field = LinearMap::default();
            for cap in parts.filter_map(|part| tag_re.captures(part)) {
                field.insert(cap[1].to_owned(), cap[2].to_owned());
            }
            header_map
                .entry(record_type)
                .or_insert_with(Vec::new)
                .push(field);
        }
        header_map
    }
}
/// A single header record, built up tag by tag.
///
/// Serialized as the record type followed by tab-separated `TAG:value`
/// fields in the order they were pushed. `'a` is the lifetime of the borrowed
/// tag names (and of the `rec_type` slice passed to `new`).
#[derive(Debug, Clone)]
pub struct HeaderRecord<'a> {
    /// Record type code, stored with its leading `@`.
    rec_type: Vec<u8>,
    /// Tags in insertion order: borrowed tag name and owned value bytes.
    tags: Vec<(&'a [u8], Vec<u8>)>,
}

impl<'a> HeaderRecord<'a> {
    /// Creates a record of the given type with no tags.
    ///
    /// `rec_type` is given without the leading `@` (e.g. `b"SQ"`); the `@` is
    /// prepended here.
    pub fn new(rec_type: &'a [u8]) -> Self {
        let mut prefixed = Vec::with_capacity(rec_type.len() + 1);
        prefixed.push(b'@');
        prefixed.extend_from_slice(rec_type);
        HeaderRecord {
            rec_type: prefixed,
            tags: Vec::new(),
        }
    }

    /// Appends a `tag:value` field and returns `self` for chaining.
    ///
    /// The value is rendered with `ToString`. The bound is `?Sized`, so
    /// unsized values such as a plain `&str` can be passed directly, in
    /// addition to references to sized values (`&42`, `&String`, `&"text"`).
    pub fn push_tag<V: ToString + ?Sized>(&mut self, tag: &'a [u8], value: &V) -> &mut Self {
        self.tags.push((tag, value.to_string().into_bytes()));
        self
    }

    /// Serializes the record as `@TYPE` followed by `<TAB>tag:value` for each
    /// tag, without a trailing newline.
    fn to_bytes(&self) -> Vec<u8> {
        let mut out = self.rec_type.clone();
        for (tag, value) in &self.tags {
            out.push(b'\t');
            out.extend_from_slice(tag);
            out.push(b':');
            out.extend_from_slice(value);
        }
        out
    }
}