1mod parser;
2
3use crate::{U8Vec, VCFError, VCFHeader};
4pub use parser::parse_record;
5use std::collections::HashMap;
6use std::io::{self, Write};
7use std::usize;
8
/// Sentinel position stored in a record's INFO/FORMAT index for a key that is
/// currently absent from the corresponding column list.
///
/// `VCFRecord::recreate_info_and_genotype_index` resets every index entry to
/// this value before re-assigning the positions of the keys that still exist.
pub const NOT_FOUND: usize = usize::MAX;
10
11#[derive(Debug, Clone, PartialEq)]
12pub struct VCFRecord {
13 header: VCFHeader,
14 pub chromosome: U8Vec,
15 pub position: u64,
16 pub id: Vec<U8Vec>,
17 pub reference: U8Vec,
18 pub alternative: Vec<U8Vec>,
19 pub qual: Option<f64>,
20 pub filter: Vec<U8Vec>,
21 pub info: Vec<(U8Vec, Vec<U8Vec>)>,
22 info_index: HashMap<U8Vec, usize>,
23 pub format: Vec<U8Vec>,
24 format_index: HashMap<U8Vec, usize>,
25 pub genotype: Vec<Vec<Vec<U8Vec>>>,
26}
27
28impl VCFRecord {
29 pub fn new(header: VCFHeader) -> Self {
30 VCFRecord {
31 header,
32 chromosome: vec![],
33 position: 0,
34 id: vec![],
35 reference: vec![],
36 alternative: vec![],
37 qual: None,
38 filter: vec![],
39 info: vec![],
40 info_index: HashMap::new(),
41 format: vec![],
42 format_index: HashMap::new(),
43 genotype: vec![],
44 }
45 }
46
47 pub fn from_bytes(line: &[u8], line_num: u64, header: VCFHeader) -> Result<Self, VCFError> {
48 let mut record = VCFRecord::new(header);
49 record.parse_bytes(line, line_num)?;
50 Ok(record)
51 }
52
53 pub fn parse_bytes(&mut self, line: &[u8], line_num: u64) -> Result<(), VCFError> {
54 parse_record::<nom::error::VerboseError<_>>(line, self)
55 .map_err(|_| -> VCFError { VCFError::RecordParseError(line_num).into() })?;
56 Ok(())
57 }
58
59 pub fn header(&self) -> &VCFHeader {
60 &self.header
61 }
62
63 pub fn info(&self, key: &[u8]) -> Option<&Vec<U8Vec>> {
64 self.info_index
65 .get(key)
66 .map(|x| self.info.get(*x).map(|y| &y.1))
67 .flatten()
68 }
69
70 pub fn info_mut(&mut self, key: &[u8]) -> Option<&mut Vec<U8Vec>> {
71 self.info_index
72 .get(key)
73 .cloned()
74 .map(move |x| self.info.get_mut(x).map(|y| &mut y.1))
75 .flatten()
76 }
77
78 pub fn insert_info(&mut self, key: &[u8], mut values: Vec<U8Vec>) -> Option<Vec<U8Vec>> {
79 if let Some(x) = self.info_mut(key) {
80 let mut ret = Vec::new();
81 ret.append(x);
82 x.append(&mut values);
83 Some(ret)
84 } else {
85 self.info.push((key.to_vec(), values));
86 self.info_index.insert(key.to_vec(), self.info.len() - 1);
87 None
88 }
89 }
90
91 pub fn genotype(&self, sample_name: &[u8], key: &[u8]) -> Option<&Vec<U8Vec>> {
92 self.header
93 .sample_index(sample_name)
94 .map(|x| {
95 self.format_index
96 .get(key)
97 .map(|y| self.genotype.get(x).map(|z| z.get(*y)))
98 })
99 .flatten()
100 .flatten()
101 .flatten()
102 }
103
104 pub fn genotype_mut(&mut self, sample_name: &[u8], key: &[u8]) -> Option<&mut Vec<U8Vec>> {
105 self.header
106 .sample_index(sample_name)
107 .map(move |x| {
108 self.format_index
109 .get(key)
110 .cloned()
111 .map(move |y| self.genotype.get_mut(x).map(|z| z.get_mut(y)))
112 })
113 .flatten()
114 .flatten()
115 .flatten()
116 }
117
118 pub fn insert_genotype(
119 &mut self,
120 sample_name: &[u8],
121 key: &[u8],
122 mut values: Vec<U8Vec>,
123 ) -> Option<Vec<U8Vec>> {
124 if let Some(sample_index) = self.header.sample_index(sample_name) {
125 if let Some(x) = self.genotype_mut(sample_name, key) {
126 let mut ret = Vec::new();
127 ret.append(x);
128 x.append(&mut values);
129 Some(ret)
130 } else {
131 if let Some(format_index) = self.format_index.get(key) {
132 while self.genotype[sample_index].len() < *format_index {
133 self.genotype[sample_index].push(vec![b".".to_vec()]);
134 }
135 } else {
136 self.format.push(key.to_vec());
137 self.format_index
138 .insert(key.to_vec(), self.format.len() - 1);
139 while self.genotype[sample_index].len() < self.format.len() - 1 {
140 self.genotype[sample_index].push(vec![b".".to_vec()]);
141 }
142 }
143 self.genotype[sample_index].push(values);
144 None
145 }
146 } else {
147 None
148 }
149 }
150
151 pub fn recreate_info_and_genotype_index(&mut self) {
154 for v in self.info_index.values_mut() {
156 *v = NOT_FOUND;
157 }
158 for (i, k) in self.info.iter().enumerate() {
159 if let Some(x) = self.info_index.get_mut(&k.0) {
160 *x = i;
161 } else {
162 self.info_index.insert(k.0.to_vec(), i);
163 }
164 }
165
166 for v in self.format_index.values_mut() {
168 *v = NOT_FOUND;
169 }
170 for (i, k) in self.format.iter().enumerate() {
171 if let Some(x) = self.format_index.get_mut(k) {
172 *x = i;
173 } else {
174 self.format_index.insert(k.to_vec(), i);
175 }
176 }
177 }
178}
179
/// Writes `array` to `writer` with `delimiter` between consecutive items.
///
/// An empty `array` is written as the single byte `.`.
fn write_array(writer: &mut impl Write, array: &[Vec<u8>], delimiter: &[u8]) -> io::Result<()> {
    let (head, tail) = match array.split_first() {
        Some(parts) => parts,
        None => return writer.write_all(b"."),
    };

    writer.write_all(head)?;
    for item in tail {
        writer.write_all(delimiter)?;
        writer.write_all(item)?;
    }
    Ok(())
}
194
195fn write_info(writer: &mut impl Write, info: &[(U8Vec, Vec<U8Vec>)]) -> io::Result<()> {
196 if info.is_empty() {
197 writer.write_all(b".")?;
198 } else {
199 for (i, (k, v)) in info.iter().enumerate() {
200 if i != 0 {
201 writer.write_all(b";")?;
202 }
203 writer.write_all(k)?;
204 if !v.is_empty() {
205 writer.write_all(b"=")?;
206 for (j, x) in v.iter().enumerate() {
207 if j != 0 {
208 writer.write_all(b",")?;
209 }
210 writer.write_all(x)?;
211 }
212 }
213 }
214 }
215
216 Ok(())
217}
218
219impl VCFRecord {
220 pub fn write_record<W: Write>(&self, mut writer: W) -> io::Result<()> {
221 writer.write_all(&self.chromosome)?;
222 writer.write_all(b"\t")?;
223 write!(writer, "{}\t", self.position)?;
224 write_array(&mut writer, &self.id, b",")?;
225 writer.write_all(b"\t")?;
226 writer.write_all(&self.reference)?;
227 writer.write_all(b"\t")?;
228 write_array(&mut writer, &self.alternative, b",")?;
229 writer.write_all(b"\t")?;
230 if let Some(qual) = self.qual.as_ref() {
231 if (qual.round() - *qual).abs() < 0.000_000_01 {
232 write!(writer, "{:.1}", qual)?;
233 } else {
234 write!(writer, "{}", qual)?;
235 }
236 } else {
237 writer.write_all(b".")?;
238 }
239 writer.write_all(b"\t")?;
240 write_array(&mut writer, &self.filter, b",")?;
241 writer.write_all(b"\t")?;
242 write_info(&mut writer, &self.info)?;
243 if !self.format.is_empty() {
244 writer.write_all(b"\t")?;
245 write_array(&mut writer, &self.format, b":")?;
246 for one_genotype in self.genotype.iter() {
247 writer.write_all(b"\t")?;
248 for (i, v) in one_genotype.iter().enumerate() {
249 if i != 0 {
250 writer.write_all(b":")?;
251 }
252 write_array(&mut writer, v, b",")?;
253 }
254 }
255 }
256
257 writer.write_all(b"\n")?;
258 Ok(())
259 }
260}
261
262#[cfg(test)]
263mod test;