vcf/record/
mod.rs

1mod parser;
2
3use crate::{U8Vec, VCFError, VCFHeader};
4pub use parser::parse_record;
5use std::collections::HashMap;
6use std::io::{self, Write};
7use std::usize;
8
9pub const NOT_FOUND: usize = usize::MAX;
10
11#[derive(Debug, Clone, PartialEq)]
12pub struct VCFRecord {
13    header: VCFHeader,
14    pub chromosome: U8Vec,
15    pub position: u64,
16    pub id: Vec<U8Vec>,
17    pub reference: U8Vec,
18    pub alternative: Vec<U8Vec>,
19    pub qual: Option<f64>,
20    pub filter: Vec<U8Vec>,
21    pub info: Vec<(U8Vec, Vec<U8Vec>)>,
22    info_index: HashMap<U8Vec, usize>,
23    pub format: Vec<U8Vec>,
24    format_index: HashMap<U8Vec, usize>,
25    pub genotype: Vec<Vec<Vec<U8Vec>>>,
26}
27
28impl VCFRecord {
29    pub fn new(header: VCFHeader) -> Self {
30        VCFRecord {
31            header,
32            chromosome: vec![],
33            position: 0,
34            id: vec![],
35            reference: vec![],
36            alternative: vec![],
37            qual: None,
38            filter: vec![],
39            info: vec![],
40            info_index: HashMap::new(),
41            format: vec![],
42            format_index: HashMap::new(),
43            genotype: vec![],
44        }
45    }
46
47    pub fn from_bytes(line: &[u8], line_num: u64, header: VCFHeader) -> Result<Self, VCFError> {
48        let mut record = VCFRecord::new(header);
49        record.parse_bytes(line, line_num)?;
50        Ok(record)
51    }
52
53    pub fn parse_bytes(&mut self, line: &[u8], line_num: u64) -> Result<(), VCFError> {
54        parse_record::<nom::error::VerboseError<_>>(line, self)
55            .map_err(|_| -> VCFError { VCFError::RecordParseError(line_num).into() })?;
56        Ok(())
57    }
58
59    pub fn header(&self) -> &VCFHeader {
60        &self.header
61    }
62
63    pub fn info(&self, key: &[u8]) -> Option<&Vec<U8Vec>> {
64        self.info_index
65            .get(key)
66            .map(|x| self.info.get(*x).map(|y| &y.1))
67            .flatten()
68    }
69
70    pub fn info_mut(&mut self, key: &[u8]) -> Option<&mut Vec<U8Vec>> {
71        self.info_index
72            .get(key)
73            .cloned()
74            .map(move |x| self.info.get_mut(x).map(|y| &mut y.1))
75            .flatten()
76    }
77
78    pub fn insert_info(&mut self, key: &[u8], mut values: Vec<U8Vec>) -> Option<Vec<U8Vec>> {
79        if let Some(x) = self.info_mut(key) {
80            let mut ret = Vec::new();
81            ret.append(x);
82            x.append(&mut values);
83            Some(ret)
84        } else {
85            self.info.push((key.to_vec(), values));
86            self.info_index.insert(key.to_vec(), self.info.len() - 1);
87            None
88        }
89    }
90
91    pub fn genotype(&self, sample_name: &[u8], key: &[u8]) -> Option<&Vec<U8Vec>> {
92        self.header
93            .sample_index(sample_name)
94            .map(|x| {
95                self.format_index
96                    .get(key)
97                    .map(|y| self.genotype.get(x).map(|z| z.get(*y)))
98            })
99            .flatten()
100            .flatten()
101            .flatten()
102    }
103
104    pub fn genotype_mut(&mut self, sample_name: &[u8], key: &[u8]) -> Option<&mut Vec<U8Vec>> {
105        self.header
106            .sample_index(sample_name)
107            .map(move |x| {
108                self.format_index
109                    .get(key)
110                    .cloned()
111                    .map(move |y| self.genotype.get_mut(x).map(|z| z.get_mut(y)))
112            })
113            .flatten()
114            .flatten()
115            .flatten()
116    }
117
118    pub fn insert_genotype(
119        &mut self,
120        sample_name: &[u8],
121        key: &[u8],
122        mut values: Vec<U8Vec>,
123    ) -> Option<Vec<U8Vec>> {
124        if let Some(sample_index) = self.header.sample_index(sample_name) {
125            if let Some(x) = self.genotype_mut(sample_name, key) {
126                let mut ret = Vec::new();
127                ret.append(x);
128                x.append(&mut values);
129                Some(ret)
130            } else {
131                if let Some(format_index) = self.format_index.get(key) {
132                    while self.genotype[sample_index].len() < *format_index {
133                        self.genotype[sample_index].push(vec![b".".to_vec()]);
134                    }
135                } else {
136                    self.format.push(key.to_vec());
137                    self.format_index
138                        .insert(key.to_vec(), self.format.len() - 1);
139                    while self.genotype[sample_index].len() < self.format.len() - 1 {
140                        self.genotype[sample_index].push(vec![b".".to_vec()]);
141                    }
142                }
143                self.genotype[sample_index].push(values);
144                None
145            }
146        } else {
147            None
148        }
149    }
150
151    /// Recreate info and genotype index cache.
152    /// Please call this method if you modify info and format field manually.
153    pub fn recreate_info_and_genotype_index(&mut self) {
154        // create info_index
155        for v in self.info_index.values_mut() {
156            *v = NOT_FOUND;
157        }
158        for (i, k) in self.info.iter().enumerate() {
159            if let Some(x) = self.info_index.get_mut(&k.0) {
160                *x = i;
161            } else {
162                self.info_index.insert(k.0.to_vec(), i);
163            }
164        }
165
166        // create format_index
167        for v in self.format_index.values_mut() {
168            *v = NOT_FOUND;
169        }
170        for (i, k) in self.format.iter().enumerate() {
171            if let Some(x) = self.format_index.get_mut(k) {
172                *x = i;
173            } else {
174                self.format_index.insert(k.to_vec(), i);
175            }
176        }
177    }
178}
179
/// Writes the elements of `array` to `writer`, separated by `delimiter`.
///
/// An empty `array` is written as a single `.`. Elements are emitted
/// verbatim; no escaping is applied.
///
/// # Errors
///
/// Propagates the first I/O error reported by `writer`.
fn write_array(writer: &mut impl Write, array: &[Vec<u8>], delimiter: &[u8]) -> io::Result<()> {
    match array.split_first() {
        None => writer.write_all(b"."),
        Some((head, tail)) => {
            writer.write_all(head)?;
            for item in tail {
                writer.write_all(delimiter)?;
                writer.write_all(item)?;
            }
            Ok(())
        }
    }
}
194
/// Writes the info entries to `writer`, separated by `;`.
///
/// Each entry is written as `key=v1,v2,...`, or as a bare `key` when it has
/// no values. An empty `info` list is written as a single `.`.
///
/// # Errors
///
/// Propagates the first I/O error reported by `writer`.
fn write_info(writer: &mut impl Write, info: &[(U8Vec, Vec<U8Vec>)]) -> io::Result<()> {
    let (first, rest) = match info.split_first() {
        Some(parts) => parts,
        None => return writer.write_all(b"."),
    };

    write_info_entry(&mut *writer, first)?;
    for entry in rest {
        writer.write_all(b";")?;
        write_info_entry(&mut *writer, entry)?;
    }
    Ok(())
}

/// Writes one info entry: the key, then `=` and the comma-joined values if
/// there are any.
fn write_info_entry(writer: &mut impl Write, entry: &(U8Vec, Vec<U8Vec>)) -> io::Result<()> {
    let (key, values) = entry;
    writer.write_all(key)?;
    if let Some((head, tail)) = values.split_first() {
        writer.write_all(b"=")?;
        writer.write_all(head)?;
        for value in tail {
            writer.write_all(b",")?;
            writer.write_all(value)?;
        }
    }
    Ok(())
}
218
219impl VCFRecord {
220    pub fn write_record<W: Write>(&self, mut writer: W) -> io::Result<()> {
221        writer.write_all(&self.chromosome)?;
222        writer.write_all(b"\t")?;
223        write!(writer, "{}\t", self.position)?;
224        write_array(&mut writer, &self.id, b",")?;
225        writer.write_all(b"\t")?;
226        writer.write_all(&self.reference)?;
227        writer.write_all(b"\t")?;
228        write_array(&mut writer, &self.alternative, b",")?;
229        writer.write_all(b"\t")?;
230        if let Some(qual) = self.qual.as_ref() {
231            if (qual.round() - *qual).abs() < 0.000_000_01 {
232                write!(writer, "{:.1}", qual)?;
233            } else {
234                write!(writer, "{}", qual)?;
235            }
236        } else {
237            writer.write_all(b".")?;
238        }
239        writer.write_all(b"\t")?;
240        write_array(&mut writer, &self.filter, b",")?;
241        writer.write_all(b"\t")?;
242        write_info(&mut writer, &self.info)?;
243        if !self.format.is_empty() {
244            writer.write_all(b"\t")?;
245            write_array(&mut writer, &self.format, b":")?;
246            for one_genotype in self.genotype.iter() {
247                writer.write_all(b"\t")?;
248                for (i, v) in one_genotype.iter().enumerate() {
249                    if i != 0 {
250                        writer.write_all(b":")?;
251                    }
252                    write_array(&mut writer, v, b",")?;
253                }
254            }
255        }
256
257        writer.write_all(b"\n")?;
258        Ok(())
259    }
260}
261
262#[cfg(test)]
263mod test;