sfs_core/input/genotype/reader/
vcf.rs1use std::io;
2
3use noodles_vcf as vcf;
4use vcf::record::{
5 genotypes::sample::value::genotype::Genotype as VcfGenotype, Record as VcfRecord,
6};
7
8use crate::input::{
9 genotype::{self, Genotype},
10 ReadStatus, Sample,
11};
12
13pub struct Reader<R> {
14 pub inner: vcf::Reader<R>,
15 pub header: vcf::Header,
16 pub samples: Vec<Sample>,
17 pub buf: VcfRecord,
18}
19
20impl<R> Reader<R>
21where
22 R: io::BufRead,
23{
24 pub fn new(inner: R) -> io::Result<Self> {
25 let mut inner = vcf::Reader::new(inner);
26
27 let header = inner.read_header()?;
28 let samples = header
29 .sample_names()
30 .iter()
31 .cloned()
32 .map(Sample::from)
33 .collect();
34
35 Ok(Self {
36 inner,
37 header,
38 samples,
39 buf: VcfRecord::default(),
40 })
41 }
42
43 fn read_genotypes(&mut self) -> ReadStatus<Vec<Option<VcfGenotype>>> {
44 match self.inner.read_record(&self.header, &mut self.buf) {
45 Ok(0) => ReadStatus::Done,
46 Ok(_) => {
47 let result = self
48 .buf
49 .genotypes()
50 .genotypes()
51 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e));
52
53 match result {
54 Ok(genotypes) => ReadStatus::Read(genotypes),
55 Err(e) => ReadStatus::Error(e),
56 }
57 }
58 Err(e) => ReadStatus::Error(e),
59 }
60 }
61}
62
63impl<R> super::Reader for Reader<R>
64where
65 R: io::BufRead,
66{
67 fn current_contig(&self) -> &str {
68 match self.buf.chromosome() {
69 vcf::record::Chromosome::Name(s) | vcf::record::Chromosome::Symbol(s) => s,
70 }
71 }
72
73 fn current_position(&self) -> usize {
74 self.buf.position().into()
75 }
76
77 fn read_genotypes(&mut self) -> ReadStatus<Vec<genotype::Result>> {
78 self.read_genotypes().map(|vcf_genotypes| {
79 vcf_genotypes
80 .into_iter()
81 .map(genotype::Result::from)
82 .collect()
83 })
84 }
85
86 fn samples(&self) -> &[Sample] {
87 &self.samples
88 }
89}
90
91impl From<Option<VcfGenotype>> for genotype::Result {
92 fn from(genotype: Option<VcfGenotype>) -> Self {
93 match genotype {
94 Some(genotype) => match &genotype[..] {
95 [a, b] => match (a.position(), b.position()) {
96 (Some(a), Some(b)) => match Genotype::try_from_raw(a + b) {
97 Some(genotype) => genotype::Result::Genotype(genotype),
98 None => genotype::Result::Skipped(genotype::Skipped::Multiallelic),
99 },
100 _ => genotype::Result::Skipped(genotype::Skipped::Missing),
101 },
102 _ => genotype::Result::Error(genotype::Error::PloidyError),
103 },
104 None => genotype::Result::Skipped(genotype::Skipped::Missing),
105 }
106 }
107}
108
109#[cfg(test)]
110mod tests {
111 use super::*;
112
113 use std::str::FromStr;
114
115 #[test]
116 fn test_genotype_from_vcf_genotype() -> Result<(), Box<dyn std::error::Error>> {
117 assert_eq!(
118 genotype::Result::from(Some(VcfGenotype::from_str("0/0")?)),
119 genotype::Result::Genotype(Genotype::Zero)
120 );
121 assert_eq!(
122 genotype::Result::from(Some(VcfGenotype::from_str("0/1")?)),
123 genotype::Result::Genotype(Genotype::One)
124 );
125 assert_eq!(
126 genotype::Result::from(Some(VcfGenotype::from_str("1/1")?)),
127 genotype::Result::Genotype(Genotype::Two)
128 );
129
130 assert_eq!(
131 genotype::Result::from(Some(VcfGenotype::from_str("0|1")?)),
132 genotype::Result::Genotype(Genotype::One)
133 );
134 assert_eq!(
135 genotype::Result::from(Some(VcfGenotype::from_str("1|0")?)),
136 genotype::Result::Genotype(Genotype::One)
137 );
138
139 Ok(())
140 }
141
142 #[test]
143 fn test_genotype_from_vcf_genotype_missing_genotype() -> Result<(), Box<dyn std::error::Error>>
144 {
145 assert_eq!(
146 genotype::Result::from(Some(VcfGenotype::from_str("./.")?)),
147 genotype::Result::Skipped(genotype::Skipped::Missing),
148 );
149
150 Ok(())
151 }
152
153 #[test]
154 fn test_genotype_from_vcf_genotype_missing_allele() -> Result<(), Box<dyn std::error::Error>> {
155 assert_eq!(
156 genotype::Result::from(Some(VcfGenotype::from_str("./0")?)),
157 genotype::Result::Skipped(genotype::Skipped::Missing),
158 );
159
160 assert_eq!(
161 genotype::Result::from(Some(VcfGenotype::from_str("1|.")?)),
162 genotype::Result::Skipped(genotype::Skipped::Missing),
163 );
164
165 Ok(())
166 }
167
168 #[test]
169 fn test_genotype_from_vcf_genotype_multiallelic() -> Result<(), Box<dyn std::error::Error>> {
170 assert_eq!(
171 genotype::Result::from(Some(VcfGenotype::from_str("1/2")?)),
172 genotype::Result::Skipped(genotype::Skipped::Multiallelic),
173 );
174
175 Ok(())
176 }
177
178 #[test]
179 fn test_genotype_from_vcf_genotype_not_diploid() -> Result<(), Box<dyn std::error::Error>> {
180 assert_eq!(
181 genotype::Result::from(Some(VcfGenotype::from_str("0")?)),
182 genotype::Result::Error(genotype::Error::PloidyError),
183 );
184
185 assert_eq!(
186 genotype::Result::from(Some(VcfGenotype::from_str("0/0/0")?)),
187 genotype::Result::Error(genotype::Error::PloidyError),
188 );
189
190 Ok(())
191 }
192}