sfs_core/input/genotype/reader/
vcf.rs

1use std::io;
2
3use noodles_vcf as vcf;
4use vcf::record::{
5    genotypes::sample::value::genotype::Genotype as VcfGenotype, Record as VcfRecord,
6};
7
8use crate::input::{
9    genotype::{self, Genotype},
10    ReadStatus, Sample,
11};
12
13pub struct Reader<R> {
14    pub inner: vcf::Reader<R>,
15    pub header: vcf::Header,
16    pub samples: Vec<Sample>,
17    pub buf: VcfRecord,
18}
19
20impl<R> Reader<R>
21where
22    R: io::BufRead,
23{
24    pub fn new(inner: R) -> io::Result<Self> {
25        let mut inner = vcf::Reader::new(inner);
26
27        let header = inner.read_header()?;
28        let samples = header
29            .sample_names()
30            .iter()
31            .cloned()
32            .map(Sample::from)
33            .collect();
34
35        Ok(Self {
36            inner,
37            header,
38            samples,
39            buf: VcfRecord::default(),
40        })
41    }
42
43    fn read_genotypes(&mut self) -> ReadStatus<Vec<Option<VcfGenotype>>> {
44        match self.inner.read_record(&self.header, &mut self.buf) {
45            Ok(0) => ReadStatus::Done,
46            Ok(_) => {
47                let result = self
48                    .buf
49                    .genotypes()
50                    .genotypes()
51                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e));
52
53                match result {
54                    Ok(genotypes) => ReadStatus::Read(genotypes),
55                    Err(e) => ReadStatus::Error(e),
56                }
57            }
58            Err(e) => ReadStatus::Error(e),
59        }
60    }
61}
62
63impl<R> super::Reader for Reader<R>
64where
65    R: io::BufRead,
66{
67    fn current_contig(&self) -> &str {
68        match self.buf.chromosome() {
69            vcf::record::Chromosome::Name(s) | vcf::record::Chromosome::Symbol(s) => s,
70        }
71    }
72
73    fn current_position(&self) -> usize {
74        self.buf.position().into()
75    }
76
77    fn read_genotypes(&mut self) -> ReadStatus<Vec<genotype::Result>> {
78        self.read_genotypes().map(|vcf_genotypes| {
79            vcf_genotypes
80                .into_iter()
81                .map(genotype::Result::from)
82                .collect()
83        })
84    }
85
86    fn samples(&self) -> &[Sample] {
87        &self.samples
88    }
89}
90
91impl From<Option<VcfGenotype>> for genotype::Result {
92    fn from(genotype: Option<VcfGenotype>) -> Self {
93        match genotype {
94            Some(genotype) => match &genotype[..] {
95                [a, b] => match (a.position(), b.position()) {
96                    (Some(a), Some(b)) => match Genotype::try_from_raw(a + b) {
97                        Some(genotype) => genotype::Result::Genotype(genotype),
98                        None => genotype::Result::Skipped(genotype::Skipped::Multiallelic),
99                    },
100                    _ => genotype::Result::Skipped(genotype::Skipped::Missing),
101                },
102                _ => genotype::Result::Error(genotype::Error::PloidyError),
103            },
104            None => genotype::Result::Skipped(genotype::Skipped::Missing),
105        }
106    }
107}
108
109#[cfg(test)]
110mod tests {
111    use super::*;
112
113    use std::str::FromStr;
114
115    #[test]
116    fn test_genotype_from_vcf_genotype() -> Result<(), Box<dyn std::error::Error>> {
117        assert_eq!(
118            genotype::Result::from(Some(VcfGenotype::from_str("0/0")?)),
119            genotype::Result::Genotype(Genotype::Zero)
120        );
121        assert_eq!(
122            genotype::Result::from(Some(VcfGenotype::from_str("0/1")?)),
123            genotype::Result::Genotype(Genotype::One)
124        );
125        assert_eq!(
126            genotype::Result::from(Some(VcfGenotype::from_str("1/1")?)),
127            genotype::Result::Genotype(Genotype::Two)
128        );
129
130        assert_eq!(
131            genotype::Result::from(Some(VcfGenotype::from_str("0|1")?)),
132            genotype::Result::Genotype(Genotype::One)
133        );
134        assert_eq!(
135            genotype::Result::from(Some(VcfGenotype::from_str("1|0")?)),
136            genotype::Result::Genotype(Genotype::One)
137        );
138
139        Ok(())
140    }
141
142    #[test]
143    fn test_genotype_from_vcf_genotype_missing_genotype() -> Result<(), Box<dyn std::error::Error>>
144    {
145        assert_eq!(
146            genotype::Result::from(Some(VcfGenotype::from_str("./.")?)),
147            genotype::Result::Skipped(genotype::Skipped::Missing),
148        );
149
150        Ok(())
151    }
152
153    #[test]
154    fn test_genotype_from_vcf_genotype_missing_allele() -> Result<(), Box<dyn std::error::Error>> {
155        assert_eq!(
156            genotype::Result::from(Some(VcfGenotype::from_str("./0")?)),
157            genotype::Result::Skipped(genotype::Skipped::Missing),
158        );
159
160        assert_eq!(
161            genotype::Result::from(Some(VcfGenotype::from_str("1|.")?)),
162            genotype::Result::Skipped(genotype::Skipped::Missing),
163        );
164
165        Ok(())
166    }
167
168    #[test]
169    fn test_genotype_from_vcf_genotype_multiallelic() -> Result<(), Box<dyn std::error::Error>> {
170        assert_eq!(
171            genotype::Result::from(Some(VcfGenotype::from_str("1/2")?)),
172            genotype::Result::Skipped(genotype::Skipped::Multiallelic),
173        );
174
175        Ok(())
176    }
177
178    #[test]
179    fn test_genotype_from_vcf_genotype_not_diploid() -> Result<(), Box<dyn std::error::Error>> {
180        assert_eq!(
181            genotype::Result::from(Some(VcfGenotype::from_str("0")?)),
182            genotype::Result::Error(genotype::Error::PloidyError),
183        );
184
185        assert_eq!(
186            genotype::Result::from(Some(VcfGenotype::from_str("0/0/0")?)),
187            genotype::Result::Error(genotype::Error::PloidyError),
188        );
189
190        Ok(())
191    }
192}