sfs_core/input/site/
reader.rs

1//! Site reader.
2
3use std::io;
4
5pub mod builder;
6pub use builder::Builder;
7
8use crate::{
9    input::{genotype, sample, ReadStatus, Sample},
10    spectrum::{project::PartialProjection, Count},
11    Scs,
12};
13
14use super::Site;
15
16/// A site reader.
17pub struct Reader {
18    reader: Box<dyn genotype::Reader>,
19    sample_map: sample::Map,
20    counts: Count,
21    totals: Count,
22    projection: Option<PartialProjection>,
23    skipped_samples: Vec<(sample::Id, genotype::Skipped)>,
24}
25
26impl Reader {
27    /// Returns a spectrum filled with zeros corresponding to the shape defined by the reader
28    /// configuration.
29    pub fn create_zero_scs(&self) -> Scs {
30        let shape = self
31            .projection
32            .clone()
33            .map(|projection| projection.project_to().clone().into_shape())
34            .unwrap_or_else(|| self.sample_map.shape());
35
36        Scs::from_zeros(shape)
37    }
38
39    /// Returns the current contig of the reader.
40    pub fn current_contig(&self) -> &str {
41        self.reader.current_contig()
42    }
43
44    /// Returns the current position of the reader within its current contig.
45    pub fn current_position(&self) -> usize {
46        self.reader.current_position()
47    }
48
49    /// Returns an iterator over the currently skipped genotypes in the reader, with their
50    /// associated samples.
51    pub fn current_skipped_samples(&self) -> impl Iterator<Item = (&Sample, &genotype::Skipped)> {
52        self.skipped_samples
53            .iter()
54            .map(|(i, s)| (self.sample_map.get_sample(*i).unwrap(), s))
55    }
56
57    fn new_unchecked(
58        reader: Box<dyn genotype::Reader>,
59        sample_map: sample::Map,
60        projection: Option<PartialProjection>,
61    ) -> Self {
62        let dimensions = sample_map.number_of_populations();
63
64        Self {
65            reader,
66            sample_map,
67            projection,
68            counts: Count::from_zeros(dimensions),
69            totals: Count::from_zeros(dimensions),
70            skipped_samples: Vec::new(),
71        }
72    }
73
74    /// Reads the next site in the reader.
75    pub fn read_site(&mut self) -> ReadStatus<Site<'_>> {
76        self.reset();
77
78        let genotypes = match self.reader.read_genotypes() {
79            ReadStatus::Read(genotypes) => genotypes,
80            ReadStatus::Error(e) => return ReadStatus::Error(e),
81            ReadStatus::Done => return ReadStatus::Done,
82        };
83
84        for (sample, genotype) in self.reader.samples().iter().zip(genotypes) {
85            let Some(population_id) = self.sample_map.get_population_id(sample).map(usize::from)
86            else {
87                continue;
88            };
89
90            match genotype {
91                genotype::Result::Genotype(genotype) => {
92                    self.counts[population_id] += genotype as u8 as usize;
93                    self.totals[population_id] += 2;
94                }
95                genotype::Result::Skipped(skip) => {
96                    self.skipped_samples
97                        .push((self.sample_map.get_sample_id(sample).unwrap(), skip));
98                }
99                genotype::Result::Error(e) => {
100                    return ReadStatus::Error(io::Error::new(io::ErrorKind::InvalidData, e));
101                }
102            }
103        }
104
105        let site = if let Some(projection) = self.projection.as_mut() {
106            let (exact, projectable) = self.totals.iter().zip(projection.project_to().iter()).fold(
107                (true, true),
108                |(exact, projectable), (&total, &to)| {
109                    (exact && total == to, projectable && total >= to)
110                },
111            );
112
113            if exact {
114                Site::Standard(&self.counts)
115            } else if projectable {
116                Site::Projected(projection.project_unchecked(&self.totals, &self.counts))
117            } else {
118                Site::InsufficientData
119            }
120        } else if self.skipped_samples.is_empty() {
121            Site::Standard(&self.counts)
122        } else {
123            Site::InsufficientData
124        };
125
126        ReadStatus::Read(site)
127    }
128
129    fn reset(&mut self) {
130        self.counts.set_zero();
131        self.totals.set_zero();
132        self.skipped_samples.clear();
133    }
134
135    /// Returns the samples defined by the reader.
136    pub fn samples(&self) -> &[Sample] {
137        self.reader.samples()
138    }
139}