sfs_core/input/
sample.rs

1//! Input samples.
2
3use std::{collections::HashMap, fs::File, io, path::Path};
4
5use indexmap::IndexMap;
6
7use crate::array::Shape;
8
9pub mod population;
10pub use population::Population;
11
/// A sample, identified by its name.
///
/// The name is stored as an owned string; two samples are equal (and hash identically) exactly
/// when their names are equal.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Sample(String);
15
/// A numeric id for a sample.
///
/// The wrapped index is public; ids are cheap to copy and order by their index.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Id(pub usize);
19
20impl<S> From<S> for Sample
21where
22    S: ToString,
23{
24    fn from(sample: S) -> Self {
25        Self(sample.to_string())
26    }
27}
28
29impl AsRef<str> for Sample {
30    fn as_ref(&self) -> &str {
31        &self.0
32    }
33}
34
35/// A mapping from samples to populations.
36#[derive(Clone, Debug, Default, Eq, PartialEq)]
37pub struct Map(IndexMap<Sample, population::Id>);
38
39impl Map {
40    /// Creates a new mapping by mapping all samples to the same, unnamed population.
41    pub fn from_all<I>(samples: I) -> Self
42    where
43        I: IntoIterator<Item = Sample>,
44    {
45        Self::from_iter(
46            samples
47                .into_iter()
48                .map(|sample| (sample.as_ref().to_string(), Population::Unnamed)),
49        )
50    }
51
52    /// Creates a new mapping by reading a samples file at the provided path.
53    pub fn from_path<P>(path: P) -> io::Result<Self>
54    where
55        P: AsRef<Path>,
56    {
57        File::open(path).and_then(Self::from_reader)
58    }
59
60    /// Creates a new mapping by reading a samples file from the provided reader.
61    pub fn from_reader<R>(mut reader: R) -> io::Result<Self>
62    where
63        R: io::Read,
64    {
65        let mut s = String::new();
66        let _ = reader.read_to_string(&mut s)?;
67
68        Ok(Self::from_str(&s))
69    }
70
71    fn from_str(s: &str) -> Self {
72        s.lines()
73            .map(|line| match line.split_once('\t') {
74                Some((sample, population)) => (sample, Some(population)),
75                None => (line, None),
76            })
77            .collect()
78    }
79
80    /// Returns the population id of a sample if defined, otherwise `None`.
81    pub fn get_population_id(&self, sample: &Sample) -> Option<population::Id> {
82        self.0.get(sample).copied()
83    }
84
85    /// Returns the sample with the provided id if defined, otherwise `None`.
86    pub fn get_sample(&self, id: Id) -> Option<&Sample> {
87        self.0.get_index(id.0).map(|opt| opt.0)
88    }
89
90    /// Returns the id of the provided sample if defined, otherwise `None`.
91    pub fn get_sample_id(&self, sample: &Sample) -> Option<Id> {
92        self.0.get_index_of(sample).map(Id)
93    }
94
95    /// Returns true if no samples are defined, false otherwise.
96    pub fn is_empty(&self) -> bool {
97        self.0.is_empty()
98    }
99
100    /// Returns the number of populations in the mapping.
101    pub fn number_of_populations(&self) -> usize {
102        self.population_sizes().len()
103    }
104
105    /// Returns the number of samples defined for each population id.
106    pub fn population_sizes(&self) -> HashMap<population::Id, usize> {
107        let mut sizes = HashMap::new();
108        for &population_id in self.0.values() {
109            *sizes.entry(population_id).or_insert(0) += 1;
110        }
111        sizes
112    }
113
114    /// Returns an iterator over the samples in the mapping.
115    pub fn samples(&self) -> impl Iterator<Item = &Sample> {
116        self.0.keys()
117    }
118
119    pub(crate) fn shape(&self) -> Shape {
120        let population_sizes = self.population_sizes();
121
122        Shape(
123            (0..population_sizes.len())
124                .map(|id| 1 + 2 * population_sizes.get(&population::Id(id)).unwrap())
125                .collect(),
126        )
127    }
128}
129
130impl<S, P> FromIterator<(S, P)> for Map
131where
132    S: Into<Sample>,
133    P: Into<Population>,
134{
135    fn from_iter<I>(iter: I) -> Self
136    where
137        I: IntoIterator<Item = (S, P)>,
138    {
139        let mut population_map = population::Map::default();
140
141        Self(IndexMap::from_iter(iter.into_iter().map(
142            |(sample_name, population_name)| {
143                (
144                    sample_name.into(),
145                    population_map.get_or_insert(population_name.into()),
146                )
147            },
148        )))
149    }
150}