1#[cfg(all(feature = "rayon", not(feature = "internal-no-data")))]
2use rayon::prelude::*;
3use std::collections::HashSet;
4
5use crate::sequence::{
6 AnnotatedPeptide, Annotation, HasPeptidoformImpl, Peptidoform, Region, UnAmbiguous,
7};
8
9pub(super) use super::*;
10
11#[cfg(not(feature = "internal-no-data"))]
13pub fn get_germline(
14 species: Species,
15 gene: Gene,
16 allele: Option<usize>,
17) -> Option<Allele<'static>> {
18 germlines(species).and_then(|g| g.find(species, gene, allele))
19}
20
21#[derive(Clone, Debug, Eq, PartialEq)]
23pub struct Selection<S1: std::hash::BuildHasher, S2: std::hash::BuildHasher> {
24 pub species: Option<HashSet<Species, S1>>,
26 pub chains: Option<HashSet<ChainType, S2>>,
28 pub genes: Option<HashSet<GeneType>>,
30 pub allele: AlleleSelection,
32}
33
34impl<S1: std::hash::BuildHasher, S2: std::hash::BuildHasher> Selection<S1, S2> {
35 #[must_use]
37 pub fn species(self, species: impl Into<HashSet<Species, S1>>) -> Self {
38 Self {
39 species: Some(species.into()),
40 ..self
41 }
42 }
43
44 #[must_use]
46 pub fn chain(self, chains: impl Into<HashSet<ChainType, S2>>) -> Self {
47 Self {
48 chains: Some(chains.into()),
49 ..self
50 }
51 }
52
53 #[must_use]
55 pub fn gene(self, genes: impl Into<HashSet<GeneType>>) -> Self {
56 Self {
57 genes: Some(genes.into()),
58 ..self
59 }
60 }
61
62 #[must_use]
64 pub fn allele(self, allele: AlleleSelection) -> Self {
65 Self { allele, ..self }
66 }
67}
68
69impl<
70 S1: std::hash::BuildHasher + Clone + Send + Sync,
71 S2: std::hash::BuildHasher + Clone + Send + Sync,
72> Selection<S1, S2>
73{
74 #[cfg(not(feature = "internal-no-data"))]
76 pub fn germlines(self) -> impl Iterator<Item = Allele<'static>> {
77 all_germlines()
78 .filter(move |g| self.species.as_ref().is_none_or(|s| s.contains(&g.species)))
79 .flat_map(|g| g.into_iter().map(|c| (g.species, c.0, c.1)))
80 .filter(move |(_, kind, _)| self.chains.as_ref().is_none_or(|k| k.contains(kind)))
81 .flat_map(|(species, _, c)| c.into_iter().map(move |g| (species, g.0, g.1)))
82 .filter(move |(_, gene, _)| self.genes.as_ref().is_none_or(|s| contains_gene(s, *gene)))
83 .flat_map(|(species, _, germlines)| germlines.iter().map(move |a| (species, a)))
84 .flat_map(move |(species, germline)| {
85 germline
86 .into_iter()
87 .take(self.allele.take_num())
88 .map(move |(a, seq)| (species, &germline.name, *a, seq))
89 })
90 .map(Into::into)
91 }
92
93 #[cfg(all(feature = "rayon", not(feature = "internal-no-data")))]
94 pub fn par_germlines(self) -> impl ParallelIterator<Item = Allele<'static>> {
96 par_germlines()
97 .filter(move |g| self.species.as_ref().is_none_or(|s| s.contains(&g.species)))
98 .flat_map(|g| g.into_par_iter().map(|c| (g.species, c.0, c.1)))
99 .filter(move |(_, kind, _)| self.chains.as_ref().is_none_or(|k| k.contains(kind)))
100 .flat_map(|(species, _, c)| c.into_par_iter().map(move |g| (species, g.0, g.1)))
101 .filter(move |(_, gene, _)| self.genes.as_ref().is_none_or(|s| contains_gene(s, *gene)))
102 .flat_map(|(species, _, germlines)| {
103 germlines.into_par_iter().map(move |a| (species, a))
104 })
105 .flat_map(move |(species, germline)| {
106 germline
107 .into_par_iter()
108 .take(self.allele.take_num())
109 .map(move |(a, seq)| (species, &germline.name, *a, seq))
110 })
111 .map(Into::into)
112 }
113}
114
115#[cfg(not(feature = "internal-no-data"))]
116fn contains_gene(s: &HashSet<GeneType>, gene: GeneType) -> bool {
117 s.contains(&gene) || matches!(gene, GeneType::C(_)) && s.contains(&GeneType::C(None))
118}
119
120impl<S1: std::hash::BuildHasher, S2: std::hash::BuildHasher> Default for Selection<S1, S2> {
121 fn default() -> Self {
123 Self {
124 species: None,
125 chains: None,
126 genes: None,
127 allele: AlleleSelection::First,
128 }
129 }
130}
131
/// Which alleles of a gene are returned by a selection.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum AlleleSelection {
    /// Return every allele of the gene.
    All,
    /// Return only the first allele of the gene.
    First,
}

impl AlleleSelection {
    /// The maximal number of alleles to take from a single gene: `1` for
    /// `First` and `usize::MAX` (effectively unlimited) for `All`.
    #[cfg(not(feature = "internal-no-data"))]
    const fn take_num(self) -> usize {
        match self {
            Self::All => usize::MAX,
            Self::First => 1,
        }
    }
}
150
151#[non_exhaustive] #[derive(Clone, Debug, Eq, PartialEq)]
154pub struct Allele<'a> {
155 pub species: Species,
157 pub gene: std::borrow::Cow<'a, Gene>,
159 pub number: usize,
161 pub sequence: &'a Peptidoform<UnAmbiguous>,
163 pub regions: &'a [(Region, usize)],
165 pub annotations: &'a [(Annotation, usize)],
167}
168
169impl Allele<'_> {
170 pub fn name(&self) -> String {
172 format!("{}*{:02}", self.gene, self.number)
173 }
174
175 pub fn fancy_name(&self) -> String {
177 format!("{}*{:02}", self.gene.to_fancy_string(), self.number)
178 }
179}
180
181impl HasPeptidoformImpl for Allele<'_> {
182 type Complexity = UnAmbiguous;
183 fn peptidoform(&self) -> &Peptidoform<Self::Complexity> {
184 self.sequence
185 }
186}
187
188impl AnnotatedPeptide for Allele<'_> {
189 fn annotations(&self) -> &[(Annotation, usize)] {
190 self.annotations
191 }
192 fn regions(&self) -> &[(Region, usize)] {
193 self.regions
194 }
195}
196
197impl<'a> From<(Species, &'a Gene, usize, &'a AnnotatedSequence)> for Allele<'a> {
198 fn from(value: (Species, &'a Gene, usize, &'a AnnotatedSequence)) -> Self {
199 Self {
200 species: value.0,
201 gene: std::borrow::Cow::Borrowed(value.1),
202 number: value.2,
203 sequence: &value.3.sequence,
204 regions: &value.3.regions,
205 annotations: &value.3.annotations,
206 }
207 }
208}
209
210impl Germlines {
211 pub fn find(&self, species: Species, gene: Gene, allele: Option<usize>) -> Option<Allele<'_>> {
213 let chain = match gene.chain {
214 ChainType::Heavy => &self.h,
215 ChainType::LightKappa => &self.k,
216 ChainType::LightLambda => &self.l,
217 ChainType::Iota => &self.i,
218 };
219 let genes = match gene.kind {
220 GeneType::V => &chain.variable,
221 GeneType::J => &chain.joining,
222 GeneType::C(None) => &chain.c,
223 GeneType::C(Some(Constant::A)) => &chain.a,
224 GeneType::C(Some(Constant::D)) => &chain.d,
225 GeneType::C(Some(Constant::E)) => &chain.e,
226 GeneType::C(Some(Constant::G)) => &chain.g,
227 GeneType::C(Some(Constant::M)) => &chain.m,
228 GeneType::C(Some(Constant::O)) => &chain.o,
229 GeneType::C(Some(Constant::T)) => &chain.t,
230 };
231 genes
232 .binary_search_by(|g| g.name.cmp(&gene))
233 .ok()
234 .and_then(|g| {
235 let g = &genes[g];
236 allele.map_or_else(
237 || g.alleles.first(),
238 |a| g.alleles.iter().find(|(ga, _)| a == *ga),
239 )
240 })
241 .map(move |(a, seq)| Allele {
242 species,
243 gene: std::borrow::Cow::Owned(gene),
244 number: *a,
245 sequence: &seq.sequence,
246 regions: &seq.regions,
247 annotations: &seq.annotations,
248 })
249 }
250}
251
#[cfg(all(test, not(feature = "internal-no-data")))]
#[expect(clippy::missing_panics_doc)]
mod tests {
    use std::collections::HashSet;

    use crate::imgt::{Constant, select::contains_gene};

    use super::Selection;
    use super::{ChainType, GeneType, Species};

    /// The name of the first allele yielded for human heavy chain genes of the
    /// given kind.
    fn first_human_heavy(kind: GeneType) -> String {
        Selection::default()
            .species([Species::HomoSapiens])
            .chain([ChainType::Heavy])
            .gene([kind])
            .germlines()
            .next()
            .unwrap()
            .name()
    }

    #[test]
    fn try_first_human() {
        assert_eq!(first_human_heavy(GeneType::V), "IGHV1-2*01");
    }

    #[test]
    fn try_first_g_human() {
        assert_eq!(
            first_human_heavy(GeneType::C(Some(Constant::G))),
            "IGHGP*01"
        );
    }

    #[test]
    fn gene_selections() {
        let unspecified = GeneType::C(None);
        let constant_g = GeneType::C(Some(Constant::G));
        let constant_a = GeneType::C(Some(Constant::A));

        // The unspecified constant entry selects every constant gene.
        let any_constant = HashSet::from([unspecified]);
        for gene in [unspecified, constant_g, constant_a] {
            assert!(contains_gene(&any_constant, gene));
        }

        // A specific constant entry selects only that exact constant gene.
        let only_g = HashSet::from([constant_g]);
        assert!(contains_gene(&only_g, constant_g));
        for gene in [unspecified, constant_a] {
            assert!(!contains_gene(&only_g, gene));
        }
    }
}