holodeck_lib/vcf/
genotype.rs1use anyhow::{Result, bail};
2
3#[derive(Debug, Clone, PartialEq, Eq)]
8pub struct Genotype {
9 alleles: Vec<Option<usize>>,
11 phased: bool,
13}
14
15impl Genotype {
16 pub fn parse(gt: &str) -> Result<Self> {
25 if gt.is_empty() {
26 bail!("Empty GT field");
27 }
28
29 let (sep, phased) = if gt.contains('|') {
32 ('|', true)
33 } else if gt.contains('/') {
34 ('/', false)
35 } else {
36 let allele = parse_allele(gt)?;
38 return Ok(Self { alleles: vec![allele], phased: true });
39 };
40
41 let alleles: Result<Vec<Option<usize>>> = gt.split(sep).map(parse_allele).collect();
42 let alleles = alleles?;
43
44 if alleles.is_empty() {
45 bail!("GT field has no alleles: {gt}");
46 }
47
48 Ok(Self { alleles, phased })
49 }
50
51 #[must_use]
53 pub fn alleles(&self) -> &[Option<usize>] {
54 &self.alleles
55 }
56
57 #[must_use]
59 pub fn is_phased(&self) -> bool {
60 self.phased
61 }
62
63 #[must_use]
65 pub fn ploidy(&self) -> usize {
66 self.alleles.len()
67 }
68
69 #[must_use]
71 pub fn has_alt(&self) -> bool {
72 self.alleles.iter().any(|a| matches!(a, Some(idx) if *idx > 0))
73 }
74
75 #[must_use]
77 pub fn is_hom_alt(&self) -> bool {
78 if !self.has_alt() {
79 return false;
80 }
81 let first = self.alleles[0];
82 first.is_some() && self.alleles.iter().all(|a| *a == first)
83 }
84
85 #[must_use]
87 pub fn is_hom_ref(&self) -> bool {
88 self.alleles.iter().all(|a| *a == Some(0))
89 }
90}
91
92fn parse_allele(s: &str) -> Result<Option<usize>> {
94 if s == "." {
95 Ok(None)
96 } else {
97 let idx: usize = s.parse().map_err(|_| anyhow::anyhow!("Invalid allele value: '{s}'"))?;
98 Ok(Some(idx))
99 }
100}
101
102#[derive(Debug, Clone)]
105pub struct VariantRecord {
106 pub position: u32,
108 pub ref_allele: Vec<u8>,
110 pub alt_alleles: Vec<Vec<u8>>,
113 pub genotype: Genotype,
115}
116
117impl VariantRecord {
118 #[must_use]
122 pub fn allele_bases(&self, allele_index: usize) -> Option<&[u8]> {
123 if allele_index == 0 {
124 Some(&self.ref_allele)
125 } else {
126 self.alt_alleles.get(allele_index - 1).map(Vec::as_slice)
127 }
128 }
129}
130
131#[cfg(test)]
132mod tests {
133 use super::*;
134
135 #[test]
136 fn test_parse_diploid_unphased_het() {
137 let gt = Genotype::parse("0/1").unwrap();
138 assert_eq!(gt.alleles(), &[Some(0), Some(1)]);
139 assert!(!gt.is_phased());
140 assert_eq!(gt.ploidy(), 2);
141 assert!(gt.has_alt());
142 assert!(!gt.is_hom_alt());
143 assert!(!gt.is_hom_ref());
144 }
145
146 #[test]
147 fn test_parse_diploid_phased_het() {
148 let gt = Genotype::parse("1|0").unwrap();
149 assert_eq!(gt.alleles(), &[Some(1), Some(0)]);
150 assert!(gt.is_phased());
151 assert!(gt.has_alt());
152 }
153
154 #[test]
155 fn test_parse_diploid_hom_ref() {
156 let gt = Genotype::parse("0/0").unwrap();
157 assert_eq!(gt.alleles(), &[Some(0), Some(0)]);
158 assert!(!gt.has_alt());
159 assert!(gt.is_hom_ref());
160 assert!(!gt.is_hom_alt());
161 }
162
163 #[test]
164 fn test_parse_diploid_hom_alt() {
165 let gt = Genotype::parse("1/1").unwrap();
166 assert_eq!(gt.alleles(), &[Some(1), Some(1)]);
167 assert!(gt.has_alt());
168 assert!(gt.is_hom_alt());
169 assert!(!gt.is_hom_ref());
170 }
171
172 #[test]
173 fn test_parse_triploid() {
174 let gt = Genotype::parse("0|1|2").unwrap();
175 assert_eq!(gt.alleles(), &[Some(0), Some(1), Some(2)]);
176 assert!(gt.is_phased());
177 assert_eq!(gt.ploidy(), 3);
178 }
179
180 #[test]
181 fn test_parse_missing_alleles() {
182 let gt = Genotype::parse("./.").unwrap();
183 assert_eq!(gt.alleles(), &[None, None]);
184 assert!(!gt.has_alt());
185 assert!(!gt.is_hom_ref());
186 }
187
188 #[test]
189 fn test_parse_haploid() {
190 let gt = Genotype::parse("0").unwrap();
191 assert_eq!(gt.alleles(), &[Some(0)]);
192 assert!(gt.is_phased()); assert_eq!(gt.ploidy(), 1);
194
195 let gt = Genotype::parse("1").unwrap();
196 assert!(gt.has_alt());
197 }
198
199 #[test]
200 fn test_parse_empty_errors() {
201 assert!(Genotype::parse("").is_err());
202 }
203
204 #[test]
205 fn test_parse_invalid_allele_errors() {
206 assert!(Genotype::parse("0/abc").is_err());
207 }
208
209 #[test]
210 fn test_variant_record_allele_bases() {
211 let vr = VariantRecord {
212 position: 100,
213 ref_allele: b"A".to_vec(),
214 alt_alleles: vec![b"T".to_vec(), b"GCC".to_vec()],
215 genotype: Genotype::parse("0/1").unwrap(),
216 };
217 assert_eq!(vr.allele_bases(0), Some(b"A".as_slice()));
218 assert_eq!(vr.allele_bases(1), Some(b"T".as_slice()));
219 assert_eq!(vr.allele_bases(2), Some(b"GCC".as_slice()));
220 assert_eq!(vr.allele_bases(3), None);
221 }
222}