1pub mod genotype;
2
3use std::path::Path;
4
5use anyhow::{Context, Result, bail};
6use noodles::vcf;
7use noodles::vcf::variant::record::AlternateBases;
8
9use crate::sequence_dict::SequenceDictionary;
10use genotype::{Genotype, VariantRecord};
11
12pub fn load_variants_for_contig(
28 path: &Path,
29 contig_name: &str,
30 sample_name: Option<&str>,
31 _dict: &SequenceDictionary,
32) -> Result<Vec<VariantRecord>> {
33 let mut reader = vcf::io::reader::Builder::default()
34 .build_from_path(path)
35 .with_context(|| format!("Failed to open VCF: {}", path.display()))?;
36
37 let header = reader.read_header()?;
38 let sample_index = resolve_sample_index(&header, sample_name)?;
39
40 let mut variants = Vec::new();
41
42 for result in reader.record_bufs(&header) {
46 let record = result.with_context(|| "Failed to read VCF record")?;
47
48 let chrom = record.reference_sequence_name();
50 if chrom != contig_name {
51 continue;
52 }
53
54 let Some(pos_1based) = record.variant_start() else {
56 continue;
57 };
58 #[expect(clippy::cast_possible_truncation, reason = "genomic positions fit in u32")]
59 let position = (usize::from(pos_1based) - 1) as u32;
60
61 let ref_allele: Vec<u8> = record.reference_bases().as_bytes().to_vec();
63 let alt_alleles: Vec<Vec<u8>> = record
64 .alternate_bases()
65 .iter()
66 .filter_map(Result::ok)
67 .map(|a| a.as_bytes().to_vec())
68 .collect();
69
70 let gt_str = extract_gt_string(&record, sample_index);
72 let Some(gt_str) = gt_str else {
73 continue;
74 };
75
76 let genotype = Genotype::parse(>_str)?;
77
78 if !genotype.has_alt() {
80 continue;
81 }
82
83 variants.push(VariantRecord { position, ref_allele, alt_alleles, genotype });
84 }
85
86 variants.sort_by_key(|v| v.position);
88
89 Ok(variants)
90}
91
92pub(crate) fn validate_vcf_sample(path: &Path, sample_name: Option<&str>) -> Result<String> {
106 let mut reader = vcf::io::reader::Builder::default()
107 .build_from_path(path)
108 .with_context(|| format!("Failed to open VCF: {}", path.display()))?;
109 let header = reader.read_header()?;
110 let idx = resolve_sample_index(&header, sample_name)?;
111 Ok(header
112 .sample_names()
113 .get_index(idx)
114 .expect("resolve_sample_index returned a valid index")
115 .clone())
116}
117
118fn resolve_sample_index(header: &vcf::Header, sample_name: Option<&str>) -> Result<usize> {
124 let sample_names = header.sample_names();
125
126 match sample_name {
127 Some(name) => {
128 let idx = sample_names.iter().position(|s| s == name).ok_or_else(|| {
129 if sample_names.is_empty() {
130 anyhow::anyhow!("Sample '{name}' not found in VCF (VCF has no sample columns)")
131 } else {
132 let available: Vec<&str> =
133 sample_names.iter().map(String::as_str).take(10).collect();
134 anyhow::anyhow!(
135 "Sample '{name}' not found in VCF. Available samples: {}",
136 available.join(", ")
137 )
138 }
139 })?;
140 Ok(idx)
141 }
142 None => {
143 if sample_names.len() == 1 {
144 Ok(0)
145 } else if sample_names.is_empty() {
146 bail!("VCF has no sample columns")
147 } else {
148 bail!(
149 "VCF has {} samples but no --sample was specified. \
150 Available samples: {}",
151 sample_names.len(),
152 sample_names.iter().take(10).cloned().collect::<Vec<_>>().join(", ")
153 )
154 }
155 }
156 }
157}
158
159fn extract_gt_string(record: &vcf::variant::RecordBuf, sample_index: usize) -> Option<String> {
163 use noodles::vcf::variant::record::samples::keys::key;
164 use noodles::vcf::variant::record_buf::samples::sample::Value;
165
166 let samples = record.samples();
167 let sample = samples.get_index(sample_index)?;
168 let gt_value = sample.get(key::GENOTYPE)?;
169
170 match gt_value {
171 Some(Value::Genotype(gt)) => Some(format_genotype(gt)),
172 _ => None,
173 }
174}
175
176fn format_genotype(
179 gt: &noodles::vcf::variant::record_buf::samples::sample::value::Genotype,
180) -> String {
181 use noodles::vcf::variant::record::samples::series::value::genotype::Phasing;
182
183 let alleles = gt.as_ref();
184 let mut parts = Vec::with_capacity(alleles.len());
185 let mut is_phased = false;
186
187 for (i, allele) in alleles.iter().enumerate() {
188 if i > 0 && allele.phasing() == Phasing::Phased {
189 is_phased = true;
190 }
191 match allele.position() {
192 Some(idx) => parts.push(idx.to_string()),
193 None => parts.push(".".to_string()),
194 }
195 }
196
197 let sep = if is_phased { "|" } else { "/" };
198 parts.join(sep)
199}