use noodles::vcf::variant::record::samples::keys::key as format_key;
use noodles::vcf::variant::record_buf::{Samples, samples::sample::Value};
use std::collections::HashMap;
use tracing;
pub fn remap_sample_genotypes(samples: &Samples, mapping: &HashMap<usize, usize>) -> Samples {
let keys = samples.keys();
let mut new_keys_vec = Vec::new();
let mut extraction_map: Vec<(usize, bool)> = Vec::new();
if let Some(gt_pos) = keys.as_ref().iter().position(|k| k == format_key::GENOTYPE) {
new_keys_vec.push(format_key::GENOTYPE.to_string());
extraction_map.push((gt_pos, true));
}
for (i, key) in keys.as_ref().iter().enumerate() {
if key == format_key::GENOTYPE {
continue;
}
if key == "GQ" || key == "DP" || key == "MIN_DP" {
new_keys_vec.push(key.clone());
extraction_map.push((i, false));
} else {
tracing::debug!("Dropping field {} during polarization/normalization", key);
}
}
let new_keys: noodles::vcf::variant::record_buf::samples::Keys =
new_keys_vec.into_iter().collect();
let mut new_values = Vec::new();
for sample in samples.values() {
let mut new_sample_vals = Vec::with_capacity(extraction_map.len());
for (idx, is_gt) in &extraction_map {
let val_opt = sample.values().get(*idx).cloned().flatten();
if *is_gt {
if let Some(Value::String(gt_str)) = &val_opt {
let new_gt = remap_gt_string(gt_str, mapping);
if !mapping.is_empty() && gt_str != &new_gt {
tracing::debug!(
"Remapped GT: {} -> {} (mapping: {:?})",
gt_str,
new_gt,
mapping
);
}
new_sample_vals.push(Some(Value::String(new_gt)));
} else {
new_sample_vals.push(val_opt);
}
} else {
new_sample_vals.push(val_opt);
}
}
new_values.push(new_sample_vals);
}
Samples::new(new_keys, new_values)
}
pub fn remap_gt_string(gt: &str, mapping: &HashMap<usize, usize>) -> String {
if gt
.as_bytes()
.iter()
.all(|&b| matches!(b, b'0'..=b'9' | b'.' | b'/' | b'|'))
{
let bytes = gt.as_bytes();
let mut has_multi_digit = false;
for w in bytes.windows(2) {
if w[0].is_ascii_digit() && w[1].is_ascii_digit() {
has_multi_digit = true;
break;
}
}
if !has_multi_digit {
let mut out: Vec<u8> = Vec::with_capacity(bytes.len());
for &b in bytes {
if b.is_ascii_digit() {
let idx = (b - b'0') as usize;
if let Some(&new_idx) = mapping.get(&idx)
&& new_idx < 10
{
out.push((new_idx as u8) + b'0');
} else {
out.push(b);
}
} else {
out.push(b);
}
}
return String::from_utf8(out).unwrap_or_else(|_| gt.to_string());
}
}
let mut result = String::with_capacity(gt.len());
let mut current_num = String::new();
for c in gt.chars() {
if c.is_ascii_digit() {
current_num.push(c);
} else {
if !current_num.is_empty() {
if let Ok(idx) = current_num.parse::<usize>() {
if let Some(new_idx) = mapping.get(&idx) {
result.push_str(&new_idx.to_string());
} else {
result.push_str(¤t_num);
}
} else {
result.push_str(¤t_num);
}
current_num.clear();
}
result.push(c);
}
}
if !current_num.is_empty() {
if let Ok(idx) = current_num.parse::<usize>() {
if let Some(new_idx) = mapping.get(&idx) {
result.push_str(&new_idx.to_string());
} else {
result.push_str(¤t_num);
}
} else {
result.push_str(¤t_num);
}
}
result
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_remap_gt_string() {
let mut mapping = HashMap::new();
mapping.insert(0, 1); mapping.insert(1, 0); mapping.insert(2, 2);
assert_eq!(remap_gt_string("0/1", &mapping), "1/0");
assert_eq!(remap_gt_string("1|2", &mapping), "0|2");
assert_eq!(remap_gt_string("./0", &mapping), "./1");
assert_eq!(remap_gt_string("0", &mapping), "1");
}
}