use crate::panel::{PaddedPanel, PanelSite};
pub fn complement(base: char) -> char {
match base.to_ascii_uppercase() {
'A' => 'T',
'T' => 'A',
'C' => 'G',
'G' => 'C',
_ => base,
}
}
pub fn complement_seq(seq: &str) -> String {
seq.chars().map(complement).collect()
}
pub fn is_ambiguous_snp(allele1: &str, allele2: &str) -> bool {
if allele1.len() != 1 || allele2.len() != 1 {
return false;
}
let a = allele1.chars().next().unwrap().to_ascii_uppercase();
let b = allele2.chars().next().unwrap().to_ascii_uppercase();
matches!((a, b), ('A', 'T') | ('T', 'A') | ('C', 'G') | ('G', 'C'))
}
#[derive(Debug, Clone)]
pub enum HarmonizationResult {
Diploid(usize, usize),
Haploid(usize),
InvalidAlleles {
user_bases: Vec<String>,
ref_base: String,
},
Skip { reason: String },
}
pub fn harmonize_alleles(
alleles: &[String],
ref_base: &str,
chrom: &str,
pos: u64,
panel: &mut PaddedPanel,
) -> Result<Vec<usize>, String> {
let mut validated_bases = Vec::with_capacity(alleles.len());
let mut has_ref_match = false;
let mut has_ref_comp_match = false;
for base in alleles {
if base.eq_ignore_ascii_case(ref_base) {
has_ref_match = true;
} else if base.len() == 1 && ref_base.len() == 1 {
let comp = complement_seq(base);
if comp.eq_ignore_ascii_case(ref_base) {
has_ref_comp_match = true;
}
}
}
let needs_flip = !has_ref_match && has_ref_comp_match;
let original_site = panel.get_original(chrom, pos);
let panel_ref = original_site.map(|s| s.ref_allele.as_str());
for (i, base) in alleles.iter().enumerate() {
if needs_flip {
validated_bases.push(complement_seq(base));
continue;
}
let base_upper = base.to_uppercase();
if i == 0 {
validated_bases.push(base_upper);
continue;
}
let mut rescued = None;
if let Some(site) = original_site {
let direct_matches_alt = site
.alt_alleles
.iter()
.any(|a| a.eq_ignore_ascii_case(&base_upper));
if direct_matches_alt {
rescued = Some(base_upper.clone());
} else {
let comp = complement_seq(&base_upper);
let comp_matches_alt = site
.alt_alleles
.iter()
.any(|a| a.eq_ignore_ascii_case(&comp));
let comp_matches_panel_ref = panel_ref
.map(|r| r.eq_ignore_ascii_case(&comp))
.unwrap_or(false);
if comp_matches_alt && !comp_matches_panel_ref {
rescued = Some(comp);
}
}
}
validated_bases.push(rescued.unwrap_or(base_upper));
}
let mut indices = Vec::with_capacity(validated_bases.len());
for base in validated_bases {
let idx = panel.get_or_add_allele_index(chrom, pos, &base, ref_base);
indices.push(idx);
}
Ok(indices)
}
pub fn harmonize_genotype(
user_bases: &[String],
ref_base: &str,
chrom: &str,
pos: u64,
panel: &mut PaddedPanel,
) -> HarmonizationResult {
match harmonize_alleles(user_bases, ref_base, chrom, pos, panel) {
Ok(indices) => match indices.len() {
1 => HarmonizationResult::Haploid(indices[0]),
2 => HarmonizationResult::Diploid(indices[0], indices[1]),
_ => HarmonizationResult::InvalidAlleles {
user_bases: user_bases.to_vec(),
ref_base: ref_base.to_string(),
},
},
Err(e) => HarmonizationResult::Skip { reason: e },
}
}
pub fn get_merged_alts(site: &PanelSite, added: Option<&Vec<String>>) -> Vec<String> {
let mut alts = site.alt_alleles.clone();
if let Some(added_alts) = added {
alts.extend(added_alts.iter().cloned());
}
alts
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_complement() {
assert_eq!(complement('A'), 'T');
assert_eq!(complement('T'), 'A');
assert_eq!(complement('C'), 'G');
assert_eq!(complement('G'), 'C');
assert_eq!(complement('a'), 'T');
}
#[test]
fn test_complement_seq() {
assert_eq!(complement_seq("ATCG"), "TAGC");
assert_eq!(complement_seq("atcg"), "TAGC");
}
#[test]
fn test_is_ambiguous_snp() {
assert!(is_ambiguous_snp("A", "T"));
assert!(is_ambiguous_snp("T", "A"));
assert!(is_ambiguous_snp("C", "G"));
assert!(is_ambiguous_snp("G", "C"));
assert!(!is_ambiguous_snp("A", "G"));
assert!(!is_ambiguous_snp("A", "C"));
assert!(!is_ambiguous_snp("AT", "TA")); }
#[test]
fn test_ambiguous_flip_prevention() {
let chrom = "1";
let pos = 100;
let original_index = crate::panel::PanelIndex::default();
let mut panel = PaddedPanel::new(original_index);
let res = harmonize_genotype(
&["A".to_string(), "T".to_string()],
"A",
chrom,
pos,
&mut panel,
);
match res {
HarmonizationResult::Diploid(idx1, idx2) => {
assert_eq!(idx1, 0, "First allele should be Ref (A)");
assert_eq!(idx2, 1, "Second allele should be Alt (T)");
}
_ => panic!("Expected Diploid result"),
}
}
#[test]
fn test_flip_when_no_ref_match() {
let chrom = "1";
let pos = 100;
let original_index = crate::panel::PanelIndex::default();
let mut panel = PaddedPanel::new(original_index);
let res = harmonize_genotype(
&["T".to_string(), "C".to_string()],
"A",
chrom,
pos,
&mut panel,
);
match res {
HarmonizationResult::Diploid(idx1, idx2) => {
assert_eq!(idx1, 0, "First allele should be flipped to Ref (A)");
assert_eq!(idx2, 1, "Second allele should be flipped to Alt (G)");
}
_ => panic!("Expected Diploid result"),
}
}
}