rsvart 0.1.5

A small library for representing genomic variants and regions.
Documentation
#[derive(PartialEq)]
pub enum VariantType {
    Unknown,
    SingleNucleotide,
    MultiNucleotide,
    Symbolic,
    Deletion,
    DeletionME,
    DeletionALU,
    DeletionL1,
    DeletionSVA,
    DeletionHERV,
    Insertion,
    InsertionME,
    InsertionALU,
    InsertionL1,
    InsertionSVA,
    InsertionHERV,

    Duplication,
    DuplicationTandem,
    DuplicationInversionBefore,
    DuplicationInversionAfter,

    Inversion,
    CopyNumber,
    Breakend,

    CopyNumberGain,
    CopyNumberLoss,
    CopyNumberLOH,
    CopyNumberComplex,
    ShortTandemRepeat,
    Translocation,
}
impl VariantType {
    pub fn parse_type_vcf(alt: &str) -> VariantType {
        if alt.is_empty() {
            return VariantType::Unknown;
        }
        let stripped: &str = VariantType::trim_angle_brackets(alt);
        let variant_type = match stripped {
            "SNP" | "SNV" => VariantType::SingleNucleotide,
            "MNP" | "MNV" => VariantType::MultiNucleotide,
            "DEL" => VariantType::Deletion,
            "INS" => VariantType::Insertion,
            "DUP" => VariantType::Duplication,
            "INV" => VariantType::Inversion,
            "CNV" => VariantType::CopyNumber,
            "BND" => VariantType::Breakend,
            "STR" => VariantType::ShortTandemRepeat,
            "TRA" => VariantType::Translocation,

            "DEL:ME" => VariantType::DeletionME,
            "DEL:ME:ALU" => VariantType::DeletionALU,
            "DEL:ME:LINE1" => VariantType::DeletionL1,
            "DEL:ME:SVA" => VariantType::DeletionSVA,
            "DEL:ME:HERV" => VariantType::DeletionHERV,

            "INS:ME" => VariantType::InsertionME,
            "INS:ME:ALU" => VariantType::InsertionALU,
            "INS:ME:LINE1" => VariantType::InsertionL1,
            "INS:ME:SVA" => VariantType::InsertionSVA,
            "INS:ME:HERV" => VariantType::InsertionHERV,

            "DUP:TANDEM" => VariantType::DuplicationTandem,
            "DUP:INV-BEFORE" => VariantType::DuplicationInversionBefore,
            "DUP:INV-AFTER" => VariantType::DuplicationInversionAfter,

            "CNV:GAIN" => VariantType::CopyNumberGain,
            "CNV:LOSS" => VariantType::CopyNumberLoss,
            &_ => VariantType::Unknown,
        };

        if variant_type == VariantType::Unknown {
            return if stripped.starts_with("BND") || VariantType::is_breakend(stripped) {
                VariantType::Breakend
            } else if stripped.starts_with("DEL:ME") {
                VariantType::DeletionME
            } else if stripped.starts_with("DEL") {
                VariantType::Deletion
            } else if stripped.starts_with("INS:ME") {
                VariantType::InsertionME
            } else if stripped.starts_with("DUP:TANDEM") {
                VariantType::DuplicationTandem
            } else if stripped.starts_with("DUP") {
                VariantType::Duplication
            } else if stripped.starts_with("CNV") {
                VariantType::CopyNumber
            } else if stripped.starts_with("STR") {
                VariantType::ShortTandemRepeat
            } else if VariantType::is_symbolic(alt) {
                VariantType::Symbolic
            } else {
                VariantType::Unknown
            };
        }
        return variant_type;
    }

    pub fn parse_type(refr: &str, alt: &str) -> VariantType {
        if VariantType::is_symbolic_alleles(refr, alt) {
            return VariantType::parse_type_vcf(alt);
        }

        if refr.len() == alt.len() {
            if alt.len() == 1 {
                return VariantType::SingleNucleotide;
            }
            return VariantType::MultiNucleotide;
        }

        return if refr.len() < alt.len() {
            VariantType::Insertion
        } else {
            VariantType::Deletion
        };
    }

    pub fn is_symbolic_alleles(refr: &str, alt: &str) -> bool {
        return VariantType::is_symbolic(alt) || VariantType::is_symbolic(refr);
    }

    pub fn is_symbolic(allele: &str) -> bool {
        return VariantType::is_large_symbolic(allele) || VariantType::is_breakend(allele);
    }

    pub fn is_breakend(allele: &str) -> bool {
        return VariantType::is_single_breakend(allele) || VariantType::is_mated_breakend(allele);
    }

    pub fn is_large_symbolic(allele: &str) -> bool {
        return allele.len() > 1
            && (allele.chars().next().unwrap() == '<' || allele.chars().last().unwrap() == '>');
    }

    pub fn is_single_breakend(allele: &str) -> bool {
        return allele.len() > 1
            && (allele.chars().next().unwrap() == '.' || allele.chars().last().unwrap() == '.');
    }

    pub fn is_mated_breakend(allele: &str) -> bool {
        return allele.len() > 1 && (allele.contains("[") || allele.contains("]"));
    }

    pub fn require_non_symbolic(alt: &str) -> &str {
        if alt.is_empty() || VariantType::is_symbolic(alt) {
            panic!("Illegal symbolic alt allele {item}", item = alt);
        } else if alt.contains(",") {
            panic!("Illegal multi-allelic alt allele {item}", item = alt);
        }
        alt
    }

    pub fn require_symbolic(alt: &str) -> &str {
        if alt.is_empty() || !VariantType::is_large_symbolic(alt) {
            panic!(
                "Illegal non-symbolic or breakend alt allele {item}",
                item = alt
            );
        }
        alt
    }

    pub fn require_breakend(alt: &str) -> &str {
        if alt.is_empty() || !VariantType::is_breakend(alt) {
            panic!("Illegal non-breakend allele {item}", item = alt);
        }
        alt
    }

    pub fn require_non_breakend(alt: &str) -> &str {
        if alt.is_empty() || VariantType::is_breakend(alt) {
            panic!("Illegal breakend allele {item}", item = alt);
        }
        alt
    }

    pub fn is_missing_upstream_deletion(allele: &str) -> bool {
        return allele.eq("*");
    }

    pub fn is_missing(allele: &str) -> bool {
        return allele.eq(".");
    }

    fn trim_angle_brackets(value: &str) -> &str {
        if value.starts_with("<") && value.ends_with(">") {
            return &value[1..value.len() - 1];
        }
        value
    }
}

#[cfg(test)]
mod test {
    use super::VariantType;
    use rstest::rstest;

    #[rstest]
    #[case("AT", VariantType::SingleNucleotide)]
    fn test_parse_ref_alt(#[case] input: &str, #[case] expected: VariantType) {}
}