use super::align::RawEdit;
use super::classify::EditClassification;
/// Summary of one extraction: two sequence lengths, the variants that were
/// found, and a list of HGVS strings.
///
/// NOTE(review): nothing in this file constructs or reads this struct, so the
/// field notes below are inferred from names and types only — confirm against
/// the code that populates it.
#[derive(Debug, Clone)]
pub struct ExtractionResult {
/// Presumably the length of the reference sequence — TODO confirm units.
pub reference_length: u64,
/// Presumably the length of the observed sequence — TODO confirm units.
pub observed_length: u64,
/// Variants extracted from the comparison, one entry per variant.
pub variants: Vec<ExtractedVariant>,
/// HGVS strings for the result; presumably parallel to `variants` — verify
/// against the producer.
pub hgvs_strings: Vec<String>,
}
/// A single variant together with its classification and HGVS rendering.
///
/// NOTE(review): this file only declares the struct; the field notes below are
/// inferred from names and types — confirm against the code that builds it.
#[derive(Debug, Clone)]
pub struct ExtractedVariant {
/// Position of the variant; presumably a reference coordinate — TODO confirm
/// whether it is 0- or 1-based.
pub position: u64,
/// Presumably the reference-side sequence affected by the variant.
pub ref_seq: String,
/// Presumably the observed-side sequence that replaces `ref_seq`.
pub obs_seq: String,
/// Kind of edit this variant was classified as.
pub classification: EditClassification,
/// HGVS string for this variant; presumably produced by `generate_hgvs` —
/// verify against the caller.
pub hgvs: String,
}
/// Renders one edit as an HGVS-style variant description.
///
/// The description is the position text from `format_position` immediately
/// followed by the edit text from `format_edit`.
///
/// * `edit` - raw edit supplying the reference coordinates.
/// * `classification` - kind of edit, which selects the suffix (`del`, `ins…`, …).
/// * `accession` - when `Some`, the result is prefixed with `<accession>:g.`;
///   when `None`, the bare description is returned without any prefix.
pub fn generate_hgvs(
    edit: &RawEdit,
    classification: &EditClassification,
    accession: Option<&str>,
) -> String {
    // Position first, then the edit suffix — the two are simply concatenated.
    let description = format!("{}{}", format_position(edit), format_edit(classification));

    if let Some(reference) = accession {
        format!("{}:g.{}", reference, description)
    } else {
        description
    }
}
/// Formats the reference position of `edit` for an HGVS-style description.
///
/// The shape depends only on the byte length of `edit.ref_seq`:
///
/// * empty (nothing consumed on the reference, e.g. an insertion) →
///   the flanking pair `start_start+1`;
/// * exactly one byte → the single position `start`;
/// * more than one byte → the inclusive range `start_end`, where
///   `end = start + len - 1`.
///
/// The observed sequence plays no part in the position text.
fn format_position(edit: &RawEdit) -> String {
    let start = edit.ref_start;

    // Matching on the length alone is exhaustive without a guard, so no
    // unreachable catch-all arm is required.
    match edit.ref_seq.len() {
        // A zero start needs no special case: the same formula yields `0_1`.
        0 => format!("{}_{}", start, start + 1),
        1 => start.to_string(),
        span => format!("{}_{}", start, start + span as u64 - 1),
    }
}
/// Produces the edit suffix of an HGVS-style description for `classification`.
///
/// | classification | suffix               |
/// |----------------|----------------------|
/// | `Substitution` | `<ref_base>><alt_base>` |
/// | `Deletion`     | `del`                |
/// | `Insertion`    | `ins<inserted>`      |
/// | `Duplication`  | `dup`                |
/// | `Delins`       | `delins<inserted>`   |
/// | `Inversion`    | `inv`                |
/// | `Repeat`       | `<unit>[<count>]`    |
///
/// The deleted, duplicated and inverted sequences are never written out; only
/// the keyword is emitted for those kinds.
fn format_edit(classification: &EditClassification) -> String {
    match classification {
        // Keyword-only suffixes: the payload carried by the variant is ignored.
        EditClassification::Deletion { .. } => String::from("del"),
        EditClassification::Duplication { .. } => String::from("dup"),
        EditClassification::Inversion { .. } => String::from("inv"),

        // Keyword followed by the inserted sequence.
        EditClassification::Insertion { inserted } => format!("ins{}", inserted),
        EditClassification::Delins { inserted, .. } => format!("delins{}", inserted),

        // Suffixes built entirely from the variant's own fields.
        EditClassification::Substitution { ref_base, alt_base } => {
            format!("{}>{}", ref_base, alt_base)
        }
        EditClassification::Repeat { unit, count } => format!("{}[{}]", unit, count),
    }
}
/// Formats a duplication as an HGVS-style string.
///
/// The duplicated segment is taken to be the `sequence.len()` positions that
/// end immediately before `ref_start`, i.e. `ref_start - len ..= ref_start - 1`.
/// A one-byte sequence is written as a single position (`3dup`); any other
/// length is written as a range (`2_3dup`).
///
/// * `ref_start` - coordinate directly after the duplicated segment.
/// * `sequence` - the duplicated sequence; only its byte length is used.
/// * `accession` - when `Some`, the result is prefixed with `<accession>:g.`.
///
/// Callers are expected to pass a non-empty `sequence` and `ref_start >= len`.
/// Coordinates that would fall below zero are clamped to `0` instead of
/// panicking (debug builds) or wrapping around to huge values (release builds).
// NOTE(review): `dead_code` is allowed because no call site is visible in this
// file; drop the attribute once the function is wired into `generate_hgvs` or
// another caller.
#[allow(dead_code)]
pub fn format_duplication_hgvs(ref_start: u64, sequence: &str, accession: Option<&str>) -> String {
    let len = sequence.len() as u64;

    // Saturating arithmetic keeps out-of-range input from underflowing `u64`.
    let dup_start = ref_start.saturating_sub(len);
    let dup_end = ref_start.saturating_sub(1);

    let position_str = if len == 1 {
        dup_start.to_string()
    } else {
        format!("{}_{}", dup_start, dup_end)
    };

    match accession {
        Some(acc) => format!("{}:g.{}dup", acc, position_str),
        None => format!("{}dup", position_str),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Test fixture: a `RawEdit` whose reference and observed spans both start
    /// at `ref_start` and extend by the byte length of their sequences.
    fn make_edit(ref_start: u64, ref_seq: &str, obs_seq: &str) -> RawEdit {
        let ref_end = ref_start + ref_seq.len() as u64;
        let obs_end = ref_start + obs_seq.len() as u64;
        RawEdit {
            ref_start,
            ref_end,
            obs_start: ref_start,
            obs_end,
            ref_seq: String::from(ref_seq),
            obs_seq: String::from(obs_seq),
        }
    }

    /// A single-base substitution renders as `<pos><ref>><alt>`.
    #[test]
    fn test_generate_substitution() {
        let snv = EditClassification::Substitution {
            ref_base: 'G',
            alt_base: 'A',
        };
        let rendered = generate_hgvs(&make_edit(3, "G", "A"), &snv, None);
        assert_eq!(rendered, "3G>A");
    }

    /// Supplying an accession adds the `<accession>:g.` prefix.
    #[test]
    fn test_generate_substitution_with_accession() {
        let snv = EditClassification::Substitution {
            ref_base: 'G',
            alt_base: 'A',
        };
        let rendered = generate_hgvs(&make_edit(3, "G", "A"), &snv, Some("NC_000001.11"));
        assert_eq!(rendered, "NC_000001.11:g.3G>A");
    }

    /// Deleting one base uses a single position.
    #[test]
    fn test_generate_single_deletion() {
        let deletion = EditClassification::Deletion {
            deleted: String::from("G"),
        };
        let rendered = generate_hgvs(&make_edit(3, "G", ""), &deletion, None);
        assert_eq!(rendered, "3del");
    }

    /// Deleting several bases uses an inclusive range.
    #[test]
    fn test_generate_multi_deletion() {
        let deletion = EditClassification::Deletion {
            deleted: String::from("GC"),
        };
        let rendered = generate_hgvs(&make_edit(3, "GC", ""), &deletion, None);
        assert_eq!(rendered, "3_4del");
    }

    /// An insertion is positioned by its two flanking coordinates.
    #[test]
    fn test_generate_insertion() {
        let insertion = EditClassification::Insertion {
            inserted: String::from("T"),
        };
        let rendered = generate_hgvs(&make_edit(3, "", "T"), &insertion, None);
        assert_eq!(rendered, "3_4insT");
    }

    /// A delins spans the replaced range and spells out the inserted bases.
    #[test]
    fn test_generate_delins() {
        let delins = EditClassification::Delins {
            deleted: String::from("GC"),
            inserted: String::from("TT"),
        };
        let rendered = generate_hgvs(&make_edit(3, "GC", "TT"), &delins, None);
        assert_eq!(rendered, "3_4delinsTT");
    }

    /// An inversion spans the inverted range and emits only the keyword.
    #[test]
    fn test_generate_inversion() {
        let inversion = EditClassification::Inversion {
            sequence: String::from("ATG"),
        };
        let rendered = generate_hgvs(&make_edit(3, "ATG", "CAT"), &inversion, None);
        assert_eq!(rendered, "3_5inv");
    }

    /// Duplications are positioned on the bases just before `ref_start`.
    #[test]
    fn test_format_duplication() {
        // (ref_start, sequence, accession, expected)
        let cases = [
            (4, "G", None, "3dup"),
            (4, "TG", None, "2_3dup"),
            (4, "G", Some("NC_000001.11"), "NC_000001.11:g.3dup"),
        ];
        for &(ref_start, sequence, accession, expected) in cases.iter() {
            assert_eq!(
                format_duplication_hgvs(ref_start, sequence, accession),
                expected
            );
        }
    }
}