use std::fmt::Display;
use std::ops::RangeInclusive;
use std::str::FromStr;
use crate::imgt_gene::IMGTGene;
use crate::{AnnotatedSequence, Gene, Species};
use mzcore::sequence::{AminoAcid, IsAminoAcid};
/// A single data record: an identifier, the species, a sequence, and the genes and regions
/// listed for it.
///
/// The `Display` implementation prints the id and species on the first line, the sequence on
/// the second, and then one `G` line per gene and one `R` line per region.
#[derive(Debug)]
pub(crate) struct DataItem {
/// Identifier of this record.
pub id: String,
/// Genes listed for this record.
pub genes: Vec<IMGTGene>,
/// Regions listed for this record.
pub regions: Vec<Region>,
/// Species this record belongs to.
pub species: Species,
/// Sequence of this record as text.
/// NOTE(review): presumably the nucleotide sequence the region locations index into; confirm.
pub sequence: String,
}
impl Display for DataItem {
    /// Write the record as text: a header line with the id and species (tab separated), the
    /// sequence on its own line, then every gene prefixed with `G ` and every region prefixed
    /// with `R `, each on a separate line.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            id,
            genes,
            regions,
            species,
            sequence,
        } = self;

        writeln!(f, "{}\t{}\n{}", id, species, sequence)?;
        genes
            .iter()
            .try_for_each(|gene| writeln!(f, "G {gene}"))?;
        regions
            .iter()
            .try_for_each(|region| writeln!(f, "R {region}"))
    }
}
/// A single annotated region of a record.
///
/// The `Display` implementation prints the key, location, allele, functional flag, partial
/// flag, and the found amino acid sequence (or the error explaining why there is none),
/// separated by tabs.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub(crate) struct Region {
/// NOTE(review): presumably the accession of the record this region came from; confirm.
pub acc: String,
/// Key of this region, the first column of the `Display` output.
pub key: String,
/// Where this region is located.
pub location: Location,
/// NOTE(review): presumably the sequence as reported by the data source; confirm.
pub reported_seq: String,
/// Either a `(String, AASequence)` pair, or an error message if no sequence is available.
/// NOTE(review): the `String` half is presumably the underlying nucleotide text; confirm.
pub found_seq: Result<(String, AASequence), String>,
/// Allele this region is annotated with.
pub allele: String,
/// Flag printed as the fourth `Display` column.
/// NOTE(review): presumably whether the gene is functional; confirm.
pub functional: bool,
/// Flag printed as the fifth `Display` column.
/// NOTE(review): presumably whether the region is only partially present; confirm.
pub partial: bool,
/// NOTE(review): presumably the reading frame shift in bases; confirm against the code that
/// fills this in.
pub shift: usize,
/// NOTE(review): presumably an amino acid spanning a splice site, if any; confirm.
pub splice_aa: Option<AminoAcid>,
}
impl Display for Region {
    /// Write the region as six tab separated columns: key, location, allele, functional flag,
    /// partial flag, and the found amino acid sequence. If no sequence was found the last
    /// column contains a marker followed by the stored error message instead.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Resolve the last column first so the final `write!` stays a flat list of columns.
        let sequence_column = match &self.found_seq {
            Ok((_, amino_acids)) => amino_acids
                .0
                .iter()
                .map(|a| a.pro_forma_definition())
                .collect::<String>(),
            Err(e) => format!("<NO SEQ!>: {e}"),
        };

        write!(
            f,
            "{}\t{}\t{}\t{}\t{}\t{}",
            self.key, self.location, self.allele, self.functional, self.partial, sequence_column,
        )
    }
}
/// A region marker paired with its data: the amino acids of the region, the location the
/// region covers, and an additional string.
/// NOTE(review): the purpose of the trailing `String` is not visible from this file; confirm.
pub(crate) type SequenceRegion = (mzcore::sequence::Region, (Vec<AminoAcid>, Location, String));
/// A location within a sequence, either an inclusive range or a single position, and either
/// written plainly or wrapped in `complement(...)` in the textual form.
///
/// When parsed with `FromStr` the bounds of a range are shifted down by one, while single
/// positions are stored exactly as written. `Display` prints complement locations with a `c`
/// prefix.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub(crate) enum Location {
/// An inclusive range that was not wrapped in `complement(...)`.
Normal(RangeInclusive<usize>),
/// An inclusive range that was wrapped in `complement(...)`.
Complement(RangeInclusive<usize>),
/// A single position that was not wrapped in `complement(...)`.
SingleNormal(usize),
/// A single position that was wrapped in `complement(...)`.
SingleComplement(usize),
}
impl Location {
    /// Check whether `other` overlaps with, or directly borders, this location.
    ///
    /// Only a range on the left-hand side is supported and both locations have to be of the
    /// same kind (both normal or both complement); every other combination gives `false`.
    /// Two ranges also count as overlapping when they are directly adjacent, which is what the
    /// `+ 1` on both ends achieves.
    pub(crate) fn overlaps(&self, other: &Self) -> bool {
        match (self, other) {
            (Self::Complement(s), Self::Complement(o)) | (Self::Normal(s), Self::Normal(o)) => {
                *s.start() <= o.end() + 1 && s.end() + 1 >= *o.start()
            }
            (Self::Complement(s), Self::SingleComplement(o))
            | (Self::Normal(s), Self::SingleNormal(o)) => s.contains(o),
            _ => false,
        }
    }

    /// Check whether `other` lies completely within this location.
    ///
    /// Only a range on the left-hand side is supported and both locations have to be of the
    /// same kind (both normal or both complement); every other combination gives `false`.
    pub(crate) fn contains(&self, other: &Self) -> bool {
        match (self, other) {
            (Self::Complement(s), Self::Complement(o)) | (Self::Normal(s), Self::Normal(o)) => {
                s.start() <= o.start() && s.end() >= o.end()
            }
            (Self::Complement(s), Self::SingleComplement(o))
            | (Self::Normal(s), Self::SingleNormal(o)) => s.contains(o),
            _ => false,
        }
    }

    /// Find the amino acid index of this location within the concatenation of `sections`.
    ///
    /// The sections are visited in order. The first section whose location contains this
    /// location determines the result: the number of amino acids in all preceding sections
    /// plus the amino acid offset of the start of this location within that section.
    /// Returns `None` if no section contains this location.
    pub(crate) fn find_aa_location(&self, sections: &[SequenceRegion]) -> Option<usize> {
        let mut start = 0;
        for section in sections {
            if let Some(index) = section.1.1.get_aa_loc(self) {
                return Some(start + index.start());
            }
            start += section.1.0.len();
        }
        None
    }

    /// Translate `inner` into an inclusive range of amino acid indices relative to the start
    /// of this location, by dividing the base offsets by three.
    ///
    /// Returns `None` if `inner` is not contained in this location.
    /// NOTE(review): for complement locations the offset is also measured from the range
    /// start, not from its end; confirm this is intended.
    fn get_aa_loc(&self, inner: &Self) -> Option<RangeInclusive<usize>> {
        if !self.contains(inner) {
            return None;
        }
        // `contains` guarantees that `inner` does not start before `self`, so the
        // subtractions below cannot underflow.
        match (self, inner) {
            (Self::Complement(s), Self::Complement(o)) | (Self::Normal(s), Self::Normal(o)) => {
                Some((o.start() - s.start()) / 3..=(o.end() - s.start()) / 3)
            }
            (Self::Normal(s), Self::SingleNormal(o))
            | (Self::Complement(s), Self::SingleComplement(o)) => {
                Some((o - s.start()) / 3..=(o - s.start()) / 3)
            }
            _ => None,
        }
    }

    /// Split this range in two at the given amino acid `position` (three bases per position).
    ///
    /// For a normal range the position is counted from the start of the range, for a
    /// complement range it is counted from the end. Both halves are of the same kind as the
    /// original and both include the split point itself.
    ///
    /// Returns `None` for single positions, when the split point is at or beyond the far end
    /// of the range, and when the split point cannot be represented (the arithmetic would
    /// overflow or underflow).
    pub(crate) const fn splice(&self, position: usize) -> Option<(Self, Self)> {
        // Checked arithmetic: a position outside of the range has to give `None`, not a panic
        // (debug builds) or a wrapped, nonsensical range (release builds).
        let offset = match position.checked_mul(3) {
            Some(offset) => offset,
            None => return None,
        };
        match self {
            Self::Normal(s) => {
                let mid_point = match (*s.start()).checked_add(offset) {
                    Some(mid_point) => mid_point,
                    None => return None,
                };
                if mid_point >= *s.end() {
                    None
                } else {
                    Some((
                        Self::Normal((*s.start())..=mid_point),
                        Self::Normal(mid_point..=*s.end()),
                    ))
                }
            }
            Self::Complement(s) => {
                let mid_point = match (*s.end()).checked_sub(offset) {
                    Some(mid_point) => mid_point,
                    None => return None,
                };
                if mid_point <= *s.start() {
                    None
                } else {
                    Some((
                        Self::Complement((*s.start())..=mid_point),
                        Self::Complement(mid_point..=*s.end()),
                    ))
                }
            }
            Self::SingleNormal(_) | Self::SingleComplement(_) => None,
        }
    }
}
impl Display for Location {
    /// Write the location as `start..end` for ranges or as the bare number for single
    /// positions; complement locations get a leading `c`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The prefix is the only difference between the normal and complement variants.
        if matches!(self, Self::Complement(_) | Self::SingleComplement(_)) {
            f.write_str("c")?;
        }
        match self {
            Self::Complement(range) | Self::Normal(range) => {
                write!(f, "{}..{}", range.start(), range.end())
            }
            Self::SingleComplement(loc) | Self::SingleNormal(loc) => write!(f, "{loc}"),
        }
    }
}
impl FromStr for Location {
    type Err = String;

    /// Parse a location from its textual form.
    ///
    /// Supported forms are `N`, `A..B`, `complement(N)`, and `complement(A..B)`, where the
    /// start of a range may carry a leading `<` and the end a leading `>`. Surrounding
    /// whitespace is ignored. The bounds of a range are shifted down by one; single positions
    /// are stored exactly as written.
    /// NOTE(review): single positions are not shifted like range bounds are; confirm this
    /// difference is intended.
    ///
    /// # Errors
    /// Returns a message if the text contains `join` or `^`, if a number cannot be parsed, or
    /// if a range bound is `0` (which cannot be shifted down by one).
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        /// Parse one bound of a range, ignoring leading `marker` characters, and shift it
        /// down by one. `name` is only used in the error message.
        fn bound(text: &str, marker: char, name: &str) -> Result<usize, String> {
            text.trim_start_matches(marker)
                .parse::<usize>()
                .map_err(|err| format!("Invalid {name} number: {err}"))?
                // A plain `- 1` would panic (debug) or wrap around (release) on `0`.
                .checked_sub(1)
                .ok_or_else(|| format!("Invalid {name} number: range bounds start at 1, found 0"))
        }

        let s = s.trim();
        if s.contains("join") || s.contains('^') {
            return Err("Location is complex, joined or it uses ^".to_string());
        }

        // Peel off the optional `complement(...)` wrapper; what remains is parsed identically
        // for both kinds.
        let (body, is_complement) = match s.strip_prefix("complement(") {
            Some(tail) => (tail.trim_end_matches(')'), true),
            None => (s, false),
        };

        match body.split_once("..") {
            Some((start, end)) => {
                let start = bound(start, '<', "start")?;
                let end = bound(end, '>', "end")?;
                Ok(if is_complement {
                    Self::Complement(start..=end)
                } else {
                    Self::Normal(start..=end)
                })
            }
            None => {
                let position = body
                    .parse::<usize>()
                    .map_err(|err| format!("Invalid single number: {err}"))?;
                Ok(if is_complement {
                    Self::SingleComplement(position)
                } else {
                    Self::SingleNormal(position)
                })
            }
        }
    }
}
/// A sequence of amino acids.
///
/// This wrapper has a hand-written `Debug` implementation that prints the sequence as one
/// compact string between square brackets.
#[derive(Clone, Eq, Hash, PartialEq)]
pub(crate) struct AASequence(pub Vec<AminoAcid>);
impl std::fmt::Debug for AASequence {
    /// Write the sequence as the concatenated ProForma definitions of all amino acids,
    /// enclosed in square brackets.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let residues: String = self
            .0
            .iter()
            .map(|a| a.pro_forma_definition())
            .collect();
        write!(f, "[{}]", residues)
    }
}
/// A single sequence for one gene and allele, together with its annotated sequence and DNA.
#[derive(Debug)]
pub(crate) struct SingleSeq {
/// Gene this sequence belongs to.
pub name: Gene,
/// Allele number of this sequence.
pub allele: usize,
/// NOTE(review): presumably the accession of the source record; confirm.
pub acc: String,
/// The annotated sequence.
pub sequence: AnnotatedSequence,
/// DNA belonging to this sequence, as text.
pub dna: String,
}