use std::str::FromStr;
use context_error::*;
use serde::{Deserialize, Serialize};
use thin_vec::ThinVec;
use crate::{
ontology::Ontology,
parse_json::{ParseJson, use_serde},
sequence::{
AminoAcid, Modification, SequenceElement, SequencePosition, SimpleModificationInner,
},
space::{Space, UsedSpace},
};
/// A rule describing where a modification is allowed to be placed.
///
/// Each variant is checked against a sequence element and its location by
/// `PlacementRule::is_possible`, or against a bare amino acid and [`Position`] by
/// `PlacementRule::is_possible_aa`.
#[derive(Clone, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
pub enum PlacementRule {
/// Allowed on any of the listed amino acids, as long as the location also satisfies the
/// given [`Position`].
AminoAcid(ThinVec<AminoAcid>, Position),
/// Allowed on an element that already carries the PSI-MOD database modification with the
/// given id, as long as the location also satisfies the given [`Position`]. This never
/// matches when only an amino acid is known (`is_possible_aa` returns `false`).
PsiModification(u32, Position),
/// Allowed only on a terminal location that satisfies the given [`Position`]; a
/// non-terminal location is always rejected.
Terminal(Position),
/// Allowed on every element at every location.
Anywhere,
}
impl Space for PlacementRule {
    /// Report the space used by this rule.
    ///
    /// The result is `UsedSpace::stack(1)` plus the space reported by the fields of the
    /// active variant, passed through `set_total::<Self>()`.
    // NOTE(review): the exact accounting meaning of `stack(1)` and `set_total` is defined in
    // `crate::space` and is not visible from this file.
    fn space(&self) -> UsedSpace {
        // Space taken by the data stored inside the active variant.
        let payload = match self {
            Self::Anywhere => UsedSpace::default(),
            Self::Terminal(position) => position.space(),
            Self::PsiModification(index, position) => index.space() + position.space(),
            Self::AminoAcid(amino_acids, position) => amino_acids.space() + position.space(),
        };
        (UsedSpace::stack(1) + payload).set_total::<Self>()
    }
}
impl ParseJson for PlacementRule {
/// Build a placement rule from a JSON value by delegating to [`use_serde`].
///
/// # Errors
/// Returns whatever error [`use_serde`] reports for the given value.
// NOTE(review): presumably `use_serde` goes through the derived `Deserialize` impl — confirm
// in `crate::parse_json`.
fn from_json_value(value: serde_json::Value) -> Result<Self, BoxedError<'static, BasicKind>> {
use_serde(value)
}
}
/// A location restriction used by [`PlacementRule`], also used to describe the location of
/// a candidate site in `Position::is_possible_position`.
#[derive(
Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default, Serialize, Deserialize,
)]
pub enum Position {
/// No restriction, every location is accepted. This is the default value.
#[default]
Anywhere,
/// The N terminal. As a rule this accepts both `AnyNTerm` and `ProteinNTerm` locations.
AnyNTerm,
/// The C terminal. As a rule this accepts both `AnyCTerm` and `ProteinCTerm` locations.
AnyCTerm,
/// The protein N terminal. As a rule this accepts only `ProteinNTerm` locations; when
/// checked against a `SequencePosition` it behaves the same as `AnyNTerm`.
ProteinNTerm,
/// The protein C terminal. As a rule this accepts only `ProteinCTerm` locations; when
/// checked against a `SequencePosition` it behaves the same as `AnyCTerm`.
ProteinCTerm,
}
impl std::fmt::Display for Position {
    /// Write the name of the variant, spelled exactly like the variant itself.
    ///
    /// Width, fill and alignment flags given by the caller are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Self::Anywhere => "Anywhere",
            Self::AnyNTerm => "AnyNTerm",
            Self::AnyCTerm => "AnyCTerm",
            Self::ProteinNTerm => "ProteinNTerm",
            Self::ProteinCTerm => "ProteinCTerm",
        };
        f.write_str(name)
    }
}
impl PlacementRule {
    /// Check whether this rule allows placement on the given sequence element at the given
    /// location.
    ///
    /// * `AminoAcid`: the element's amino acid has to be one of the listed ones and the
    ///   location has to satisfy the rule's position.
    /// * `PsiModification`: the element has to carry a simple database modification from the
    ///   PSI-MOD ontology with the rule's id, and the location has to satisfy the rule's
    ///   position.
    /// * `Terminal`: the location has to satisfy the rule's position and has to be the N or
    ///   C terminal.
    /// * `Anywhere`: always allowed.
    pub fn is_possible<T>(&self, seq: &SequenceElement<T>, position: SequencePosition) -> bool {
        match self {
            Self::Anywhere => true,
            Self::Terminal(allowed) => {
                let location_fits = allowed.is_possible(position);
                // Needed on top of the position check because `Position::Anywhere`
                // accepts non-terminal locations.
                location_fits
                    && (position == SequencePosition::NTerm
                        || position == SequencePosition::CTerm)
            }
            Self::AminoAcid(candidates, allowed) => {
                let residue_fits = candidates
                    .iter()
                    .any(|candidate| *candidate == seq.aminoacid.aminoacid());
                residue_fits && allowed.is_possible(position)
            }
            Self::PsiModification(mod_index, allowed) => {
                // The modification this rule depends on has to be present on the element.
                let dependency_present = seq.modifications.iter().any(|modification| {
                    if let Modification::Simple(simple) = modification
                        && let SimpleModificationInner::Database { id, .. } = &**simple
                        && id.ontology == Ontology::Psimod
                    {
                        id.id().is_some_and(|found| found == *mod_index)
                    } else {
                        false
                    }
                });
                dependency_present && allowed.is_possible(position)
            }
        }
    }

    /// Check whether this rule allows placement when only the amino acid and its
    /// [`Position`] are known.
    ///
    /// A `PsiModification` rule is never satisfied here, and a `Terminal` rule is never
    /// satisfied for `Position::Anywhere`.
    pub fn is_possible_aa(&self, aa: AminoAcid, position: Position) -> bool {
        match self {
            Self::Anywhere => true,
            Self::PsiModification(..) => false,
            Self::Terminal(allowed) => {
                allowed.is_possible_position(position) && position != Position::Anywhere
            }
            Self::AminoAcid(candidates, allowed) => {
                candidates.contains(&aa) && allowed.is_possible_position(position)
            }
        }
    }

    /// Check whether at least one of the rules allows placement on the given sequence
    /// element at the given location. An empty list of rules gives `false`.
    pub fn any_possible<T>(
        rules: &[Self],
        seq: &SequenceElement<T>,
        position: SequencePosition,
    ) -> bool {
        rules.iter().any(|rule| Self::is_possible(rule, seq, position))
    }

    /// Check whether at least one of the rules allows placement on the given amino acid at
    /// the given [`Position`]. An empty list of rules gives `false`.
    pub fn any_possible_aa(rules: &[Self], aa: AminoAcid, position: Position) -> bool {
        rules.iter().any(|rule| Self::is_possible_aa(rule, aa, position))
    }
}
impl FromStr for PlacementRule {
type Err = BoxedError<'static, BasicKind>;
fn from_str(s: &str) -> Result<Self, Self::Err> {
if let Some((head, tail)) = s.split_once('@') {
let aa: Vec<AminoAcid> = head
.chars()
.enumerate()
.map(|(i, c)| {
AminoAcid::try_from(c).map_err(|()| {
BoxedError::new(
BasicKind::Error,
"Invalid amino acid",
"Invalid amino acid in specified amino acids in placement rule",
Context::line(None, s, i, 1).to_owned(),
)
})
})
.collect::<Result<Vec<_>, _>>()?;
tail.parse().map_or_else(
|()| {
Err(BoxedError::new(BasicKind::Error,
"Invalid position",
"Use any of the following for the position: Anywhere, AnyNTerm, ProteinNTerm, AnyCTerm, ProteinCTerm",
Context::line(None, s, head.len() + 1, tail.len()).to_owned(),
))
},
|position| Ok(Self::AminoAcid(aa.into(), position)),
)
} else if let Ok(position) = s.parse() {
Ok(match position {
Position::Anywhere => Self::Anywhere,
pos => Self::Terminal(pos),
})
} else {
Err(BoxedError::new(
BasicKind::Error,
"Invalid position",
"Use any of the following for the position: Anywhere, AnyNTerm, ProteinNTerm, AnyCTerm, ProteinCTerm",
Context::full_line(0, s).to_owned(),
))
}
}
}
impl Position {
    /// Check whether this rule position accepts the given location in a sequence.
    ///
    /// `Anywhere` accepts everything, both N terminal variants accept only
    /// `SequencePosition::NTerm`, and both C terminal variants accept only
    /// `SequencePosition::CTerm`.
    pub fn is_possible(self, position: SequencePosition) -> bool {
        let required = match self {
            Self::Anywhere => return true,
            Self::AnyNTerm | Self::ProteinNTerm => SequencePosition::NTerm,
            Self::AnyCTerm | Self::ProteinCTerm => SequencePosition::CTerm,
        };
        position == required
    }

    /// Check whether this rule position accepts the given position of a candidate site.
    ///
    /// `Anywhere` accepts everything, the `Any*Term` variants accept the matching `Any*Term`
    /// and `Protein*Term` positions, and the `Protein*Term` variants accept only themselves.
    pub fn is_possible_position(self, position: Self) -> bool {
        match self {
            Self::Anywhere => true,
            Self::AnyNTerm => matches!(position, Self::AnyNTerm | Self::ProteinNTerm),
            Self::AnyCTerm => matches!(position, Self::AnyCTerm | Self::ProteinCTerm),
            // The protein level terminals are the most specific and only accept themselves.
            Self::ProteinNTerm | Self::ProteinCTerm => position == self,
        }
    }
}
impl FromStr for Position {
    type Err = ();

    /// Parse a position from its name, ignoring ASCII case.
    ///
    /// The empty string is read as `Anywhere`. Each terminal position accepts three
    /// spellings, for example `AnyNTerm`, `AnyN-term` and `Any N-term`. Surrounding
    /// whitespace is not trimmed.
    ///
    /// # Errors
    /// Returns `Err(())` for any other text.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Compare without building a lower-cased copy of the input.
        let is_any_of =
            |names: &[&str]| names.iter().any(|name| s.eq_ignore_ascii_case(name));

        if s.is_empty() || is_any_of(&["anywhere"]) {
            Ok(Self::Anywhere)
        } else if is_any_of(&["anynterm", "anyn-term", "any n-term"]) {
            Ok(Self::AnyNTerm)
        } else if is_any_of(&["proteinnterm", "proteinn-term", "protein n-term"]) {
            Ok(Self::ProteinNTerm)
        } else if is_any_of(&["anycterm", "anyc-term", "any c-term"]) {
            Ok(Self::AnyCTerm)
        } else if is_any_of(&["proteincterm", "proteinc-term", "protein c-term"]) {
            Ok(Self::ProteinCTerm)
        } else {
            Err(())
        }
    }
}
#[cfg(test)]
#[expect(clippy::missing_panics_doc)]
mod tests {
    use crate::sequence::{CheckedAminoAcid, RulePossible};

    use super::*;

    /// A rule that depends on another modification only applies once that modification is
    /// present on the element.
    #[test]
    fn multi_level_rule() {
        let ontologies = &crate::ontology::STATIC_ONTOLOGIES;
        let rule = PlacementRule::PsiModification(30, Position::Anywhere);
        let location = SequencePosition::Index(0);

        let mut element = SequenceElement::new(CheckedAminoAcid::Alanine, None);
        assert!(
            !rule.is_possible(&element, location),
            "Multi level mod cannot be placed if the dependent mod is not present"
        );

        element
            .modifications
            .push(ontologies.psimod().get_by_index(&30).unwrap().into());
        assert!(
            rule.is_possible(&element, location),
            "Multi level mod can be placed if the dependent mod is present"
        );
    }

    /// An amino acid rule without a position restriction applies at both terminals and in
    /// the middle of a sequence.
    #[test]
    fn place_anywhere() {
        let ontologies = &crate::ontology::STATIC_ONTOLOGIES;
        let rule =
            PlacementRule::AminoAcid(vec![AminoAcid::Glutamine].into(), Position::Anywhere);

        for (location, label) in [
            (SequencePosition::NTerm, "start"),
            (SequencePosition::Index(2), "middle"),
            (SequencePosition::CTerm, "end"),
        ] {
            assert!(
                rule.is_possible(&SequenceElement::new(CheckedAminoAcid::Q, None), location),
                "{label}"
            );
        }

        assert_eq!(
            ontologies.unimod().get_by_index(&7).unwrap().is_possible(
                &SequenceElement::new(CheckedAminoAcid::Q, None),
                SequencePosition::CTerm
            ),
            RulePossible::Symmetric(std::collections::BTreeSet::from([0])),
            "unimod deamidated at end"
        );
    }
}