use core::{fmt::Debug, iter::Peekable};
use elements_rs::{Isotope, isotopes::HydrogenIsotope};
mod subtokens;
pub use subtokens::*;
mod inchi_tokens;
pub use inchi_tokens::InchiToken;
use crate::{
ChargedMolecularFormulaMetadata, ChemicalFormula, ChemicalTree, SequenceNode, TokenLike,
display_charge, display_isotope, errors::ParserError, parsable::ParsableMolecularTree,
};
/// A single token of a chemical formula.
///
/// `Count` is the numeric type carried by InChI count tokens, `Charge` is the
/// type carried by charge tokens, and `Extension` is a caller-chosen payload
/// stored in the [`Token::Extension`] variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "fuzzing", derive(arbitrary::Arbitrary))]
pub enum Token<Count: CountLike, Charge: ChargeLike, Extension> {
/// A token from the InChI token set (for example an element, a count or
/// a dot).
Inchi(InchiToken<Count>),
/// An isotope, e.g. one built from a superscript number followed by an
/// element, or from a hydrogen isotope.
Isotope(Isotope),
/// A charge value.
Charge(Charge),
/// A `Complex` group such as `Complex::Methyl`.
Complex(Complex),
/// A radical marker; displayed as `.`.
Radical,
/// An opening bracket of the given `Bracket` kind.
OpenBracket(Bracket),
/// A closing bracket of the given `Bracket` kind.
CloseBracket(Bracket),
/// An extension token carrying the `Extension` payload.
Extension(Extension),
}
/// Converts a bare element into the corresponding InChI element token.
impl<Count: CountLike, Charge: ChargeLike, Extension> From<elements_rs::Element>
    for Token<Count, Charge, Extension>
{
    fn from(element: elements_rs::Element) -> Self {
        // Elements belong to the InChI token set, so wrap twice.
        let inchi_token = InchiToken::Element(element);
        Self::Inchi(inchi_token)
    }
}
/// Converts an isotope into the [`Token::Isotope`] variant.
impl<Count: CountLike, Charge: ChargeLike, Extension> From<elements_rs::Isotope>
    for Token<Count, Charge, Extension>
{
    fn from(isotope: elements_rs::Isotope) -> Self {
        Self::Isotope(isotope)
    }
}
/// `TokenLike` for [`Token`]: every query is answered by the wrapped InChI
/// token; all other variants report "no" / `None`.
impl<Count: CountLike, Charge: ChargeLike, Extension: Debug + Eq + Copy> TokenLike
    for Token<Count, Charge, Extension>
{
    type Count = Count;

    /// Returns `true` only for an InChI token that is itself a mixture
    /// separator.
    fn is_mixture_separator(&self) -> bool {
        matches!(self, Self::Inchi(inchi) if inchi.is_mixture_separator())
    }

    /// Builds the mixture separator token by wrapping the InChI one.
    fn mixture_separator() -> Self {
        let separator = InchiToken::mixture_separator();
        Self::Inchi(separator)
    }

    /// Returns the count carried by a wrapped InChI token, if any.
    fn as_count(&self) -> Option<Count> {
        let Self::Inchi(inchi) = self else {
            return None;
        };
        inchi.as_count()
    }

    /// Returns the element carried by a wrapped InChI token, if any.
    fn as_element(&self) -> Option<elements_rs::Element> {
        let Self::Inchi(inchi) = self else {
            return None;
        };
        inchi.as_element()
    }
}
impl<Count: CountLike, Charge: ChargeLike, Extension: Debug + Eq + Copy> From<HydrogenIsotope>
for Token<Count, Charge, Extension>
{
fn from(hydrogen_isotope: HydrogenIsotope) -> Self {
Token::Isotope(hydrogen_isotope.into())
}
}
pub(crate) struct Tokens<I: Iterator<Item = char>, M: ChargedMolecularFormulaMetadata, Extension>
where
Extension: TryFrom<char> + Debug,
{
stream: core::iter::Peekable<SubTokens<I, M, Extension>>,
}
impl<I: Iterator<Item = char>, M: ChargedMolecularFormulaMetadata, Extension> From<Peekable<I>>
for Tokens<I, M, Extension>
where
Extension: TryFrom<char> + Debug,
{
fn from(iter: Peekable<I>) -> Self {
Self { stream: SubTokens::from(iter).peekable() }
}
}
/// Turns sub-tokens into [`Token`]s.
///
/// Most sub-tokens map one-to-one onto a token. A superscript number is
/// merged with the element that follows it into a single isotope token.
impl<I, M, Extension> Iterator for Tokens<I, M, Extension>
where
    I: Iterator<Item = char>,
    M: ChargedMolecularFormulaMetadata,
    Extension: Debug + Copy + Eq + TryFrom<char>,
    Isotope: TryFrom<(elements_rs::Element, M::Count), Error = elements_rs::errors::Error>,
{
    type Item = Result<Token<M::Count, M::Charge, Extension>, ParserError>;

    /// Yields the next token, forwarding sub-token errors unchanged.
    ///
    /// Errors specific to this layer:
    /// * `ParserError::UnexpectedEndOfInput` when a superscript number is the
    ///   last sub-token;
    /// * `ParserError::UnprocessableNumber` when a superscript number is
    ///   followed by anything other than an element;
    /// * the converted `elements_rs` error when the element/number pair is
    ///   not accepted by `Isotope::try_from`.
    fn next(&mut self) -> Option<Self::Item> {
        // An exhausted sub-token stream ends this stream as well.
        let current = match self.stream.next()? {
            Ok(subtoken) => subtoken,
            Err(error) => return Some(Err(error)),
        };

        let token: Token<M::Count, M::Charge, Extension> = match current {
            SubToken::Inchi(inchi) => Token::Inchi(inchi),
            SubToken::HydrogenIsotope(hydrogen) => hydrogen.into(),
            SubToken::Charge(charge) => Token::Charge(charge),
            SubToken::Complex(complex) => Token::Complex(complex),
            SubToken::Radical => Token::Radical,
            SubToken::OpenBracket(bracket) => Token::OpenBracket(bracket),
            SubToken::CloseBracket(bracket) => Token::CloseBracket(bracket),
            SubToken::Extension(extension) => Token::Extension(extension),
            SubToken::SuperscriptDigit(isotopic_number) => {
                // The number only makes sense as a prefix, so one more
                // sub-token is consumed unconditionally.
                let follower = match self.stream.next() {
                    Some(Ok(subtoken)) => subtoken,
                    Some(Err(error)) => return Some(Err(error)),
                    None => return Some(Err(ParserError::UnexpectedEndOfInput)),
                };
                let SubToken::Inchi(InchiToken::Element(element)) = follower else {
                    return Some(Err(ParserError::UnprocessableNumber));
                };
                match Isotope::try_from((element, isotopic_number)) {
                    Ok(isotope) => isotope.into(),
                    Err(error) => return Some(Err(error.into())),
                }
            }
        };

        Some(Ok(token))
    }
}
/// Parsing hooks for [`ChemicalTree`], using [`Token`] as the token type and
/// [`Tokens`] as the token stream.
impl<Count, Charge, Extension> ParsableMolecularTree<Count>
    for ChemicalTree<Count, Charge, Extension>
where
    Count: CountLike,
    Charge: ChargeLike + TryFrom<Count>,
    Extension: Copy + Debug + Eq + TryFrom<char>,
    Isotope: TryFrom<(elements_rs::Element, Count), Error = elements_rs::errors::Error>,
{
    type Token = Token<Count, Charge, Extension>;
    type Tokens<I>
        = Tokens<I, ChemicalFormula<Count, Charge>, Extension>
    where
        I: Iterator<Item = char>;

    /// Returns a tree made of a single empty sequence node.
    #[inline]
    fn empty() -> Self {
        let sequence = SequenceNode::empty();
        Self::Sequence(sequence)
    }

    /// Returns `true` only for a sequence node that is itself empty; every
    /// other kind of node counts as non-empty.
    #[inline]
    fn is_empty(&self) -> bool {
        matches!(self, Self::Sequence(sequence) if sequence.is_empty())
    }

    /// Pushes an element node for `element` onto this tree and returns the
    /// result.
    #[inline]
    fn element(self, element: elements_rs::Element) -> Self {
        let leaf = Self::Element(element);
        self.push(leaf)
    }
}
/// Textual rendering of a [`Token`].
impl<Count, Charge, Extension> core::fmt::Display for Token<Count, Charge, Extension>
where
    Count: CountLike + core::fmt::Display,
    Charge: ChargeLike + core::fmt::Display,
    Extension: core::fmt::Display,
{
    /// Writes the token to `f`.
    ///
    /// Isotopes and charges go through the crate's `display_isotope` and
    /// `display_charge` helpers, a radical is written as `.`, brackets use
    /// their opening/closing form, and the remaining variants use the
    /// `Display` implementation of their payload.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Self::Inchi(inchi) => write!(f, "{inchi}"),
            Self::Isotope(isotope) => display_isotope(*isotope, f),
            Self::Charge(charge) => display_charge(*charge, f),
            Self::Complex(complex) => write!(f, "{complex}"),
            Self::Radical => f.write_str("."),
            Self::OpenBracket(bracket) => write!(f, "{}", bracket.opening()),
            Self::CloseBracket(bracket) => write!(f, "{}", bracket.closing()),
            Self::Extension(extension) => write!(f, "{extension}"),
        }
    }
}
#[cfg(test)]
mod tests {
    use alloc::format;

    use elements_rs::Element;

    use super::*;

    /// Concrete token instantiation shared by the tests in this module.
    type TestToken = Token<u32, i32, char>;

    /// Every variant must render to its expected textual form.
    #[test]
    fn test_display() {
        let carbon_13 = Isotope::try_from((Element::C, 13_u16)).unwrap();
        let cases: [(TestToken, &str); 10] = [
            (TestToken::from(Element::C), "C"),
            (TestToken::Inchi(InchiToken::Count(42)), "42"),
            (TestToken::Inchi(InchiToken::Dot), "."),
            (TestToken::from(carbon_13), "[¹³C]"),
            (TestToken::Charge(2), "²⁺"),
            (TestToken::Complex(Complex::Methyl), "Me"),
            (TestToken::Radical, "."),
            (TestToken::OpenBracket(Bracket::Round), "("),
            (TestToken::CloseBracket(Bracket::Square), "]"),
            (TestToken::Extension('X'), "X"),
        ];
        for (token, expected) in cases {
            assert_eq!(format!("{token}"), expected);
        }
    }

    /// Generates tokens from deterministic pseudo-random bytes until the
    /// generator fails or the input is used up.
    #[cfg(feature = "fuzzing")]
    #[test]
    #[allow(clippy::cast_possible_truncation)]
    fn test_arbitrary() {
        use arbitrary::{Arbitrary, Unstructured};

        for seed in 0u8..16 {
            let mut raw_data = [0u8; 256];
            for (index, byte) in raw_data.iter_mut().enumerate() {
                // Truncation to `u8` is intended: only the low byte is kept.
                *byte = index.wrapping_add(usize::from(seed)).wrapping_mul(31) as u8;
            }
            let mut unstructured = Unstructured::new(&raw_data);
            loop {
                let generation_failed = TestToken::arbitrary(&mut unstructured).is_err();
                if generation_failed || unstructured.is_empty() {
                    break;
                }
            }
        }
    }
}