// use-chemical-formula 0.1.0
//
// Chemical formula primitives and lightweight parsing for RustUse.
// Documentation
use crate::{
    ChemicalFormula, ElementCount, ElementSymbol, FormulaGroup, FormulaMultiplier,
    FormulaParseError, FormulaPart, FormulaTerm, HydratePart,
};

/// Parses a textual chemical formula into a [`ChemicalFormula`].
///
/// The input is trimmed first. The leading part is parsed as the main part;
/// every following part must be introduced by a hydrate separator and may
/// start with a numeric multiplier. A missing multiplier falls back to the
/// multiplier type's `Default` value.
///
/// # Errors
///
/// * [`FormulaParseError::EmptyFormula`] when the trimmed input is empty.
/// * [`FormulaParseError::UnexpectedCharacter`] when a part is followed by
///   something other than a hydrate separator.
/// * [`FormulaParseError::TrailingSeparator`] when a separator is the last
///   character of the input.
/// * Any error produced while parsing an individual part or multiplier.
pub(crate) fn parse_formula(input: &str) -> Result<ChemicalFormula, FormulaParseError> {
    let source = input.trim();
    if source.is_empty() {
        return Err(FormulaParseError::EmptyFormula);
    }

    let mut parser = Parser::new(source);
    let main_part = parser.parse_part(false)?;
    let mut hydrate_parts = Vec::new();

    loop {
        // Whatever follows a finished part must be a separator (or the end).
        match parser.peek() {
            None => break,
            Some(next) if !is_hydrate_separator(next) => {
                return Err(FormulaParseError::UnexpectedCharacter(next));
            },
            Some(_) => {
                parser.advance();
            },
        }

        // A separator must be followed by at least one more character.
        if parser.peek().is_none() {
            return Err(FormulaParseError::TrailingSeparator);
        }

        let multiplier = parser.parse_optional_multiplier()?.unwrap_or_default();
        let hydrate = parser.parse_part(false)?;
        hydrate_parts.push(HydratePart::new(multiplier, hydrate));
    }

    Ok(ChemicalFormula::new(main_part, hydrate_parts))
}

struct Parser {
    characters: Vec<char>,
    index: usize,
}

impl Parser {
    fn new(input: &str) -> Self {
        Self {
            characters: input.chars().collect(),
            index: 0,
        }
    }

    fn peek(&self) -> Option<char> {
        self.characters.get(self.index).copied()
    }

    fn advance(&mut self) -> Option<char> {
        let character = self.peek()?;
        self.index += 1;
        Some(character)
    }

    fn parse_part(&mut self, stop_at_group_close: bool) -> Result<FormulaPart, FormulaParseError> {
        let mut terms = Vec::new();

        while let Some(character) = self.peek() {
            if is_hydrate_separator(character) {
                break;
            }

            match character {
                ')' if stop_at_group_close => break,
                ')' => return Err(FormulaParseError::UnmatchedCloseGroup),
                '(' => terms.push(FormulaTerm::group(self.parse_group()?)),
                character if character.is_ascii_uppercase() => {
                    terms.push(self.parse_element_term()?);
                },
                character if character.is_ascii_lowercase() => {
                    return Err(FormulaParseError::InvalidSymbol(character.to_string()));
                },
                character => return Err(FormulaParseError::UnexpectedCharacter(character)),
            }
        }

        FormulaPart::new(terms).map_err(Into::into)
    }

    fn parse_group(&mut self) -> Result<FormulaGroup, FormulaParseError> {
        self.advance();

        if self.peek().is_none() {
            return Err(FormulaParseError::UnmatchedOpenGroup);
        }

        if self.peek() == Some(')') {
            return Err(FormulaParseError::EmptyGroup);
        }

        let part = self.parse_part(true)?;

        match self.peek() {
            Some(')') => {
                self.advance();
            },
            None => return Err(FormulaParseError::UnmatchedOpenGroup),
            Some(character) => return Err(FormulaParseError::UnexpectedCharacter(character)),
        }

        let multiplier = self.parse_optional_multiplier()?.unwrap_or_default();
        FormulaGroup::new(part.terms().to_vec(), multiplier).map_err(Into::into)
    }

    fn parse_element_term(&mut self) -> Result<FormulaTerm, FormulaParseError> {
        let symbol = self.parse_symbol()?;
        let count = self.parse_optional_count()?.unwrap_or_default();
        Ok(FormulaTerm::element(symbol, count))
    }

    fn parse_symbol(&mut self) -> Result<ElementSymbol, FormulaParseError> {
        let Some(first) = self.advance() else {
            return Err(FormulaParseError::UnexpectedEnd);
        };

        if !first.is_ascii_uppercase() {
            return Err(FormulaParseError::InvalidSymbol(first.to_string()));
        }

        let mut symbol = first.to_string();
        if let Some(second) = self.peek()
            && second.is_ascii_lowercase()
        {
            symbol.push(second);
            self.advance();
        }

        ElementSymbol::new(&symbol).map_err(Into::into)
    }

    fn parse_optional_count(&mut self) -> Result<Option<ElementCount>, FormulaParseError> {
        let Some(number) = self.parse_optional_number()? else {
            return Ok(None);
        };

        Ok(Some(ElementCount::new(number)?))
    }

    fn parse_optional_multiplier(
        &mut self,
    ) -> Result<Option<FormulaMultiplier>, FormulaParseError> {
        let Some(number) = self.parse_optional_number()? else {
            return Ok(None);
        };

        Ok(Some(FormulaMultiplier::new(number)?))
    }

    fn parse_optional_number(&mut self) -> Result<Option<u32>, FormulaParseError> {
        let mut number = String::new();

        while let Some(character) = self.peek() {
            if character.is_ascii_digit() {
                number.push(character);
                self.advance();
            } else {
                break;
            }
        }

        if number.is_empty() {
            return Ok(None);
        }

        number
            .parse::<u32>()
            .map(Some)
            .map_err(|_| FormulaParseError::InvalidNumber(number))
    }
}

/// Returns `true` when `character` separates the main formula part from a
/// hydrate part (or one hydrate part from the next).
///
/// Two spellings are accepted: the ASCII full stop `.` and the middle dot
/// `·` (U+00B7).
fn is_hydrate_separator(character: char) -> bool {
    // The middle dot is written as an escape so the literal survives the file
    // being re-saved or transcoded with the wrong encoding.
    matches!(character, '.' | '\u{00B7}')
}