Skip to main content

ratex_parser/mhchem/
mod.rs

1//! mhchem (`\ce`, `\pu`): pure Rust port of KaTeX mhchem 3.3.0.
2//!
3//! Data (`machines.json`, `patterns_regex.json`) is generated from `tools/mhchem_reference.js`;
4//! update workflow: `docs/MHCHEM_DATA.md`.
5
6mod actions;
7mod buffer;
8mod data;
9mod engine;
10mod error;
11mod json;
12mod patterns;
13mod texify;
14
15pub use data::data;
16pub use error::{MhchemError, MhchemResult};
17
18use crate::mhchem::data::MhchemData;
19use serde_json::Value;
20
21/// Context for recursive `go` (used by actions).
22pub struct ParserCtx<'a> {
23    pub data: &'a MhchemData,
24}
25
26impl ParserCtx<'_> {
27    pub fn go(&self, input: &str, machine: &str) -> MhchemResult<Vec<Value>> {
28        engine::go_machine(self, input, machine)
29    }
30}
31
32/// Parse `\ce` / `\pu` argument to TeX fragment (wrap `\mathrm` etc. is done here).
33pub fn chem_parse_str(input: &str, mode: &str) -> MhchemResult<String> {
34    let d = data();
35    let ctx = ParserCtx { data: d };
36    let sm = match mode {
37        "ce" => "ce",
38        "pu" => "pu",
39        _ => {
40            return Err(MhchemError::msg(format!(
41                "unknown mhchem mode (expected ce|pu): {mode}"
42            )));
43        }
44    };
45    let ast = ctx.go(input.trim(), sm)?;
46    texify::go(&ast, false)
47}
48
49/// Rebuild a macro argument string from tokens ([KaTeX `chemParse`]).
50pub fn mhchem_arg_tokens_to_string(tokens: &[ratex_lexer::token::Token]) -> String {
51    if tokens.is_empty() {
52        return String::new();
53    }
54    let mut expected_loc = tokens.last().unwrap().loc.start;
55    let mut out = String::new();
56    for i in (0..tokens.len()).rev() {
57        let t = &tokens[i];
58        if t.loc.start > expected_loc {
59            out.push(' ');
60            expected_loc = t.loc.start;
61        }
62        out.push_str(&t.text);
63        expected_loc += t.text.len();
64    }
65    out
66}
67
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain formula in `ce` mode renders to non-empty TeX that mentions `H`.
    #[test]
    fn h2o_ce() {
        let tex = chem_parse_str("H2O", "ce").expect("mhchem");
        assert!(!tex.is_empty());
        assert!(tex.contains('H'));
    }

    /// A reaction with `->` renders some form of arrow.
    #[test]
    fn reaction_arrow() {
        let tex = chem_parse_str("2H + O -> H2O", "ce").expect("mhchem");
        let has_arrow = tex.contains("rightarrow") || tex.contains("->");
        assert!(has_arrow, "{}", tex);
    }

    /// A simple quantity in `pu` mode renders to non-empty TeX.
    #[test]
    fn pu_simple() {
        let tex = chem_parse_str("123 kJ/mol", "pu").expect("mhchem");
        assert!(!tex.is_empty());
    }

    /// Lowercase `e` exponents use `\cdot`; uppercase `E` exponents use `\times`.
    #[test]
    fn pu_scientific_lowercase_e_cdot_uppercase_e_times() {
        for src in ["1.2e3 kJ", "1,2e3 kJ"] {
            let t = chem_parse_str(src, "pu").expect("mhchem");
            let cdot_only = t.contains("\\cdot") && !t.contains("\\times");
            assert!(
                cdot_only && t.contains("10^{3}"),
                "expected \\cdot for lowercase e: {src:?} → {t:?}"
            );
        }
        for src in ["1.2E3 kJ", "1,2E3 kJ"] {
            let t = chem_parse_str(src, "pu").expect("mhchem");
            let times_only = t.contains("\\times") && !t.contains("\\cdot");
            assert!(
                times_only && t.contains("10^{3}"),
                "expected \\times for uppercase E: {src:?} → {t:?}"
            );
        }
    }

    /// Output for a `$…$` escape containing a nested `\ce` must be accepted by the crate's parser.
    #[test]
    fn dollar_underset_inner_ce_tex_is_valid_latex() {
        let source = r"$\underset{\mathrm{red}}{\ce{HgI2}}$";
        let rendered = chem_parse_str(source, "ce").expect("mhchem");
        crate::parser::parse(&rendered).expect("mhchem TeX should parse");
    }
}