Skip to main content

math_core/
lib.rs

1//! Convert LaTeX math to MathML Core.
2//!
3//! For more background on what that means and on what to do with the resulting MathML code,
4//! see the repo's README: https://github.com/tmke8/math-core
5//!
6//! # Usage
7//!
8//! The main struct of this library is [`LatexToMathML`]. In order to use the library, create an
9//! instance of this struct and then call one of the convert functions. The constructor of the
10//! struct expects a config object in the form of an instance of [`MathCoreConfig`].
11//!
12//! Basic use looks like this:
13//!
14//! ```rust
15//! use math_core::{LatexToMathML, MathCoreConfig, MathDisplay};
16//!
17//! let latex = r#"\erf ( x ) = \frac{ 2 }{ \sqrt{ \pi } } \int_0^x e^{- t^2} \, dt"#;
18//! let config = MathCoreConfig::default();
19//! let converter = LatexToMathML::new(config).unwrap();
20//! let mathml = converter.convert_with_local_counter(latex, MathDisplay::Block).unwrap();
21//! println!("{}", mathml);
22//! ```
23//!
24//! # Features
25//!
//! - `serde`: With this feature, `MathCoreConfig` implements serde's `Serialize` and
//!   `Deserialize`.
27//!
28mod atof;
29mod character_class;
30mod color_defs;
31mod commands;
32mod environments;
33mod error;
34mod html_utils;
35mod lexer;
36mod parser;
37mod predefined;
38mod specifications;
39mod text_parser;
40mod token;
41mod token_queue;
42
43use rustc_hash::FxHashMap;
44#[cfg(feature = "serde")]
45use serde::{Deserialize, Serialize};
46
47use mathml_renderer::{arena::Arena, ast::Node};
48
49pub use self::error::{LatexErrKind, LatexError};
50pub use self::token::Token;
51use self::{lexer::Lexer, parser::Parser};
52
/// Selects whether an equation is rendered inline or as a block.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MathDisplay {
    /// Inline equation, the counterpart of `$...$` in LaTeX.
    Inline,
    /// Block equation (also called "display style"), the counterpart of `$$...$$` in LaTeX.
    Block,
}
61
/// Controls whether the MathML output is pretty-printed.
///
/// Pretty-printing means that newlines and indentation are added to the MathML output, which
/// makes it easier to read.
#[derive(Clone, Copy, Debug, Default)]
#[cfg_attr(
    feature = "serde",
    derive(Serialize, Deserialize),
    serde(rename_all = "kebab-case")
)]
#[non_exhaustive]
pub enum PrettyPrint {
    /// Pretty-printing is switched off for all equations. This is the default.
    #[default]
    Never,
    /// Pretty-printing is switched on for all equations.
    Always,
    /// Pretty-printing is switched on for block equations only.
    Auto,
}
79
80/// Configuration object for the LaTeX to MathML conversion.
81///
82/// # Example usage
83///
84/// ```rust
85/// use math_core::{MathCoreConfig, PrettyPrint};
86///
87/// // Default values
88/// let config = MathCoreConfig::default();
89///
90/// // Specifying pretty-print behavior
91/// let config = MathCoreConfig {
92///     pretty_print: PrettyPrint::Always,
93///     ..Default::default()
94///  };
95///
96/// // Specifying pretty-print behavior and custom macros
97/// let macros = vec![
98///     ("d".to_string(), r"\mathrm{d}".to_string()),
99///     ("bb".to_string(), r"\mathbb{#1}".to_string()), // with argument
100/// ];
101/// let config = MathCoreConfig {
102///     pretty_print: PrettyPrint::Auto,
103///     macros,
104///     ..Default::default()
105/// };
106/// ```
107///
108#[derive(Debug, Default)]
109#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
110#[cfg_attr(feature = "serde", serde(default, rename_all = "kebab-case"))]
111pub struct MathCoreConfig {
112    /// A configuration for pretty-printing the MathML output. See [`PrettyPrint`] for details.
113    pub pretty_print: PrettyPrint,
114    /// A list of LaTeX macros; each tuple contains (macro_name, macro_definition).
115    #[cfg_attr(feature = "serde", serde(with = "tuple_vec_map"))]
116    pub macros: Vec<(String, String)>,
117    /// If `true`, include `xmlns="http://www.w3.org/1998/Math/MathML"` in the `<math>` tag.
118    pub xml_namespace: bool,
119    /// If `true`, unknown commands will be rendered as red text in the output, instead of
120    /// returning an error.
121    pub ignore_unknown_commands: bool,
122}
123
124#[derive(Debug, Default)]
125struct CommandConfig {
126    custom_cmd_tokens: Vec<Token<'static>>,
127    custom_cmd_map: FxHashMap<String, (u8, (usize, usize))>,
128    ignore_unknown_commands: bool,
129}
130
131impl CommandConfig {
132    pub fn get_command<'config>(&'config self, command: &str) -> Option<Token<'config>> {
133        let (num_args, slice) = *self.custom_cmd_map.get(command)?;
134        let tokens = self.custom_cmd_tokens.get(slice.0..slice.1)?;
135        Some(Token::CustomCmd(num_args, tokens))
136    }
137}
138
139/// This struct contains those fields from `MathCoreConfig` that are simple flags.
140#[derive(Debug, Default)]
141struct Flags {
142    pretty_print: PrettyPrint,
143    xml_namespace: bool,
144}
145
146impl From<&MathCoreConfig> for Flags {
147    fn from(config: &MathCoreConfig) -> Self {
148        // TODO: can we use a macro here to avoid repeating the field names?
149        Self {
150            pretty_print: config.pretty_print,
151            xml_namespace: config.xml_namespace,
152        }
153    }
154}
155
156/// A converter that transforms LaTeX math equations into MathML Core.
157#[derive(Debug, Default)]
158pub struct LatexToMathML {
159    flags: Flags,
160    /// This is used for numbering equations in the document.
161    equation_count: u16,
162    cmd_cfg: Option<CommandConfig>,
163}
164
165impl LatexToMathML {
166    /// Create a new `LatexToMathML` converter with the given configuration.
167    ///
168    /// This function returns an error if the custom macros in the given configuration could not
169    /// be parsed. The error contains both the parsing error and the macro definition that caused
170    /// the error.
171    pub fn new(config: MathCoreConfig) -> Result<Self, (Box<LatexError>, String)> {
172        Ok(Self {
173            flags: Flags::from(&config),
174            equation_count: 0,
175            cmd_cfg: Some(parse_custom_commands(
176                config.macros,
177                config.ignore_unknown_commands,
178            )?),
179        })
180    }
181
182    /// Convert LaTeX text to MathML with a global equation counter.
183    ///
184    /// For basic usage, see the documentation of [`convert_with_local_counter`].
185    ///
186    /// This conversion function maintains state, in order to count equations correctly across
187    /// different calls to this function.
188    ///
189    /// The counter can be reset with [`reset_global_counter`].
190    pub fn convert_with_global_counter(
191        &mut self,
192        latex: &str,
193        display: MathDisplay,
194    ) -> Result<String, Box<LatexError>> {
195        convert(
196            latex,
197            display,
198            self.cmd_cfg.as_ref(),
199            &mut self.equation_count,
200            &self.flags,
201        )
202    }
203
204    /// Convert LaTeX text to MathML.
205    ///
206    /// The second argument specifies whether it is inline-equation or block-equation.
207    ///
208    /// ```rust
209    /// use math_core::{LatexToMathML, MathCoreConfig, MathDisplay};
210    ///
211    /// let latex = r#"(n + 1)! = \Gamma ( n + 1 )"#;
212    /// let config = MathCoreConfig::default();
213    /// let converter = LatexToMathML::new(config).unwrap();
214    /// let mathml = converter.convert_with_local_counter(latex, MathDisplay::Inline).unwrap();
215    /// println!("{}", mathml);
216    ///
217    /// let latex = r#"x = \frac{ - b \pm \sqrt{ b^2 - 4 a c } }{ 2 a }"#;
218    /// let mathml = converter.convert_with_local_counter(latex, MathDisplay::Block).unwrap();
219    /// println!("{}", mathml);
220    /// ```
221    ///
222    #[inline]
223    pub fn convert_with_local_counter(
224        &self,
225        latex: &str,
226        display: MathDisplay,
227    ) -> Result<String, Box<LatexError>> {
228        let mut equation_count = 0;
229        convert(
230            latex,
231            display,
232            self.cmd_cfg.as_ref(),
233            &mut equation_count,
234            &self.flags,
235        )
236    }
237
238    /// Reset the equation counter to zero.
239    ///
240    /// This should normally be done at the beginning of a new document or section.
241    pub fn reset_global_counter(&mut self) {
242        self.equation_count = 0;
243    }
244}
245
246fn convert(
247    latex: &str,
248    display: MathDisplay,
249    cmd_cfg: Option<&CommandConfig>,
250    equation_count: &mut u16,
251    flags: &Flags,
252) -> Result<String, Box<LatexError>> {
253    let arena = Arena::new();
254    let ast = parse(latex, &arena, cmd_cfg, equation_count)?;
255
256    let mut output = String::new();
257    output.push_str("<math");
258    if flags.xml_namespace {
259        output.push_str(" xmlns=\"http://www.w3.org/1998/Math/MathML\"");
260    }
261    if matches!(display, MathDisplay::Block) {
262        output.push_str(" display=\"block\"");
263    };
264    output.push('>');
265
266    let pretty_print = matches!(flags.pretty_print, PrettyPrint::Always)
267        || (matches!(flags.pretty_print, PrettyPrint::Auto) && display == MathDisplay::Block);
268
269    let base_indent = if pretty_print { 1 } else { 0 };
270    for node in ast {
271        // We ignore the result of `emit` here, because the only possible error is a formatting
272        // error when writing to the string, and that can only happen if the string's `write_str`
273        // implementation returns an error. Since `String`'s `write_str` implementation never
274        // returns an error, we can safely ignore the result of `emit`.
275        let _ = node.emit(&mut output, base_indent);
276    }
277    if pretty_print {
278        output.push('\n');
279    }
280    output.push_str("</math>");
281    Ok(output)
282}
283
284fn parse<'arena, 'source, 'config>(
285    latex: &'source str,
286    arena: &'arena Arena,
287    cmd_cfg: Option<&'config CommandConfig>,
288    equation_count: &mut u16,
289) -> Result<Vec<&'arena Node<'arena>>, Box<LatexError>>
290where
291    'config: 'source,
292    'source: 'arena,
293{
294    let lexer = Lexer::new(latex, false, cmd_cfg);
295    let mut p = Parser::new(lexer, arena, equation_count)?;
296    let nodes = p.parse()?;
297    Ok(nodes)
298}
299
300fn parse_custom_commands(
301    macros: Vec<(String, String)>,
302    ignore_unknown_commands: bool,
303) -> Result<CommandConfig, (Box<LatexError>, String)> {
304    let mut map = FxHashMap::with_capacity_and_hasher(macros.len(), Default::default());
305    let mut tokens = Vec::new();
306    for (name, definition) in macros {
307        if !is_valid_macro_name(name.as_str()) {
308            return Err((
309                Box::new(LatexError(0..0, LatexErrKind::InvalidMacroName(name))),
310                definition,
311            ));
312        }
313
314        // In order to be able to return `definition` in case of an error, we need to ensure
315        // that the lexer (which borrows `definition`) is dropped before we return the error.
316        // Therefore, we put the whole lexing process into its own block.
317        let value = 'value: {
318            let mut lexer: Lexer<'static, '_> = Lexer::new(definition.as_str(), true, None);
319            let start = tokens.len();
320            loop {
321                match lexer.next_token_no_unknown_command() {
322                    Ok(tokloc) => {
323                        if matches!(tokloc.token(), Token::Eof) {
324                            break;
325                        }
326                        tokens.push(tokloc.into_token());
327                    }
328                    Err(err) => {
329                        break 'value Err(err);
330                    }
331                }
332            }
333            let end = tokens.len();
334            let num_args = lexer.parse_cmd_args().unwrap_or(0);
335            Ok((num_args, (start, end)))
336        };
337
338        match value {
339            Err(err) => {
340                return Err((err, definition));
341            }
342            Ok(v) => {
343                map.insert(name, v);
344            }
345        };
346    }
347    Ok(CommandConfig {
348        custom_cmd_tokens: tokens,
349        custom_cmd_map: map,
350        ignore_unknown_commands,
351    })
352}
353
/// Check whether `s` may be used as the name of a custom macro.
///
/// The empty string is rejected. A name made of exactly one character is always accepted,
/// whatever that character is. A longer name is accepted only if it consists entirely of
/// ASCII letters.
fn is_valid_macro_name(s: &str) -> bool {
    let mut chars = s.chars();
    match (chars.next(), chars.next()) {
        // No characters at all.
        (None, _) => false,
        // Exactly one character: anything goes.
        (Some(_), None) => true,
        // Two or more characters: every byte must be an ASCII letter.
        (Some(_), Some(_)) => s.bytes().all(|b| b.is_ascii_alphabetic()),
    }
}
365}