Skip to main content

math_core/
lib.rs

1//! Convert LaTeX math to MathML Core.
2//!
3//! For more background on what that means and on what to do with the resulting MathML code,
4//! see the repo's README: https://github.com/tmke8/math-core
5//!
6//! # Usage
7//!
8//! The main struct of this library is [`LatexToMathML`]. In order to use the library, create an
9//! instance of this struct and then call one of the convert functions. The constructor of the
10//! struct expects a config object in the form of an instance of [`MathCoreConfig`].
11//!
12//! Basic use looks like this:
13//!
14//! ```rust
15//! use math_core::{LatexToMathML, MathCoreConfig, MathDisplay};
16//!
17//! let latex = r#"\erf ( x ) = \frac{ 2 }{ \sqrt{ \pi } } \int_0^x e^{- t^2} \, dt"#;
18//! let config = MathCoreConfig::default();
19//! let converter = LatexToMathML::new(config).unwrap();
20//! let mathml = converter.convert_with_local_counter(latex, MathDisplay::Block).unwrap();
21//! println!("{}", mathml);
22//! ```
23//!
24//! # Features
25//!
//! - `serde`: With this feature, `MathCoreConfig` and `PrettyPrint` implement serde's
//!   `Serialize` and `Deserialize`.
27//!
28mod atof;
29mod character_class;
30mod color_defs;
31mod commands;
32mod environments;
33mod error;
34mod html_utils;
35mod lexer;
36mod parser;
37mod predefined;
38mod specifications;
39mod split_on_ascii;
40mod text_parser;
41mod token;
42mod token_queue;
43
44use std::num::NonZeroU16;
45
46use rustc_hash::{FxBuildHasher, FxHashMap};
47#[cfg(feature = "serde")]
48use serde::{Deserialize, Serialize};
49
50use mathml_renderer::{arena::Arena, ast::Node, attribute::Style, fmt::new_line_and_indent};
51
52pub use self::error::LatexError;
53use self::{error::LatexErrKind, lexer::Lexer, parser::Parser, token::Token};
54
/// Selects how a LaTeX math equation is laid out.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MathDisplay {
    /// Inline math, i.e. what `$...$` produces in LaTeX.
    Inline,
    /// Block math (also known as "display style"), i.e. what `$$...$$` produces in LaTeX.
    Block,
}
63
/// Configuration for pretty-printing the MathML output.
///
/// Pretty-printing means that newlines and indentation are added to the MathML output, to make
/// it easier to read.
///
/// The enum implements `PartialEq`/`Eq` so that values can be compared with `==`, just like
/// `MathDisplay`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))]
#[non_exhaustive]
pub enum PrettyPrint {
    /// Never pretty print. This is the default.
    #[default]
    Never,
    /// Always pretty print.
    Always,
    /// Pretty print for block equations only.
    Auto,
}
81
82/// Configuration object for the LaTeX to MathML conversion.
83///
84/// # Example usage
85///
86/// ```rust
87/// use math_core::{MathCoreConfig, PrettyPrint};
88///
89/// // Default values
90/// let config = MathCoreConfig::default();
91///
92/// // Specifying pretty-print behavior
93/// let config = MathCoreConfig {
94///     pretty_print: PrettyPrint::Always,
95///     ..Default::default()
96///  };
97///
98/// // Specifying pretty-print behavior and custom macros
99/// let macros = vec![
100///     ("d".to_string(), r"\mathrm{d}".to_string()),
101///     ("bb".to_string(), r"\mathbb{#1}".to_string()), // with argument
102/// ];
103/// let config = MathCoreConfig {
104///     pretty_print: PrettyPrint::Auto,
105///     macros,
106///     ..Default::default()
107/// };
108/// ```
109///
110#[derive(Debug, Default)]
111#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
112#[cfg_attr(feature = "serde", serde(default, rename_all = "kebab-case"))]
113pub struct MathCoreConfig {
114    /// A configuration for pretty-printing the MathML output. See [`PrettyPrint`] for details.
115    pub pretty_print: PrettyPrint,
116    /// A list of LaTeX macros; each tuple contains (macro_name, macro_definition).
117    #[cfg_attr(feature = "serde", serde(with = "tuple_vec_map"))]
118    pub macros: Vec<(String, String)>,
119    /// If `true`, include `xmlns="http://www.w3.org/1998/Math/MathML"` in the `<math>` tag.
120    pub xml_namespace: bool,
121    /// If `true`, unknown commands will be rendered as red text in the output, instead of
122    /// returning an error.
123    pub ignore_unknown_commands: bool,
124    /// If `true`, wrap the MathML output in `<semantics>` tags with an
125    /// `<annotation encoding="application/x-tex">` child containing the original LaTeX source.
126    pub annotation: bool,
127    /// If `true`, allow rendering commands that produce MathML Core output that is unreliably
128    /// rendered by browsers.
129    pub allow_unreliable_rendering: bool,
130}
131
132#[derive(Debug, Default)]
133struct CommandConfig {
134    custom_cmd_tokens: Vec<Token<'static>>,
135    custom_cmd_map: FxHashMap<String, (u8, (usize, usize))>,
136    ignore_unknown_commands: bool,
137    allow_unreliable_rendering: bool,
138}
139
140impl CommandConfig {
141    pub fn get_command<'config>(&'config self, command: &str) -> Option<Token<'config>> {
142        let (num_args, slice) = *self.custom_cmd_map.get(command)?;
143        let tokens = self.custom_cmd_tokens.get(slice.0..slice.1)?;
144        Some(Token::CustomCmd(num_args, tokens))
145    }
146}
147
148/// This struct contains those fields from `MathCoreConfig` that are simple flags.
149#[derive(Debug, Default)]
150struct Flags {
151    pretty_print: PrettyPrint,
152    xml_namespace: bool,
153    annotation: bool,
154}
155
156impl From<&MathCoreConfig> for Flags {
157    fn from(config: &MathCoreConfig) -> Self {
158        // TODO: can we use a macro here to avoid repeating the field names?
159        Self {
160            pretty_print: config.pretty_print,
161            xml_namespace: config.xml_namespace,
162            annotation: config.annotation,
163        }
164    }
165}
166
167/// A converter that transforms LaTeX math equations into MathML Core.
168#[derive(Debug, Default)]
169pub struct LatexToMathML {
170    flags: Flags,
171    /// This is used for numbering equations in the document.
172    equation_count: u16,
173    label_map: FxHashMap<Box<str>, NonZeroU16>,
174    cmd_cfg: Option<CommandConfig>,
175}
176
177impl LatexToMathML {
178    /// Create a new `LatexToMathML` converter with the given configuration.
179    ///
180    /// This function returns an error if the custom macros in the given configuration could not
181    /// be parsed. The error contains the parsing error, the macro index and the macro definition
182    /// that caused the error.
183    pub fn new(config: MathCoreConfig) -> Result<Self, (Box<LatexError>, usize, String)> {
184        Ok(Self {
185            flags: Flags::from(&config),
186            equation_count: 0,
187            label_map: FxHashMap::default(),
188            cmd_cfg: Some(parse_custom_commands(config)?),
189        })
190    }
191
192    /// Convert LaTeX text to MathML with a global equation counter.
193    ///
194    /// For basic usage, see the documentation of [`convert_with_local_counter`].
195    ///
196    /// This conversion function maintains state, in order to count equations correctly across
197    /// different calls to this function.
198    ///
199    /// The counter can be reset with [`reset_global_counter`].
200    pub fn convert_with_global_counter(
201        &mut self,
202        latex: &str,
203        display: MathDisplay,
204    ) -> Result<String, Box<LatexError>> {
205        convert(
206            latex,
207            display,
208            self.cmd_cfg.as_ref(),
209            &mut self.equation_count,
210            &mut self.label_map,
211            &self.flags,
212        )
213    }
214
215    /// Convert LaTeX text to MathML.
216    ///
217    /// The second argument specifies whether it is inline-equation or block-equation.
218    ///
219    /// ```rust
220    /// use math_core::{LatexToMathML, MathCoreConfig, MathDisplay};
221    ///
222    /// let latex = r#"(n + 1)! = \Gamma ( n + 1 )"#;
223    /// let config = MathCoreConfig::default();
224    /// let converter = LatexToMathML::new(config).unwrap();
225    /// let mathml = converter.convert_with_local_counter(latex, MathDisplay::Inline).unwrap();
226    /// println!("{}", mathml);
227    ///
228    /// let latex = r#"x = \frac{ - b \pm \sqrt{ b^2 - 4 a c } }{ 2 a }"#;
229    /// let mathml = converter.convert_with_local_counter(latex, MathDisplay::Block).unwrap();
230    /// println!("{}", mathml);
231    /// ```
232    ///
233    #[inline]
234    pub fn convert_with_local_counter(
235        &self,
236        latex: &str,
237        display: MathDisplay,
238    ) -> Result<String, Box<LatexError>> {
239        let mut equation_count = 0;
240        let mut label_map = FxHashMap::default();
241        convert(
242            latex,
243            display,
244            self.cmd_cfg.as_ref(),
245            &mut equation_count,
246            &mut label_map,
247            &self.flags,
248        )
249    }
250
251    /// Reset the equation counter to zero.
252    ///
253    /// This should normally be done at the beginning of a new document or section.
254    pub fn reset_global_counter(&mut self) {
255        self.equation_count = 0;
256    }
257}
258
259fn convert(
260    latex: &str,
261    display: MathDisplay,
262    cmd_cfg: Option<&CommandConfig>,
263    equation_count: &mut u16,
264    label_map: &mut FxHashMap<Box<str>, NonZeroU16>,
265    flags: &Flags,
266) -> Result<String, Box<LatexError>> {
267    let arena = Arena::new();
268    let ast = parse(latex, &arena, cmd_cfg, equation_count, label_map, display)?;
269
270    let mut output = String::new();
271    output.push_str("<math");
272    if flags.xml_namespace {
273        output.push_str(" xmlns=\"http://www.w3.org/1998/Math/MathML\"");
274    }
275    if matches!(display, MathDisplay::Block) {
276        output.push_str(" display=\"block\"");
277    }
278    output.push('>');
279
280    let pretty_print = matches!(flags.pretty_print, PrettyPrint::Always)
281        || (matches!(flags.pretty_print, PrettyPrint::Auto) && display == MathDisplay::Block);
282
283    let base_indent = if pretty_print { 1 } else { 0 };
284    if flags.annotation {
285        let children_indent = if pretty_print { 2 } else { 0 };
286        new_line_and_indent(&mut output, base_indent);
287        output.push_str("<semantics>");
288        let node = parser::node_vec_to_node(&arena, &ast, false);
289        let _ = node.emit(&mut output, children_indent);
290        new_line_and_indent(&mut output, children_indent);
291        output.push_str("<annotation encoding=\"application/x-tex\">");
292        html_utils::escape_html_content(&mut output, latex);
293        output.push_str("</annotation>");
294        new_line_and_indent(&mut output, base_indent);
295        output.push_str("</semantics>");
296    } else {
297        for node in ast {
298            // We ignore the result of `emit` here, because the only possible error is a formatting
299            // error when writing to the string, and that can only happen if the string's `write_str`
300            // implementation returns an error. Since `String`'s `write_str` implementation never
301            // returns an error, we can safely ignore the result of `emit`.
302            let _ = node.emit(&mut output, base_indent);
303        }
304    }
305    if pretty_print {
306        output.push('\n');
307    }
308    output.push_str("</math>");
309    Ok(output)
310}
311
312fn parse<'config, 'source, 'arena>(
313    latex: &'source str,
314    arena: &'arena Arena,
315    cmd_cfg: Option<&'config CommandConfig>,
316    equation_count: &'arena mut u16,
317    label_map: &'arena mut FxHashMap<Box<str>, NonZeroU16>,
318    display: MathDisplay,
319) -> Result<Vec<&'arena Node<'arena>>, Box<LatexError>>
320where
321    'config: 'source,
322    'source: 'arena,
323{
324    let style = match display {
325        MathDisplay::Inline => Style::Text,
326        MathDisplay::Block => Style::Display,
327    };
328    let lexer = Lexer::new(latex, false, cmd_cfg);
329    let mut p = Parser::new(lexer, arena, equation_count, label_map, style)?;
330    let nodes = p.parse()?;
331    Ok(nodes)
332}
333
334fn parse_custom_commands(
335    cfg: MathCoreConfig,
336) -> Result<CommandConfig, (Box<LatexError>, usize, String)> {
337    let macros = cfg.macros;
338    let mut map = FxHashMap::with_capacity_and_hasher(macros.len(), FxBuildHasher);
339    let mut tokens = Vec::new();
340    for (idx, (name, definition)) in macros.into_iter().enumerate() {
341        if !is_valid_macro_name(name.as_str()) {
342            return Err((
343                Box::new(LatexError(0..0, LatexErrKind::InvalidMacroName(name))),
344                idx,
345                definition,
346            ));
347        }
348
349        // In order to be able to return `definition` in case of an error, we need to ensure
350        // that the lexer (which borrows `definition`) is dropped before we return the error.
351        // Therefore, we put the whole lexing process into its own block.
352        let value = 'value: {
353            let mut lexer: Lexer<'static, '_> = Lexer::new(definition.as_str(), true, None);
354            let start = tokens.len();
355            loop {
356                match lexer.next_token_no_unknown_command() {
357                    Ok(tokloc) => {
358                        if matches!(tokloc.token(), Token::Eoi) {
359                            break;
360                        }
361                        tokens.push(tokloc.into_token());
362                    }
363                    Err(err) => {
364                        break 'value Err(err);
365                    }
366                }
367            }
368            let end = tokens.len();
369            let num_args = lexer.parse_cmd_args().unwrap_or(0);
370            Ok((num_args, (start, end)))
371        };
372
373        match value {
374            Err(err) => {
375                return Err((err, idx, definition));
376            }
377            Ok(v) => {
378                map.insert(name, v);
379            }
380        }
381    }
382    Ok(CommandConfig {
383        custom_cmd_tokens: tokens,
384        custom_cmd_map: map,
385        ignore_unknown_commands: cfg.ignore_unknown_commands,
386        allow_unreliable_rendering: cfg.allow_unreliable_rendering,
387    })
388}
389
/// Checks whether `s` may be used as the name of a custom macro.
///
/// The empty string is rejected. A name made of exactly one character is always accepted,
/// whatever that character is. Longer names are accepted only if they consist solely of ASCII
/// letters.
fn is_valid_macro_name(s: &str) -> bool {
    let mut rest = s.chars();
    if rest.next().is_none() {
        // No characters at all.
        return false;
    }
    // Either there is no second character, or the whole name is ASCII alphabetic.
    rest.next().is_none() || s.bytes().all(|b| b.is_ascii_alphabetic())
}