Skip to main content

math_core/
lib.rs

1//! Convert LaTeX math to MathML Core.
2//!
3//! For more background on what that means and on what to do with the resulting MathML code,
4//! see the repo's README: https://github.com/tmke8/math-core
5//!
6//! # Usage
7//!
8//! The main struct of this library is [`LatexToMathML`]. In order to use the library, create an
9//! instance of this struct and then call one of the convert functions. The constructor of the
10//! struct expects a config object in the form of an instance of [`MathCoreConfig`].
11//!
12//! Basic use looks like this:
13//!
14//! ```rust
15//! use math_core::{LatexToMathML, MathCoreConfig, MathDisplay};
16//!
17//! let latex = r#"\erf ( x ) = \frac{ 2 }{ \sqrt{ \pi } } \int_0^x e^{- t^2} \, dt"#;
18//! let config = MathCoreConfig::default();
19//! let converter = LatexToMathML::new(config).unwrap();
20//! let mathml = converter.convert_with_local_counter(latex, MathDisplay::Block).unwrap();
21//! println!("{}", mathml);
22//! ```
23//!
24//! # Features
25//!
//! - `serde`: With this feature, `MathCoreConfig` implements serde's `Serialize` and `Deserialize`.
27//!
28mod atof;
29mod character_class;
30mod color_defs;
31mod commands;
32mod environments;
33mod error;
34mod html_utils;
35mod lexer;
36mod parser;
37mod predefined;
38mod specifications;
39mod text_parser;
40mod token;
41mod token_queue;
42
43use rustc_hash::FxHashMap;
44#[cfg(feature = "serde")]
45use serde::{Deserialize, Serialize};
46
47use mathml_renderer::{arena::Arena, ast::Node, fmt::new_line_and_indent};
48
49pub use self::error::LatexError;
50use self::{error::LatexErrKind, lexer::Lexer, parser::Parser, token::Token};
51
/// Selects how a LaTeX math equation is laid out in the output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MathDisplay {
    /// An inline equation, like one written as `$...$` in LaTeX.
    Inline,
    /// A block (or "display style") equation, like one written as `$$...$$` in LaTeX.
    Block,
}
60
/// Controls whether the MathML output is pretty-printed.
///
/// Pretty-printing means that newlines and indentation are added to the MathML output, to make
/// it easier to read.
#[derive(Debug, Clone, Copy, Default)]
#[cfg_attr(
    feature = "serde",
    derive(Serialize, Deserialize),
    serde(rename_all = "kebab-case")
)]
#[non_exhaustive]
pub enum PrettyPrint {
    /// Never pretty print. This is the default.
    #[default]
    Never,
    /// Always pretty print.
    Always,
    /// Pretty print block equations only; inline equations stay compact.
    Auto,
}
78
79/// Configuration object for the LaTeX to MathML conversion.
80///
81/// # Example usage
82///
83/// ```rust
84/// use math_core::{MathCoreConfig, PrettyPrint};
85///
86/// // Default values
87/// let config = MathCoreConfig::default();
88///
89/// // Specifying pretty-print behavior
90/// let config = MathCoreConfig {
91///     pretty_print: PrettyPrint::Always,
92///     ..Default::default()
93///  };
94///
95/// // Specifying pretty-print behavior and custom macros
96/// let macros = vec![
97///     ("d".to_string(), r"\mathrm{d}".to_string()),
98///     ("bb".to_string(), r"\mathbb{#1}".to_string()), // with argument
99/// ];
100/// let config = MathCoreConfig {
101///     pretty_print: PrettyPrint::Auto,
102///     macros,
103///     ..Default::default()
104/// };
105/// ```
106///
107#[derive(Debug, Default)]
108#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
109#[cfg_attr(feature = "serde", serde(default, rename_all = "kebab-case"))]
110pub struct MathCoreConfig {
111    /// A configuration for pretty-printing the MathML output. See [`PrettyPrint`] for details.
112    pub pretty_print: PrettyPrint,
113    /// A list of LaTeX macros; each tuple contains (macro_name, macro_definition).
114    #[cfg_attr(feature = "serde", serde(with = "tuple_vec_map"))]
115    pub macros: Vec<(String, String)>,
116    /// If `true`, include `xmlns="http://www.w3.org/1998/Math/MathML"` in the `<math>` tag.
117    pub xml_namespace: bool,
118    /// If `true`, unknown commands will be rendered as red text in the output, instead of
119    /// returning an error.
120    pub ignore_unknown_commands: bool,
121    /// If `true`, wrap the MathML output in `<semantics>` tags with an
122    /// `<annotation encoding="application/x-tex">` child containing the original LaTeX source.
123    pub annotation: bool,
124}
125
126#[derive(Debug, Default)]
127struct CommandConfig {
128    custom_cmd_tokens: Vec<Token<'static>>,
129    custom_cmd_map: FxHashMap<String, (u8, (usize, usize))>,
130    ignore_unknown_commands: bool,
131}
132
133impl CommandConfig {
134    pub fn get_command<'config>(&'config self, command: &str) -> Option<Token<'config>> {
135        let (num_args, slice) = *self.custom_cmd_map.get(command)?;
136        let tokens = self.custom_cmd_tokens.get(slice.0..slice.1)?;
137        Some(Token::CustomCmd(num_args, tokens))
138    }
139}
140
141/// This struct contains those fields from `MathCoreConfig` that are simple flags.
142#[derive(Debug, Default)]
143struct Flags {
144    pretty_print: PrettyPrint,
145    xml_namespace: bool,
146    annotation: bool,
147}
148
149impl From<&MathCoreConfig> for Flags {
150    fn from(config: &MathCoreConfig) -> Self {
151        // TODO: can we use a macro here to avoid repeating the field names?
152        Self {
153            pretty_print: config.pretty_print,
154            xml_namespace: config.xml_namespace,
155            annotation: config.annotation,
156        }
157    }
158}
159
160/// A converter that transforms LaTeX math equations into MathML Core.
161#[derive(Debug, Default)]
162pub struct LatexToMathML {
163    flags: Flags,
164    /// This is used for numbering equations in the document.
165    equation_count: u16,
166    cmd_cfg: Option<CommandConfig>,
167}
168
169impl LatexToMathML {
170    /// Create a new `LatexToMathML` converter with the given configuration.
171    ///
172    /// This function returns an error if the custom macros in the given configuration could not
173    /// be parsed. The error contains the parsing error, the macro index and the macro definition
174    /// that caused the error.
175    pub fn new(config: MathCoreConfig) -> Result<Self, (Box<LatexError>, usize, String)> {
176        Ok(Self {
177            flags: Flags::from(&config),
178            equation_count: 0,
179            cmd_cfg: Some(parse_custom_commands(
180                config.macros,
181                config.ignore_unknown_commands,
182            )?),
183        })
184    }
185
186    /// Convert LaTeX text to MathML with a global equation counter.
187    ///
188    /// For basic usage, see the documentation of [`convert_with_local_counter`].
189    ///
190    /// This conversion function maintains state, in order to count equations correctly across
191    /// different calls to this function.
192    ///
193    /// The counter can be reset with [`reset_global_counter`].
194    pub fn convert_with_global_counter(
195        &mut self,
196        latex: &str,
197        display: MathDisplay,
198    ) -> Result<String, Box<LatexError>> {
199        convert(
200            latex,
201            display,
202            self.cmd_cfg.as_ref(),
203            &mut self.equation_count,
204            &self.flags,
205        )
206    }
207
208    /// Convert LaTeX text to MathML.
209    ///
210    /// The second argument specifies whether it is inline-equation or block-equation.
211    ///
212    /// ```rust
213    /// use math_core::{LatexToMathML, MathCoreConfig, MathDisplay};
214    ///
215    /// let latex = r#"(n + 1)! = \Gamma ( n + 1 )"#;
216    /// let config = MathCoreConfig::default();
217    /// let converter = LatexToMathML::new(config).unwrap();
218    /// let mathml = converter.convert_with_local_counter(latex, MathDisplay::Inline).unwrap();
219    /// println!("{}", mathml);
220    ///
221    /// let latex = r#"x = \frac{ - b \pm \sqrt{ b^2 - 4 a c } }{ 2 a }"#;
222    /// let mathml = converter.convert_with_local_counter(latex, MathDisplay::Block).unwrap();
223    /// println!("{}", mathml);
224    /// ```
225    ///
226    #[inline]
227    pub fn convert_with_local_counter(
228        &self,
229        latex: &str,
230        display: MathDisplay,
231    ) -> Result<String, Box<LatexError>> {
232        let mut equation_count = 0;
233        convert(
234            latex,
235            display,
236            self.cmd_cfg.as_ref(),
237            &mut equation_count,
238            &self.flags,
239        )
240    }
241
242    /// Reset the equation counter to zero.
243    ///
244    /// This should normally be done at the beginning of a new document or section.
245    pub fn reset_global_counter(&mut self) {
246        self.equation_count = 0;
247    }
248}
249
250fn convert(
251    latex: &str,
252    display: MathDisplay,
253    cmd_cfg: Option<&CommandConfig>,
254    equation_count: &mut u16,
255    flags: &Flags,
256) -> Result<String, Box<LatexError>> {
257    let arena = Arena::new();
258    let ast = parse(latex, &arena, cmd_cfg, equation_count)?;
259
260    let mut output = String::new();
261    output.push_str("<math");
262    if flags.xml_namespace {
263        output.push_str(" xmlns=\"http://www.w3.org/1998/Math/MathML\"");
264    }
265    if matches!(display, MathDisplay::Block) {
266        output.push_str(" display=\"block\"");
267    };
268    output.push('>');
269
270    let pretty_print = matches!(flags.pretty_print, PrettyPrint::Always)
271        || (matches!(flags.pretty_print, PrettyPrint::Auto) && display == MathDisplay::Block);
272
273    let base_indent = if pretty_print { 1 } else { 0 };
274    if flags.annotation {
275        new_line_and_indent(&mut output, base_indent);
276        output.push_str("<semantics>");
277        let node = parser::node_vec_to_node(&arena, ast, false);
278        let _ = node.emit(&mut output, base_indent + 1);
279        new_line_and_indent(&mut output, base_indent + 1);
280        output.push_str("<annotation encoding=\"application/x-tex\">");
281        html_utils::escape_html_content(&mut output, latex);
282        output.push_str("</annotation>");
283        new_line_and_indent(&mut output, base_indent);
284        output.push_str("</semantics>");
285    } else {
286        for node in ast {
287            // We ignore the result of `emit` here, because the only possible error is a formatting
288            // error when writing to the string, and that can only happen if the string's `write_str`
289            // implementation returns an error. Since `String`'s `write_str` implementation never
290            // returns an error, we can safely ignore the result of `emit`.
291            let _ = node.emit(&mut output, base_indent);
292        }
293    }
294    if pretty_print {
295        output.push('\n');
296    }
297    output.push_str("</math>");
298    Ok(output)
299}
300
301fn parse<'arena, 'source, 'config>(
302    latex: &'source str,
303    arena: &'arena Arena,
304    cmd_cfg: Option<&'config CommandConfig>,
305    equation_count: &mut u16,
306) -> Result<Vec<&'arena Node<'arena>>, Box<LatexError>>
307where
308    'config: 'source,
309    'source: 'arena,
310{
311    let lexer = Lexer::new(latex, false, cmd_cfg);
312    let mut p = Parser::new(lexer, arena, equation_count)?;
313    let nodes = p.parse()?;
314    Ok(nodes)
315}
316
317fn parse_custom_commands(
318    macros: Vec<(String, String)>,
319    ignore_unknown_commands: bool,
320) -> Result<CommandConfig, (Box<LatexError>, usize, String)> {
321    let mut map = FxHashMap::with_capacity_and_hasher(macros.len(), Default::default());
322    let mut tokens = Vec::new();
323    for (idx, (name, definition)) in macros.into_iter().enumerate() {
324        if !is_valid_macro_name(name.as_str()) {
325            return Err((
326                Box::new(LatexError(0..0, LatexErrKind::InvalidMacroName(name))),
327                idx,
328                definition,
329            ));
330        }
331
332        // In order to be able to return `definition` in case of an error, we need to ensure
333        // that the lexer (which borrows `definition`) is dropped before we return the error.
334        // Therefore, we put the whole lexing process into its own block.
335        let value = 'value: {
336            let mut lexer: Lexer<'static, '_> = Lexer::new(definition.as_str(), true, None);
337            let start = tokens.len();
338            loop {
339                match lexer.next_token_no_unknown_command() {
340                    Ok(tokloc) => {
341                        if matches!(tokloc.token(), Token::Eof) {
342                            break;
343                        }
344                        tokens.push(tokloc.into_token());
345                    }
346                    Err(err) => {
347                        break 'value Err(err);
348                    }
349                }
350            }
351            let end = tokens.len();
352            let num_args = lexer.parse_cmd_args().unwrap_or(0);
353            Ok((num_args, (start, end)))
354        };
355
356        match value {
357            Err(err) => {
358                return Err((err, idx, definition));
359            }
360            Ok(v) => {
361                map.insert(name, v);
362            }
363        };
364    }
365    Ok(CommandConfig {
366        custom_cmd_tokens: tokens,
367        custom_cmd_map: map,
368        ignore_unknown_commands,
369    })
370}
371
/// Check whether `s` is acceptable as the name of a custom macro.
///
/// The empty string is rejected. A name consisting of a single character is always accepted,
/// whatever that character is. A longer name is accepted only if it consists entirely of
/// ASCII letters.
fn is_valid_macro_name(s: &str) -> bool {
    let mut chars = s.chars();
    match (chars.next(), chars.next()) {
        // No characters at all.
        (None, _) => false,
        // Exactly one character: any character is valid.
        (Some(_), None) => true,
        // Two or more characters: all of them must be ASCII alphabetic.
        (Some(_), Some(_)) => s.bytes().all(|b| b.is_ascii_alphabetic()),
    }
}
383}