math_core/lib.rs
1//! Convert LaTeX math to MathML Core.
2//!
//! For more background on what that means and on what to do with the resulting MathML code,
//! see the repo's README: <https://github.com/tmke8/math-core>
5//!
6//! # Usage
7//!
8//! The main struct of this library is [`LatexToMathML`]. In order to use the library, create an
9//! instance of this struct and then call one of the convert functions. The constructor of the
10//! struct expects a config object in the form of an instance of [`MathCoreConfig`].
11//!
12//! Basic use looks like this:
13//!
14//! ```rust
15//! use math_core::{LatexToMathML, MathCoreConfig, MathDisplay};
16//!
17//! let latex = r#"\erf ( x ) = \frac{ 2 }{ \sqrt{ \pi } } \int_0^x e^{- t^2} \, dt"#;
18//! let config = MathCoreConfig::default();
19//! let converter = LatexToMathML::new(config).unwrap();
20//! let mathml = converter.convert_with_local_counter(latex, MathDisplay::Block).unwrap();
21//! println!("{}", mathml);
22//! ```
23//!
24//! # Features
25//!
//! - `serde`: With this feature, `MathCoreConfig` and `PrettyPrint` implement serde's
//!   `Serialize` and `Deserialize`.
27//!
28mod atof;
29mod character_class;
30mod color_defs;
31mod commands;
32mod environments;
33mod error;
34mod html_utils;
35mod lexer;
36mod parser;
37mod predefined;
38mod specifications;
39mod text_parser;
40mod token;
41mod token_queue;
42
43use rustc_hash::FxHashMap;
44#[cfg(feature = "serde")]
45use serde::{Deserialize, Serialize};
46
47use mathml_renderer::{arena::Arena, ast::Node};
48
49pub use self::error::{LatexErrKind, LatexError};
50pub use self::token::Token;
51use self::{lexer::Lexer, parser::Parser};
52
/// Selects how an equation is laid out in the generated MathML.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum MathDisplay {
    /// Inline math, as written with `$...$` in LaTeX.
    Inline,
    /// Block ("display style") math, as written with `$$...$$` in LaTeX.
    Block,
}
61
/// Controls whether the MathML output is pretty-printed.
///
/// Pretty-printing adds newlines and indentation to the MathML output so that it is easier
/// to read.
#[derive(Clone, Copy, Debug, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))]
#[non_exhaustive]
pub enum PrettyPrint {
    /// Do not pretty-print any equation. This is the default.
    #[default]
    Never,
    /// Pretty-print every equation.
    Always,
    /// Pretty-print block equations only.
    Auto,
}
79
80/// Configuration object for the LaTeX to MathML conversion.
81///
82/// # Example usage
83///
84/// ```rust
85/// use math_core::{MathCoreConfig, PrettyPrint};
86///
87/// // Default values
88/// let config = MathCoreConfig::default();
89///
90/// // Specifying pretty-print behavior
91/// let config = MathCoreConfig {
92/// pretty_print: PrettyPrint::Always,
93/// ..Default::default()
94/// };
95///
96/// // Specifying pretty-print behavior and custom macros
97/// let macros = vec![
98/// ("d".to_string(), r"\mathrm{d}".to_string()),
99/// ("bb".to_string(), r"\mathbb{#1}".to_string()), // with argument
100/// ];
101/// let config = MathCoreConfig {
102/// pretty_print: PrettyPrint::Auto,
103/// macros,
104/// ..Default::default()
105/// };
106/// ```
107///
108#[derive(Debug, Default)]
109#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
110#[cfg_attr(feature = "serde", serde(default, rename_all = "kebab-case"))]
111pub struct MathCoreConfig {
112 /// A configuration for pretty-printing the MathML output. See [`PrettyPrint`] for details.
113 pub pretty_print: PrettyPrint,
114 /// A list of LaTeX macros; each tuple contains (macro_name, macro_definition).
115 #[cfg_attr(feature = "serde", serde(with = "tuple_vec_map"))]
116 pub macros: Vec<(String, String)>,
117 /// If `true`, include `xmlns="http://www.w3.org/1998/Math/MathML"` in the `<math>` tag.
118 pub xml_namespace: bool,
119}
120
121#[derive(Debug, Default)]
122struct CustomCmds {
123 tokens: Vec<Token<'static>>,
124 map: FxHashMap<String, (u8, (usize, usize))>,
125}
126
127impl CustomCmds {
128 pub fn get_command<'config>(&'config self, command: &str) -> Option<Token<'config>> {
129 let (num_args, slice) = *self.map.get(command)?;
130 let tokens = self.tokens.get(slice.0..slice.1)?;
131 Some(Token::CustomCmd(num_args, tokens))
132 }
133}
134
135/// This struct contains those fields from `MathCoreConfig` that are simple flags.
136#[derive(Debug, Default)]
137struct Flags {
138 pretty_print: PrettyPrint,
139 xml_namespace: bool,
140}
141
142impl From<&MathCoreConfig> for Flags {
143 fn from(config: &MathCoreConfig) -> Self {
144 // TODO: can we use a macro here to avoid repeating the field names?
145 Self {
146 pretty_print: config.pretty_print,
147 xml_namespace: config.xml_namespace,
148 }
149 }
150}
151
152/// A converter that transforms LaTeX math equations into MathML Core.
153#[derive(Debug, Default)]
154pub struct LatexToMathML {
155 flags: Flags,
156 /// This is used for numbering equations in the document.
157 equation_count: u16,
158 custom_cmds: Option<CustomCmds>,
159}
160
161impl LatexToMathML {
162 /// Create a new `LatexToMathML` converter with the given configuration.
163 ///
164 /// This function returns an error if the custom macros in the given configuration could not
165 /// be parsed. The error contains both the parsing error and the macro definition that caused
166 /// the error.
167 pub fn new(config: MathCoreConfig) -> Result<Self, (Box<LatexError<'static>>, String)> {
168 Ok(Self {
169 flags: Flags::from(&config),
170 equation_count: 0,
171 custom_cmds: Some(parse_custom_commands(config.macros)?),
172 })
173 }
174
175 /// Convert LaTeX text to MathML with a global equation counter.
176 ///
177 /// For basic usage, see the documentation of [`convert_with_local_counter`].
178 ///
179 /// This conversion function maintains state, in order to count equations correctly across
180 /// different calls to this function.
181 ///
182 /// The counter can be reset with [`reset_global_counter`].
183 pub fn convert_with_global_counter<'config>(
184 &'config mut self,
185 latex: &str,
186 display: MathDisplay,
187 ) -> Result<String, Box<LatexError<'config>>> {
188 convert(
189 latex,
190 display,
191 self.custom_cmds.as_ref(),
192 &mut self.equation_count,
193 &self.flags,
194 )
195 }
196
197 /// Convert LaTeX text to MathML.
198 ///
199 /// The second argument specifies whether it is inline-equation or block-equation.
200 ///
201 /// ```rust
202 /// use math_core::{LatexToMathML, MathCoreConfig, MathDisplay};
203 ///
204 /// let latex = r#"(n + 1)! = \Gamma ( n + 1 )"#;
205 /// let config = MathCoreConfig::default();
206 /// let converter = LatexToMathML::new(config).unwrap();
207 /// let mathml = converter.convert_with_local_counter(latex, MathDisplay::Inline).unwrap();
208 /// println!("{}", mathml);
209 ///
210 /// let latex = r#"x = \frac{ - b \pm \sqrt{ b^2 - 4 a c } }{ 2 a }"#;
211 /// let mathml = converter.convert_with_local_counter(latex, MathDisplay::Block).unwrap();
212 /// println!("{}", mathml);
213 /// ```
214 ///
215 #[inline]
216 pub fn convert_with_local_counter<'config>(
217 &'config self,
218 latex: &str,
219 display: MathDisplay,
220 ) -> Result<String, Box<LatexError<'config>>> {
221 let mut equation_count = 0;
222 convert(
223 latex,
224 display,
225 self.custom_cmds.as_ref(),
226 &mut equation_count,
227 &self.flags,
228 )
229 }
230
231 /// Reset the equation counter to zero.
232 ///
233 /// This should normally be done at the beginning of a new document or section.
234 pub fn reset_global_counter(&mut self) {
235 self.equation_count = 0;
236 }
237}
238
239fn convert<'config>(
240 latex: &str,
241 display: MathDisplay,
242 custom_cmds: Option<&'config CustomCmds>,
243 equation_count: &mut u16,
244 flags: &Flags,
245) -> Result<String, Box<LatexError<'config>>> {
246 let arena = Arena::new();
247 let ast = parse(latex, &arena, custom_cmds, equation_count)?;
248
249 let mut output = String::new();
250 output.push_str("<math");
251 if flags.xml_namespace {
252 output.push_str(" xmlns=\"http://www.w3.org/1998/Math/MathML\"");
253 }
254 if matches!(display, MathDisplay::Block) {
255 output.push_str(" display=\"block\"");
256 };
257 output.push('>');
258
259 let pretty_print = matches!(flags.pretty_print, PrettyPrint::Always)
260 || (matches!(flags.pretty_print, PrettyPrint::Auto) && display == MathDisplay::Block);
261
262 let base_indent = if pretty_print { 1 } else { 0 };
263 for node in ast {
264 node.emit(&mut output, base_indent)
265 .map_err(|_| LatexError(0, LatexErrKind::RenderError))?;
266 }
267 if pretty_print {
268 output.push('\n');
269 }
270 output.push_str("</math>");
271 Ok(output)
272}
273
274fn parse<'arena, 'config>(
275 latex: &str,
276 arena: &'arena Arena,
277 custom_cmds: Option<&'config CustomCmds>,
278 equation_count: &mut u16,
279) -> Result<Vec<&'arena Node<'arena>>, Box<LatexError<'config>>> {
280 let lexer = Lexer::new(latex, false, custom_cmds);
281 let mut p = Parser::new(lexer, arena, equation_count)?;
282 let nodes = p.parse()?;
283 Ok(nodes)
284}
285
286fn parse_custom_commands(
287 macros: Vec<(String, String)>,
288) -> Result<CustomCmds, (Box<LatexError<'static>>, String)> {
289 let mut map = FxHashMap::with_capacity_and_hasher(macros.len(), Default::default());
290 let mut tokens = Vec::new();
291 for (name, definition) in macros {
292 if !is_valid_macro_name(name.as_str()) {
293 return Err((
294 Box::new(LatexError(0, LatexErrKind::InvalidMacroName(name))),
295 definition,
296 ));
297 }
298
299 // In order to be able to return `definition` in case of an error, we need to ensure
300 // that the lexer (which borrows `definition`) is dropped before we return the error.
301 // Therefore, we put the whole lexing process into its own block.
302 let value = 'value: {
303 let mut lexer: Lexer<'static, '_> = Lexer::new(definition.as_str(), true, None);
304 let start = tokens.len();
305 loop {
306 match lexer.next_token() {
307 Ok(tokloc) => {
308 if matches!(tokloc.token(), Token::Eof) {
309 break;
310 }
311 tokens.push(tokloc.into_token());
312 }
313 Err(err) => {
314 break 'value Err(err);
315 }
316 }
317 }
318 let end = tokens.len();
319 let num_args = lexer.parse_cmd_args().unwrap_or(0);
320 Ok((num_args, (start, end)))
321 };
322
323 match value {
324 Err(err) => {
325 return Err((err, definition));
326 }
327 Ok(v) => {
328 map.insert(name, v);
329 }
330 };
331 }
332 Ok(CustomCmds { tokens, map })
333}
334
/// Check whether `s` is acceptable as the name of a custom macro.
///
/// The empty string is rejected. A name consisting of exactly one character is always
/// accepted, whatever that character is. A longer name is accepted only if every byte is an
/// ASCII letter.
fn is_valid_macro_name(s: &str) -> bool {
    // Only "zero, one, or more than one character" matters, so stop counting after two.
    match s.chars().take(2).count() {
        0 => false,
        1 => true,
        _ => s.bytes().all(|b| b.is_ascii_alphabetic()),
    }
}
346}