mod atof;
mod character_class;
mod color_defs;
mod commands;
mod environments;
mod error;
mod html_utils;
mod lexer;
mod parser;
mod predefined;
mod specifications;
mod text_parser;
mod token;
mod token_queue;
use std::num::NonZeroU16;
use rustc_hash::{FxBuildHasher, FxHashMap};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use mathml_renderer::{arena::Arena, ast::Node, fmt::new_line_and_indent};
pub use self::error::LatexError;
use self::{error::LatexErrKind, lexer::Lexer, parser::Parser, token::Token};
/// Display style requested for a converted formula.
///
/// `convert` uses this to decide whether to emit the `display` attribute and,
/// together with [`PrettyPrint::Auto`], whether to pretty-print the output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MathDisplay {
/// Inline formula: no `display` attribute is written on the `<math>` element.
Inline,
/// Block formula: `display="block"` is written on the `<math>` element.
Block,
}
/// Controls whether the generated MathML is pretty-printed.
///
/// With the `serde` feature enabled, variants are (de)serialized using
/// kebab-case names.
#[derive(Debug, Clone, Copy, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))]
#[non_exhaustive]
pub enum PrettyPrint {
/// Never pretty-print. This is the default.
#[default]
Never,
/// Pretty-print every formula, regardless of display style.
Always,
/// Pretty-print only formulas converted with [`MathDisplay::Block`].
Auto,
}
/// User-facing configuration consumed by [`LatexToMathML::new`].
///
/// With the `serde` feature enabled, fields are (de)serialized using kebab-case
/// names and missing fields fall back to their `Default` values.
#[derive(Debug, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(default, rename_all = "kebab-case"))]
pub struct MathCoreConfig {
/// When the output should be pretty-printed.
pub pretty_print: PrettyPrint,
/// Custom macros as `(name, definition)` pairs.
///
/// Names are checked by `is_valid_macro_name` and definitions are lexed when
/// the converter is constructed. Under `serde` this field is (de)serialized
/// through `tuple_vec_map`.
#[cfg_attr(feature = "serde", serde(with = "tuple_vec_map"))]
pub macros: Vec<(String, String)>,
/// If `true`, the `<math>` element carries
/// `xmlns="http://www.w3.org/1998/Math/MathML"`.
pub xml_namespace: bool,
/// Copied into the internal command configuration.
// NOTE(review): the effect of this flag is implemented outside this file —
// presumably unknown commands are tolerated instead of raising an error; confirm.
pub ignore_unknown_commands: bool,
/// If `true`, the output is wrapped in `<semantics>` and the escaped LaTeX
/// source is appended as `<annotation encoding="application/x-tex">`.
pub annotation: bool,
/// Copied into the internal command configuration.
// NOTE(review): the effect of this flag is implemented outside this file; confirm
// its exact meaning against the lexer/parser.
pub allow_unreliable_rendering: bool,
}
/// Pre-lexed custom macros plus the command-related flags, as produced by
/// `parse_custom_commands`.
#[derive(Debug, Default)]
struct CommandConfig {
/// Tokens of all macro definitions, stored back to back in one vector.
custom_cmd_tokens: Vec<Token<'static>>,
/// Maps a macro name to `(num_args, (start, end))`, where `start..end` is the
/// macro's token range inside `custom_cmd_tokens`.
custom_cmd_map: FxHashMap<String, (u8, (usize, usize))>,
/// Copied from `MathCoreConfig::ignore_unknown_commands`.
ignore_unknown_commands: bool,
/// Copied from `MathCoreConfig::allow_unreliable_rendering`.
allow_unreliable_rendering: bool,
}
impl CommandConfig {
    /// Looks up the custom macro called `command`.
    ///
    /// Returns a [`Token::CustomCmd`] carrying the macro's argument count and a
    /// borrowed slice of its definition tokens. Returns `None` if no macro with
    /// that name is registered, or if the stored token range does not fit inside
    /// `custom_cmd_tokens`.
    pub fn get_command<'config>(&'config self, command: &str) -> Option<Token<'config>> {
        let &(num_args, (start, end)) = self.custom_cmd_map.get(command)?;
        self.custom_cmd_tokens
            .get(start..end)
            .map(|definition| Token::CustomCmd(num_args, definition))
    }
}
/// The subset of [`MathCoreConfig`] that `convert` needs when writing output.
#[derive(Debug, Default)]
struct Flags {
/// When the output should be pretty-printed.
pretty_print: PrettyPrint,
/// Whether to write the MathML `xmlns` attribute on `<math>`.
xml_namespace: bool,
/// Whether to wrap the output in `<semantics>` with a TeX `<annotation>`.
annotation: bool,
}
impl From<&MathCoreConfig> for Flags {
fn from(config: &MathCoreConfig) -> Self {
Self {
pretty_print: config.pretty_print,
xml_namespace: config.xml_namespace,
annotation: config.annotation,
}
}
}
/// A configured LaTeX-to-MathML converter.
///
/// Holds the output flags, the pre-lexed custom macros, and the counter/label
/// state shared by successive `convert_with_global_counter` calls.
#[derive(Debug, Default)]
pub struct LatexToMathML {
/// Output-related settings taken from the configuration.
flags: Flags,
/// Counter handed to the parser by `convert_with_global_counter`; set back to
/// zero by `reset_global_counter`.
equation_count: u16,
/// Label table handed to the parser by `convert_with_global_counter`.
// NOTE(review): presumably maps a label name to its equation number — confirm
// against the parser.
label_map: FxHashMap<Box<str>, NonZeroU16>,
/// Pre-lexed custom macros; `None` only for a `Default`-constructed converter,
/// since `new` always stores `Some`.
cmd_cfg: Option<CommandConfig>,
}
impl LatexToMathML {
    /// Builds a converter from `config`, lexing all custom macros up front.
    ///
    /// # Errors
    ///
    /// If a macro has an invalid name or its definition fails to lex, returns
    /// a tuple of the error, the index of the offending entry in
    /// `config.macros`, and that entry's definition string.
    pub fn new(config: MathCoreConfig) -> Result<Self, (Box<LatexError>, usize, String)> {
        // The flags must be read first: `parse_custom_commands` consumes `config`.
        let flags = Flags::from(&config);
        let cmd_cfg = parse_custom_commands(config)?;
        Ok(Self {
            flags,
            equation_count: 0,
            label_map: FxHashMap::default(),
            cmd_cfg: Some(cmd_cfg),
        })
    }

    /// Converts `latex` to a MathML string using the converter's own counter
    /// and label map, so that state carries over between calls.
    ///
    /// # Errors
    ///
    /// Returns the error produced while parsing `latex`.
    pub fn convert_with_global_counter(
        &mut self,
        latex: &str,
        display: MathDisplay,
    ) -> Result<String, Box<LatexError>> {
        let cmd_cfg = self.cmd_cfg.as_ref();
        let counter = &mut self.equation_count;
        let labels = &mut self.label_map;
        convert(latex, display, cmd_cfg, counter, labels, &self.flags)
    }

    /// Converts `latex` to a MathML string using a fresh counter (starting at
    /// zero) and an empty label map, leaving the converter's state untouched.
    ///
    /// # Errors
    ///
    /// Returns the error produced while parsing `latex`.
    #[inline]
    pub fn convert_with_local_counter(
        &self,
        latex: &str,
        display: MathDisplay,
    ) -> Result<String, Box<LatexError>> {
        let (mut counter, mut labels) = (0, FxHashMap::default());
        convert(
            latex,
            display,
            self.cmd_cfg.as_ref(),
            &mut counter,
            &mut labels,
            &self.flags,
        )
    }

    /// Sets the converter's own equation counter back to zero.
    // NOTE(review): `label_map` is left as-is here; confirm whether labels are
    // meant to survive a counter reset.
    pub fn reset_global_counter(&mut self) {
        self.equation_count = 0;
    }
}
/// Parses `latex` and serializes the result as a complete `<math>` element.
///
/// `equation_count` and `label_map` are forwarded to the parser. `flags`
/// selects the optional `xmlns` attribute, pretty-printing, and the
/// `<semantics>`/`<annotation>` wrapper that embeds the escaped LaTeX source.
///
/// # Errors
///
/// Returns the parse error; no output is produced in that case.
fn convert(
    latex: &str,
    display: MathDisplay,
    cmd_cfg: Option<&CommandConfig>,
    equation_count: &mut u16,
    label_map: &mut FxHashMap<Box<str>, NonZeroU16>,
    flags: &Flags,
) -> Result<String, Box<LatexError>> {
    let arena = Arena::new();
    let ast = parse(latex, &arena, cmd_cfg, equation_count, label_map)?;

    let is_block = display == MathDisplay::Block;
    // `Auto` pretty-prints block formulas only.
    let pretty_print = match flags.pretty_print {
        PrettyPrint::Always => true,
        PrettyPrint::Auto => is_block,
        PrettyPrint::Never => false,
    };
    let base_indent = if pretty_print { 1 } else { 0 };

    // Opening tag with its optional attributes.
    let mut output = String::from("<math");
    if flags.xml_namespace {
        output.push_str(" xmlns=\"http://www.w3.org/1998/Math/MathML\"");
    }
    if is_block {
        output.push_str(" display=\"block\"");
    }
    output.push('>');

    if !flags.annotation {
        // Plain output: every top-level node is a direct child of `<math>`.
        for node in &ast {
            // Emit errors are deliberately ignored.
            let _ = node.emit(&mut output, base_indent);
        }
    } else {
        // Annotated output: the nodes are combined into a single node, which is
        // followed by the original source inside `<semantics>`.
        let children_indent = if pretty_print { 2 } else { 0 };
        new_line_and_indent(&mut output, base_indent);
        output.push_str("<semantics>");
        let combined = parser::node_vec_to_node(&arena, &ast, false);
        let _ = combined.emit(&mut output, children_indent);
        new_line_and_indent(&mut output, children_indent);
        output.push_str("<annotation encoding=\"application/x-tex\">");
        html_utils::escape_html_content(&mut output, latex);
        output.push_str("</annotation>");
        new_line_and_indent(&mut output, base_indent);
        output.push_str("</semantics>");
    }

    if pretty_print {
        output.push('\n');
    }
    output.push_str("</math>");
    Ok(output)
}
/// Lexes and parses `latex` into a list of nodes allocated in `arena`.
///
/// The lexer is created with its second argument set to `false` (the value
/// `parse_custom_commands` sets to `true` when lexing macro definitions) and
/// with the optional custom-command configuration.
///
/// # Errors
///
/// Propagates any error from constructing the parser or from parsing.
fn parse<'config, 'source, 'arena>(
    latex: &'source str,
    arena: &'arena Arena,
    cmd_cfg: Option<&'config CommandConfig>,
    equation_count: &'arena mut u16,
    label_map: &'arena mut FxHashMap<Box<str>, NonZeroU16>,
) -> Result<Vec<&'arena Node<'arena>>, Box<LatexError>>
where
    'config: 'source,
    'source: 'arena,
{
    let mut parser = Parser::new(
        Lexer::new(latex, false, cmd_cfg),
        arena,
        equation_count,
        label_map,
    )?;
    Ok(parser.parse()?)
}
fn parse_custom_commands(
cfg: MathCoreConfig,
) -> Result<CommandConfig, (Box<LatexError>, usize, String)> {
let macros = cfg.macros;
let mut map = FxHashMap::with_capacity_and_hasher(macros.len(), FxBuildHasher);
let mut tokens = Vec::new();
for (idx, (name, definition)) in macros.into_iter().enumerate() {
if !is_valid_macro_name(name.as_str()) {
return Err((
Box::new(LatexError(0..0, LatexErrKind::InvalidMacroName(name))),
idx,
definition,
));
}
let value = 'value: {
let mut lexer: Lexer<'static, '_> = Lexer::new(definition.as_str(), true, None);
let start = tokens.len();
loop {
match lexer.next_token_no_unknown_command() {
Ok(tokloc) => {
if matches!(tokloc.token(), Token::Eoi) {
break;
}
tokens.push(tokloc.into_token());
}
Err(err) => {
break 'value Err(err);
}
}
}
let end = tokens.len();
let num_args = lexer.parse_cmd_args().unwrap_or(0);
Ok((num_args, (start, end)))
};
match value {
Err(err) => {
return Err((err, idx, definition));
}
Ok(v) => {
map.insert(name, v);
}
}
}
Ok(CommandConfig {
custom_cmd_tokens: tokens,
custom_cmd_map: map,
ignore_unknown_commands: cfg.ignore_unknown_commands,
allow_unreliable_rendering: cfg.allow_unreliable_rendering,
})
}
/// Reports whether `s` is acceptable as a custom macro name.
///
/// * The empty string is rejected.
/// * A name made of exactly one character is accepted, whatever the character.
/// * A longer name is accepted only if every byte is an ASCII letter.
fn is_valid_macro_name(s: &str) -> bool {
    let mut chars = s.chars();
    match (chars.next(), chars.next()) {
        // No characters at all.
        (None, _) => false,
        // Exactly one character.
        (Some(_), None) => true,
        // Two or more characters.
        (Some(_), Some(_)) => s.bytes().all(|b| b.is_ascii_alphabetic()),
    }
}