use crate::rules::context::EncoderState;
use crate::rules::math;
use crate::rules::math::math_token_rule::MathContext;
/// Builds a [`MathContext`] from the encoder's current state.
///
/// Only the `matrix_context_active` and `math_mode_active` fields of `state`
/// are read; each is copied into the field of the same name on the result.
fn math_context_from_state(state: &EncoderState) -> MathContext {
    let matrix_context_active = state.matrix_context_active;
    let math_mode_active = state.math_mode_active;
    MathContext {
        matrix_context_active,
        math_mode_active,
    }
}
/// Reads one brace-delimited group from the front of `chars`.
///
/// Returns `None`, consuming nothing, when the next character is not `{`
/// (or the input is exhausted). Otherwise the opening brace and everything up
/// to and including its matching `}` are consumed, and the text in between is
/// returned. Nested `{` / `}` pairs are kept verbatim in the result.
///
/// If the input ends before the group is closed, the text read so far is
/// still returned as `Some(..)`.
fn read_braced_content(chars: &mut std::iter::Peekable<std::str::Chars<'_>>) -> Option<String> {
    // Consumes the opening brace only if it is actually there.
    chars.next_if_eq(&'{')?;

    // Number of groups currently open; the brace consumed above counts as one.
    let mut open_groups = 1usize;
    let inner: String = chars
        .by_ref()
        .take_while(|&ch| {
            match ch {
                '{' => open_groups += 1,
                '}' => open_groups -= 1,
                _ => {}
            }
            // The `}` that brings the count to zero closes the outer group:
            // `take_while` consumes it but leaves it out of the output, and
            // never calls this closure again, so the counter cannot underflow.
            open_groups != 0
        })
        .collect();
    Some(inner)
}
mod matrix;
#[cfg(test)]
use matrix::subscript_digit_to_ascii;
use matrix::{encode_latex_matrix, find_latex_matrix};
/// Encodes the inside of a LaTeX math span using the given `math_context`.
///
/// The input is routed as follows:
/// 1. If `find_latex_matrix` finds a matrix in `latex_inner`, the result of
///    `encode_latex_matrix` is returned.
/// 2. Otherwise the LaTeX is reduced with `strip_latex_to_math`. If the
///    stripped text is one ASCII uppercase letter followed by at least one
///    character, all of which are `⁺`, `⁻` or a subscript digit, it is passed
///    to `crate::encode`.
/// 3. Any other text goes through
///    `math::encoder::encode_math_expression_with_context`.
///
/// NOTE(review): case 2 presumably targets chemical-style symbols such as
/// `H⁺` or `O₂` — confirm the intent against the callers.
pub(crate) fn encode_latex_math_bytes_with_context(
    latex_inner: &str,
    math_context: MathContext,
) -> Result<Vec<u8>, String> {
    if let Some(matrix) = find_latex_matrix(latex_inner) {
        return encode_latex_matrix(&matrix, math_context);
    }

    let math_text = strip_latex_to_math(latex_inner);

    let mut symbols = math_text.chars();
    let starts_with_capital =
        matches!(symbols.next(), Some(first) if first.is_ascii_uppercase());
    // Peek first so that a lone capital letter (empty suffix) does not qualify.
    let mut suffix = symbols.peekable();
    let is_marked_capital = starts_with_capital
        && suffix.peek().is_some()
        && suffix.all(|c| matches!(c, '⁺' | '⁻' | '₀'..='₉'));

    if is_marked_capital {
        crate::encode(&math_text)
    } else {
        math::encoder::encode_math_expression_with_context(&math_text, math_context)
    }
}
mod spacing;
pub(crate) use spacing::wrap_latex_math_tokens_with_inner;
mod grouping;
mod strip;
pub(crate) use strip::strip_latex_to_math;
mod merge_rule;
pub use merge_rule::LatexMergeRule;
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `input` through `crate::encode`, turning an encoder error into an
    /// empty byte vector.
    fn enc(input: &str) -> Vec<u8> {
        crate::encode(input).unwrap_or_default()
    }

    /// Smoke-test driver: encodes every input and discards the output.
    ///
    /// Because [`enc`] swallows encoder errors, a test built on this helper
    /// can only fail if encoding panics.
    fn encode_all(inputs: &[&str]) {
        for input in inputs {
            let _ = enc(input);
        }
    }

    #[test]
    fn test_strip_neq() {
        let result = strip_latex_to_math("y \\neq 0");
        // U+2260 NOT EQUAL TO
        assert!(result.contains('\u{2260}'));
        assert!(result.contains('y'));
        assert!(result.contains('0'));
    }

    #[test]
    fn test_strip_geq() {
        let result = strip_latex_to_math("x \\geq 5");
        // U+2265 GREATER-THAN OR EQUAL TO
        assert!(result.contains('\u{2265}'));
    }

    #[test]
    fn test_strip_sin() {
        let result = strip_latex_to_math("\\sin x");
        assert!(result.contains("sin"));
        assert!(result.contains('x'));
    }

    #[test]
    fn test_strip_exponent() {
        let result = strip_latex_to_math("x^{2}");
        // U+00B2 SUPERSCRIPT TWO
        assert!(result.contains('\u{00B2}'));
    }

    #[test]
    fn test_strip_subscript() {
        let result = strip_latex_to_math("x_{2}");
        // U+2082 SUBSCRIPT TWO
        assert!(result.contains('\u{2082}'));
    }

    /// Every Unicode subscript digit maps to its ASCII digit; characters that
    /// are not subscript digits (including plain ASCII digits) map to `None`.
    #[rstest::rstest]
    #[case::sub_0('\u{2080}', Some('0'))]
    #[case::sub_1('\u{2081}', Some('1'))]
    #[case::sub_2('\u{2082}', Some('2'))]
    #[case::sub_3('\u{2083}', Some('3'))]
    #[case::sub_4('\u{2084}', Some('4'))]
    #[case::sub_5('\u{2085}', Some('5'))]
    #[case::sub_6('\u{2086}', Some('6'))]
    #[case::sub_7('\u{2087}', Some('7'))]
    #[case::sub_8('\u{2088}', Some('8'))]
    #[case::sub_9('\u{2089}', Some('9'))]
    #[case::ascii_letter_none('a', None)]
    #[case::ascii_digit_none('0', None)]
    fn subscript_digit_to_ascii_table(#[case] sub: char, #[case] expected: Option<char>) {
        assert_eq!(subscript_digit_to_ascii(sub), expected, "sub={sub:?}");
    }

    /// Smoke test: matrix-like environments must not panic the encoder.
    #[test]
    fn latex_matrix_environments() {
        encode_all(&[
            "$\\begin{matrix} 1 & 2 \\\\ 3 & 4 \\end{matrix}$",
            "$\\begin{pmatrix} a & b \\\\ c & d \\end{pmatrix}$",
            "$\\begin{bmatrix} 1 \\\\ 2 \\end{bmatrix}$",
            "$\\begin{Bmatrix} x & y \\end{Bmatrix}$",
            "$\\begin{vmatrix} a & b \\\\ c & d \\end{vmatrix}$",
            "$\\begin{Vmatrix} 1 & 0 \\\\ 0 & 1 \\end{Vmatrix}$",
            "$\\begin{array}{cc} x & y \\\\ z & w \\end{array}$",
            "$\\begin{array}{ll} a & b \\\\ c & d \\end{array}$",
        ]);
    }

    /// Smoke test: a broad set of symbol and operator commands must not panic
    /// the encoder.
    #[test]
    fn latex_command_stripping_diverse() {
        encode_all(&[
            "$\\alpha$",
            "$\\beta$",
            "$\\gamma$",
            "$\\delta$",
            "$\\theta$",
            "$\\lambda$",
            "$\\mu$",
            "$\\nu$",
            "$\\pi$",
            "$\\sigma$",
            "$\\tau$",
            "$\\phi$",
            "$\\chi$",
            "$\\psi$",
            "$\\omega$",
            "$\\Alpha$",
            "$\\Gamma$",
            "$\\Delta$",
            "$\\Theta$",
            "$\\infty$",
            "$\\partial$",
            "$\\nabla$",
            "$\\forall$",
            "$\\exists$",
            "$\\emptyset$",
            "$\\in$",
            "$\\notin$",
            "$\\subset$",
            "$\\supset$",
            "$\\cup$",
            "$\\cap$",
            "$\\land$",
            "$\\lor$",
            "$\\neg$",
            "$\\Rightarrow$",
            "$\\Leftrightarrow$",
            "$\\rightarrow$",
            "$\\cdot$",
            "$\\times$",
            "$\\div$",
            "$\\le$",
            "$\\ge$",
            "$\\equiv$",
            "$\\approx$",
            "$\\sum$",
            "$\\prod$",
            "$\\int$",
            "$\\oint$",
            "$x \\to \\infty$",
            "$a \\equiv b \\pmod{n}$",
            "$\\sqrt{a^2 + b^2}$",
            "$\\sqrt[n]{x}$",
        ]);
    }

    /// Smoke test: accent and over/underline commands must not panic the
    /// encoder.
    #[test]
    fn latex_accents_and_marks() {
        encode_all(&[
            "$\\bar{x}$",
            "$\\overline{AB}$",
            "$\\underline{x}$",
            "$\\vec{v}$",
            "$\\overrightarrow{AB}$",
            "$\\hat{x}$",
            "$\\widehat{ABC}$",
            "$\\tilde{x}$",
            "$\\widetilde{xy}$",
            "$\\dot{x}$",
            "$\\ddot{x}$",
            "$\\acute{a}$",
            "$\\grave{a}$",
            "$\\check{x}$",
            "$\\breve{x}$",
        ]);
    }

    /// Smoke test: fraction and binomial commands must not panic the encoder.
    #[test]
    fn latex_fractions_diverse() {
        encode_all(&[
            "$\\frac{1}{2}$",
            "$\\frac{a}{b}$",
            "$\\frac{a+b}{c-d}$",
            "$\\frac{x^2}{y^2}$",
            "$\\frac{\\sqrt{2}}{2}$",
            "$\\frac{\\sin x}{\\cos x}$",
            "$\\dfrac{1}{2}$",
            "$\\tfrac{1}{2}$",
            "$\\cfrac{1}{2}$",
            "$\\binom{n}{k}$",
            "$\\dbinom{n}{k}$",
        ]);
    }

    /// Smoke test: plain and `\left`/`\right` delimiters must not panic the
    /// encoder.
    #[test]
    fn latex_brackets_diverse() {
        encode_all(&[
            "$(x)$",
            "$[x]$",
            "$\\{x\\}$",
            "$\\langle x \\rangle$",
            "$\\left(x\\right)$",
            "$\\left[x\\right]$",
            "$\\left\\{x\\right\\}$",
            "$\\left| x \\right|$",
            "$\\lfloor x \\rfloor$",
            "$\\lceil x \\rceil$",
        ]);
    }
}