use crate::rules::math::parser::{BracketKind, MathToken};
use super::math_token_rule::{MathEncodeState, MathTokenEngine, MathTokenResult, MathTokenRule};
use super::rule_1;
/// Finds the closest token strictly before `idx` that is not [`MathToken::Space`].
///
/// Returns `None` when `idx` is zero, when `idx` points past the end of
/// `tokens`, or when every token before `idx` is a space.
fn prev_non_space(tokens: &[MathToken], idx: usize) -> Option<&MathToken> {
    // `get(..idx)` is `None` for an out-of-range `idx`, so `?` bails out early.
    tokens
        .get(..idx)?
        .iter()
        .rev()
        .find(|tok| !matches!(tok, MathToken::Space))
}
/// Finds the first token strictly after `idx` that is not [`MathToken::Space`].
///
/// Returns `None` when nothing but spaces (or nothing at all) follows `idx`.
fn next_non_space(tokens: &[MathToken], idx: usize) -> Option<&MathToken> {
    tokens
        .iter()
        .skip(idx + 1)
        .find(|tok| !matches!(tok, MathToken::Space))
}
/// Reports whether the token at `index` sits in a "left superscript" position.
///
/// The answer is `true` only when all of the following hold:
/// * the nearest non-space token before `index` is none of `Variable`,
///   `UpperVariable`, `Number`, `CloseParen`, `Prime`, `FunctionName`,
///   `Subscript`, nor one of the symbols U+2202, U+2207, U+210F, U+2135;
/// * walking backwards over spaces/subscripts does not run into a quantifier
///   symbol or a function name;
/// * the nearest non-space token after `index` is a `Variable` or
///   `UpperVariable`.
fn is_left_superscript_position(tokens: &[MathToken], index: usize) -> bool {
    let before = prev_non_space(tokens, index);
    let blocked_by_previous = matches!(
        before,
        Some(
            MathToken::Variable(_)
                | MathToken::UpperVariable(_)
                | MathToken::Number(_)
                | MathToken::CloseParen(_)
                | MathToken::Prime
                | MathToken::FunctionName(_)
                | MathToken::Subscript(_)
                | MathToken::MathSymbol('\u{2202}' | '\u{2207}' | '\u{210F}' | '\u{2135}')
        )
    );
    if blocked_by_previous {
        return false;
    }

    // An out-of-range `index` yields an empty scan.
    // NOTE(review): `Subscript` and `FunctionName` were already rejected above,
    // so this scan appears to only ever skip spaces and then test a single
    // token for being a quantifier symbol — confirm whether that is intended.
    let preceding = tokens.get(..index).unwrap_or_default();
    for tok in preceding.iter().rev().map(Some) {
        if is_quantifier_symbol(tok) || is_function_name_token(tok) {
            return false;
        }
        if !is_space_or_subscript(tok) {
            break;
        }
    }

    matches!(
        next_non_space(tokens, index),
        Some(MathToken::Variable(_) | MathToken::UpperVariable(_))
    )
}
/// Returns `true` when `tok` is present and is either a `Space` or a `Subscript`.
fn is_space_or_subscript(tok: Option<&MathToken>) -> bool {
    tok.is_some_and(|t| matches!(t, MathToken::Space | MathToken::Subscript(_)))
}
/// Returns `true` when `tok` is a `MathSymbol` carrying one of the integral
/// signs (U+222B..=U+222E), the n-ary sum (U+2211), the n-ary product
/// (U+220F), "for all" (U+2200) or "there exists" (U+2203).
fn is_quantifier_symbol(tok: Option<&MathToken>) -> bool {
    const SYMBOLS: [char; 8] = [
        '\u{222B}', '\u{222C}', '\u{222D}', '\u{222E}', '\u{2211}', '\u{220F}', '\u{2200}',
        '\u{2203}',
    ];
    match tok {
        Some(MathToken::MathSymbol(symbol)) => SYMBOLS.contains(symbol),
        _ => false,
    }
}
/// Returns `true` when `tok` is present and is a `FunctionName` token.
fn is_function_name_token(tok: Option<&MathToken>) -> bool {
    tok.is_some_and(|t| matches!(t, MathToken::FunctionName(_)))
}
/// Returns `true` when `content` is exactly two tokens: a minus operator
/// (U+2212 or ASCII `-`) followed by a single `Number` or `Variable`.
fn is_simple_signed_number(content: &[MathToken]) -> bool {
    matches!(
        content,
        [
            MathToken::Operator('\u{2212}' | '-'),
            MathToken::Number(_) | MathToken::Variable(_)
        ]
    )
}
/// Decides whether superscript `content` has to be emitted as a group.
///
/// * Fewer than two tokens: never grouped.
/// * A minus sign followed by one number/variable: never grouped.
/// * Three or more tokens: always grouped.
/// * Otherwise (exactly two tokens): grouped only if one of them is an
///   `Operator`, `OpenParen`, `CloseParen`, `Space`, `Subscript` or
///   `Superscript`.
pub fn should_group_superscript(content: &[MathToken]) -> bool {
    let len = content.len();
    if len < 2 || is_simple_signed_number(content) {
        return false;
    }

    len >= 3
        || content.iter().any(|token| {
            matches!(
                token,
                MathToken::Operator(_)
                    | MathToken::OpenParen(_)
                    | MathToken::CloseParen(_)
                    | MathToken::Space
                    | MathToken::Subscript(_)
                    | MathToken::Superscript(_)
            )
        })
}
/// Encodes the superscript token located at `tokens[*i]` into `result`.
///
/// # Parameters
/// * `tokens` – the full token stream; neighbours of `*i` select the form.
/// * `i` – cursor of the superscript token. On success it is advanced past
///   everything this call consumed (1, 2 or 3 tokens, see below).
/// * `content` – the tokens held inside the superscript.
/// * `result` – output byte buffer that is appended to.
/// * `engine` – used to encode nested token sequences.
///
/// # Forms, tried in order
/// 1. The superscript follows a `Subscript` that itself follows an integral,
///    sum or product symbol, or a closing square bracket: emits `0`, the
///    content, and another `0` unless the next token is a space or the end.
///    Advances by 1.
/// 2. The next token is the square-root symbol (U+221A): emits the content
///    (wrapped in `55`/`62` when it has more than one token) followed by `59`.
///    Advances by 2.
/// 3. The content is one `Number`, the next token is a middle dot (U+00B7) or
///    `/`, and the token after that is a superscript holding one `Number`:
///    emits both numbers as one combined superscript. Advances by 3.
/// 4. Otherwise: emits `24` and the content, grouped in `55`/`62` where
///    required. Advances by 1.
///
/// # Returns
/// `Ok(true)` for forms 1–3, `Ok(false)` for form 4.
///
/// # Errors
/// Propagates any error returned by `engine.encode_tokens`.
pub fn encode_superscript(
    tokens: &[MathToken],
    i: &mut usize,
    content: &[MathToken],
    result: &mut Vec<u8>,
    engine: &MathTokenEngine,
) -> Result<bool, String> {
    let at = *i;
    let following = tokens.get(at + 1);

    // Form 1: superscript stacked on a subscripted big operator or `]`.
    let after_subscripted_anchor = at >= 2
        && matches!(tokens.get(at - 1), Some(MathToken::Subscript(_)))
        && matches!(
            tokens.get(at - 2),
            Some(
                MathToken::MathSymbol(
                    '\u{222B}' | '\u{222C}' | '\u{222D}' | '\u{222E}' | '\u{2211}' | '\u{220F}'
                ) | MathToken::CloseParen(BracketKind::Square)
            )
        );
    if after_subscripted_anchor {
        result.push(0);
        engine.encode_tokens(content, result)?;
        if !matches!(following, Some(MathToken::Space) | None) {
            result.push(0);
        }
        *i += 1;
        return Ok(true);
    }

    // Form 2: superscript immediately followed by a square-root symbol.
    if matches!(following, Some(MathToken::MathSymbol('\u{221A}'))) {
        let grouped = content.len() > 1;
        if grouped {
            result.push(55);
        }
        engine.encode_tokens(content, result)?;
        if grouped {
            result.push(62);
        }
        result.push(59);
        *i += 2;
        return Ok(true);
    }

    // Form 3: `number · number` or `number / number` spread over two superscripts.
    if let [MathToken::Number(left)] = content {
        let joined_by_dot = matches!(following, Some(MathToken::MathSymbol('\u{00B7}')));
        let joined_by_slash = matches!(following, Some(MathToken::Operator('/')));
        if (joined_by_dot || joined_by_slash)
            && let Some(MathToken::Superscript(right_content)) = tokens.get(at + 2)
            && let [MathToken::Number(right)] = right_content.as_slice()
        {
            result.push(24);
            if joined_by_dot {
                result.push(60);
                for ch in left.chars() {
                    result.extend(crate::number::encode_number(ch));
                }
                result.push(50);
                for ch in right.chars() {
                    result.extend(crate::number::encode_number(ch));
                }
            } else {
                result.push(55);
                rule_1::encode_number_literal(left, result);
                result.push(12);
                rule_1::encode_number_literal(right, result);
                result.push(62);
            }
            *i += 3;
            return Ok(true);
        }
    }

    // Form 4: general superscript.
    // `(x)` with a single number/variable inside keeps its parentheses verbatim.
    let wrapped_simple_index = matches!(
        content,
        [
            MathToken::OpenParen(BracketKind::MathParen),
            MathToken::Number(_) | MathToken::Variable(_) | MathToken::UpperVariable(_),
            MathToken::CloseParen(BracketKind::MathParen)
        ]
    );
    // Any other fully parenthesised content loses its outer parentheses and is
    // always emitted as a group instead.
    let (sup_content, force_group) = match content {
        [
            MathToken::OpenParen(BracketKind::MathParen),
            inner @ ..,
            MathToken::CloseParen(BracketKind::MathParen),
        ] if !wrapped_simple_index => (inner, true),
        _ => (content, false),
    };
    let is_left_superscript = is_left_superscript_position(tokens, at);

    result.push(24);
    let needs_group = !wrapped_simple_index
        && (force_group || should_group_superscript(sup_content) || is_left_superscript);
    if needs_group {
        result.push(55);
    }
    engine.encode_tokens(sup_content, result)?;
    if needs_group {
        result.push(62);
    }
    *i += 1;
    Ok(false)
}
/// Token rule that handles [`MathToken::Superscript`] by delegating to
/// [`encode_superscript`].
pub struct SuperscriptRule;

impl MathTokenRule for SuperscriptRule {
    /// Fixed identifier of this rule.
    fn name(&self) -> &'static str {
        "SuperscriptRule"
    }

    /// Priority value reported for this rule.
    fn priority(&self) -> u16 {
        50
    }

    /// Matches when the token at `index` exists and is a superscript.
    fn matches(&self, tokens: &[MathToken], index: usize, _state: &MathEncodeState) -> bool {
        tokens
            .get(index)
            .is_some_and(|tok| matches!(tok, MathToken::Superscript(_)))
    }

    /// Encodes the superscript at `index` into `result`.
    ///
    /// Returns `Skip` when the token at `index` is not a superscript; otherwise
    /// returns `Consumed(n)` where `n` is how far the encoder moved its cursor.
    /// `state.prev_was_number` is cleared after a successful encode. Errors
    /// from the encoder are propagated unchanged.
    fn apply(
        &self,
        tokens: &[MathToken],
        index: usize,
        result: &mut Vec<u8>,
        state: &mut MathEncodeState,
        engine: &MathTokenEngine,
    ) -> Result<MathTokenResult, String> {
        let content = match tokens.get(index) {
            Some(MathToken::Superscript(content)) => content,
            _ => return Ok(MathTokenResult::Skip),
        };

        let mut position = index;
        // The boolean "special form" flag is not needed here; only the cursor matters.
        encode_superscript(tokens, &mut position, content, result, engine)?;
        state.prev_was_number = false;
        Ok(MathTokenResult::Consumed(position - index))
    }
}
// Unit tests for the superscript rule.
//
// Most end-to-end cases below go through `enc` and only assert that some
// output was produced; they do not pin the exact bytes.
#[cfg(test)]
mod tests {
use super::*;
// Runs the crate-level encoder on `input`; when it yields no value the
// result falls back to an empty vector (via `unwrap_or_default`).
fn enc(input: &str) -> Vec<u8> {
crate::encode(input).unwrap_or_default()
}
// Covers both minus characters, a non-minus operator, a too-short slice and
// a non-term second token.
#[test]
fn is_simple_signed_number_paths() {
let with_ascii_minus = vec![MathToken::Operator('-'), MathToken::Number("1".into())];
assert!(is_simple_signed_number(&with_ascii_minus));
let with_math_minus = vec![MathToken::Operator('\u{2212}'), MathToken::Variable('x')];
assert!(is_simple_signed_number(&with_math_minus));
let plus = vec![MathToken::Operator('+'), MathToken::Number("1".into())];
assert!(!is_simple_signed_number(&plus));
let single = vec![MathToken::Number("1".into())];
assert!(!is_simple_signed_number(&single));
let weird = vec![MathToken::Operator('-'), MathToken::Operator('+')];
assert!(!is_simple_signed_number(&weird));
}
// Table-driven check of `should_group_superscript`: each case is a token
// list paired with the expected grouping decision.
#[rstest::rstest]
#[case::single_token_no_group(vec![MathToken::Number("2".into())], false)]
#[case::signed_number_no_group(
vec![MathToken::Operator('-'), MathToken::Number("1".into())],
false,
)]
#[case::has_operator_groups(
vec![MathToken::Number("1".into()), MathToken::Operator('+'), MathToken::Number("2".into())],
true,
)]
#[case::has_paren_groups(
vec![
MathToken::OpenParen(BracketKind::MathParen),
MathToken::Variable('x'),
MathToken::CloseParen(BracketKind::MathParen),
],
true,
)]
#[case::len_ge_3_simple_groups(
vec![
MathToken::Number("1".into()),
MathToken::Number("2".into()),
MathToken::Number("3".into()),
],
true,
)]
fn should_group_superscript_paths(#[case] content: Vec<MathToken>, #[case] expected: bool) {
assert_eq!(should_group_superscript(&content), expected);
}
#[test]
fn superscript_simple_digit() {
let bytes = enc("$x^2$");
assert!(!bytes.is_empty());
}
#[test]
fn superscript_compound() {
let bytes = enc("$x^{n+1}$");
assert!(!bytes.is_empty());
}
#[test]
fn superscript_negative_index() {
let bytes = enc("$x^{-1}$");
assert!(!bytes.is_empty());
}
#[test]
fn superscript_parenthesised_index() {
let bytes = enc("$y^{(n)}$");
assert!(!bytes.is_empty());
}
#[test]
fn superscript_followed_by_radical() {
let bytes = enc("$x^2\\sqrt{y}$");
assert!(!bytes.is_empty());
}
#[test]
fn superscript_dot_product_form() {
let bytes = enc("$10^{2}\\cdot10^{3}$");
assert!(!bytes.is_empty());
}
#[test]
fn superscript_rule_priority_and_name() {
let r = SuperscriptRule;
assert_eq!(r.priority(), 50);
assert_eq!(r.name(), "SuperscriptRule");
}
#[test]
fn superscript_with_slash_and_superscript_follow() {
let bytes = enc("$10^{2}/10^{3}$");
assert!(!bytes.is_empty());
}
// Same shape as `superscript_followed_by_radical`, but with the exponent
// written in braces.
#[test]
fn superscript_followed_by_sqrt() {
let bytes = enc("$x^{2}\\sqrt{y}$");
assert!(!bytes.is_empty());
}
#[test]
fn superscript_with_paren_complex() {
let bytes = enc("$x^{(a+b)}$");
assert!(!bytes.is_empty());
}
// NOTE(review): despite the name, the input here is a subscripted sum
// (`\sum_{i=1}^n`), not a closing bracket.
#[test]
fn superscript_after_bracket_close() {
let bytes = enc("$\\sum_{i=1}^n$");
assert!(!bytes.is_empty());
}
#[test]
fn left_superscript_position_blocked_by_partial_derivative() {
let toks = vec![
MathToken::MathSymbol('\u{2202}'),
MathToken::Superscript(vec![MathToken::Number("2".into())]),
MathToken::Variable('z'),
];
assert!(!is_left_superscript_position(&toks, 1));
}
#[test]
fn left_superscript_position_blocked_by_sum() {
let toks = vec![
MathToken::MathSymbol('\u{2211}'),
MathToken::Subscript(vec![MathToken::Variable('i')]),
MathToken::Superscript(vec![MathToken::MathSymbol('\u{221E}')]),
MathToken::Variable('x'),
];
assert!(!is_left_superscript_position(&toks, 2));
}
#[test]
fn left_superscript_position_blocked_by_function_name() {
let toks = vec![
MathToken::FunctionName("sin".into()),
MathToken::Superscript(vec![MathToken::Number("2".into())]),
MathToken::Variable('x'),
];
assert!(!is_left_superscript_position(&toks, 1));
}
// The result is discarded: this only checks that encoding does not panic.
#[test]
fn superscript_after_square_close_with_subscript() {
let bytes = enc("$[a]_i^2$");
let _ = bytes;
}
// Feeds Unicode superscript characters directly instead of `^{...}` markup.
#[test]
fn superscript_with_slash_then_superscript_number() {
let bytes = enc("10²/⁵");
assert!(!bytes.is_empty());
let bytes2 = enc("10²·⁵");
assert!(!bytes2.is_empty());
}
#[test]
fn superscript_paren_wrapped_simple_index() {
let bytes = enc("$y^{(4)}$");
assert!(!bytes.is_empty());
}
// NOTE(review): uses the same input as `superscript_with_paren_complex`.
#[test]
fn superscript_paren_wrapped_complex_content() {
let bytes = enc("$x^{(a+b)}$");
assert!(!bytes.is_empty());
}
// Both results are discarded: this only checks that the calls do not panic
// when a space or a subscript precedes the superscript.
#[test]
fn left_superscript_position_continues_over_space_and_subscript() {
let toks = vec![
MathToken::Variable('a'),
MathToken::Space,
MathToken::Superscript(vec![MathToken::Number("2".into())]),
MathToken::Variable('b'),
];
let _ = is_left_superscript_position(&toks, 2);
let toks = vec![
MathToken::Subscript(vec![MathToken::Number("1".into())]),
MathToken::Superscript(vec![MathToken::Number("2".into())]),
MathToken::Variable('b'),
];
let _ = is_left_superscript_position(&toks, 1);
}
#[test]
fn superscript_after_close_square_subscript_followed_by_var() {
let bytes = enc("$[a]_i^{x}y$");
assert!(!bytes.is_empty());
}
// Calls `apply` directly on a non-superscript token and expects `Skip`.
#[test]
fn superscript_rule_apply_with_non_superscript_skip() {
let r = SuperscriptRule;
let mut state = MathEncodeState::with_context(
false,
super::super::math_token_rule::MathContext::default(),
);
let toks = vec![MathToken::Variable('x')];
let mut result = Vec::new();
let engine =
MathTokenEngine::with_context(super::super::math_token_rule::MathContext::default());
let res = r.apply(&toks, 0, &mut result, &mut state, &engine);
assert!(matches!(res, Ok(MathTokenResult::Skip)));
}
}