use crate::rules::math::parser::{BracketKind, MathToken};
use super::math_token_rule::{MathEncodeState, MathTokenEngine, MathTokenResult, MathTokenRule};
use super::rule_1;
/// Returns an owned copy of the number text when `content` consists of exactly
/// one [`MathToken::Number`].
///
/// Any other shape (empty slice, several tokens, or a single non-number token)
/// yields `None`.
fn single_numeric(content: &[MathToken]) -> Option<String> {
    if let [MathToken::Number(digits)] = content {
        Some(digits.clone())
    } else {
        None
    }
}
/// Finds the closest token strictly before position `idx` that is not a
/// [`MathToken::Space`].
///
/// Returns `None` when nothing but spaces (or nothing at all) precedes `idx`,
/// and also when `idx` is greater than `tokens.len()`.
fn prev_non_space(tokens: &[MathToken], idx: usize) -> Option<&MathToken> {
    // `get(..idx)` is the checked form of `&tokens[..idx]`: an out-of-range
    // `idx` produces `None` instead of a panic.
    tokens
        .get(..idx)?
        .iter()
        .rev()
        .find(|token| !matches!(token, MathToken::Space))
}
/// Reports whether every token in `content` is a [`MathToken::Number`] or a
/// [`MathToken::DecimalPoint`].
///
/// An empty slice is vacuously "plain numeric" and returns `true`.
fn is_plain_numeric_subscript(content: &[MathToken]) -> bool {
    for token in content {
        match token {
            MathToken::Number(_) | MathToken::DecimalPoint => {}
            _ => return false,
        }
    }
    true
}
/// Decides whether subscript `content` has to be wrapped in grouping markers.
///
/// Grouping is *not* requested when:
/// - `content` holds zero or one token,
/// - `content` already starts with an opening and ends with a closing
///   [`BracketKind::MathParen`], or
/// - `content` is made up solely of number / decimal-point tokens.
///
/// Everything else returns `true`.
pub fn should_group_subscript(content: &[MathToken]) -> bool {
    match content {
        [] | [_] => false,
        [
            MathToken::OpenParen(BracketKind::MathParen),
            ..,
            MathToken::CloseParen(BracketKind::MathParen),
        ] => false,
        _ => !is_plain_numeric_subscript(content),
    }
}
/// Encodes one operand of a combination-style subscript pair into `result`.
///
/// A lone [`MathToken::Number`] is written via `rule_1::encode_number_literal`;
/// any other content is delegated to `engine.encode_tokens`.
///
/// # Errors
///
/// Returns whatever error `engine.encode_tokens` reports.
fn encode_combo_subscript_content(
    content: &[MathToken],
    result: &mut Vec<u8>,
    engine: &MathTokenEngine,
) -> Result<(), String> {
    match content {
        [MathToken::Number(literal)] => {
            rule_1::encode_number_literal(literal, result);
            Ok(())
        }
        _ => engine.encode_tokens(content, result),
    }
}
/// Finds the closest token strictly after position `idx` that is not a
/// [`MathToken::Space`].
///
/// Returns `None` when the slice ends before such a token is found.
fn next_non_space(tokens: &[MathToken], idx: usize) -> Option<&MathToken> {
    tokens
        .iter()
        .skip(idx + 1)
        .find(|token| !matches!(token, MathToken::Space))
}
/// Reports whether the subscript at `index` sits in a "left subscript"
/// position, i.e. it is not attached to a preceding base and is followed by
/// something it can attach to.
///
/// The answer is `false` when the nearest non-space token before `index` is a
/// variable, upper-case variable, number, closing bracket, prime, function
/// name, or one of the symbols ∫ ∬ ∭ ∮ ∑ ∏ ⋃ ⋂ ∀ ∃. Otherwise it is `true`
/// exactly when the nearest non-space token after `index` is a variable,
/// upper-case variable, or any math symbol.
fn is_left_subscript_position(tokens: &[MathToken], index: usize) -> bool {
    let attached_to_previous = matches!(
        prev_non_space(tokens, index),
        Some(
            MathToken::Variable(_)
                | MathToken::UpperVariable(_)
                | MathToken::Number(_)
                | MathToken::CloseParen(_)
                | MathToken::Prime
                | MathToken::FunctionName(_)
                | MathToken::MathSymbol(
                    '\u{222B}'
                        | '\u{222C}'
                        | '\u{222D}'
                        | '\u{222E}'
                        | '\u{2211}'
                        | '\u{220F}'
                        | '\u{22C3}'
                        | '\u{22C2}'
                        | '\u{2200}'
                        | '\u{2203}'
                )
        )
    );

    // Short-circuit: the look-ahead only runs when nothing on the left claims
    // the subscript.
    !attached_to_previous
        && matches!(
            next_non_space(tokens, index),
            Some(
                MathToken::Variable(_) | MathToken::UpperVariable(_) | MathToken::MathSymbol(_)
            )
        )
}
pub fn encode_subscript(
tokens: &[MathToken],
i: &mut usize,
content: &[MathToken],
result: &mut Vec<u8>,
engine: &MathTokenEngine,
) -> Result<bool, String> {
if matches!(
tokens.get(*i + 1),
Some(MathToken::UpperVariable('P' | 'C' | 'H'))
) && let Some(MathToken::Subscript(right_content)) = tokens.get(*i + 2)
&& let Some(MathToken::UpperVariable(mark)) = tokens.get(*i + 1)
{
result.push(32); result.push(crate::english::encode_english(mark.to_ascii_lowercase())?);
result.push(55); encode_combo_subscript_content(content, result, engine)?;
result.push(0);
encode_combo_subscript_content(right_content, result, engine)?;
result.push(62); *i += 3;
return Ok(true);
}
if matches!(tokens.get(*i + 1), Some(MathToken::MathSymbol('\u{03A0}')))
&& let Some(MathToken::Subscript(right_content)) = tokens.get(*i + 2)
{
result.push(32); result.push(40); result.push(crate::english::encode_english('p')?); result.push(55); encode_combo_subscript_content(content, result, engine)?;
result.push(0);
encode_combo_subscript_content(right_content, result, engine)?;
result.push(62); *i += 3;
return Ok(true);
}
if let Some(base) = single_numeric(content)
&& matches!(prev_non_space(tokens, *i), Some(MathToken::Number(_)))
{
result.push(48);
result.push(38);
rule_1::encode_number_literal(&base, result);
result.push(52);
*i += 1;
return Ok(false);
}
result.push(48);
let prev_is_quantifier_op = {
let mut cursor = *i;
let mut result: Option<bool> = None;
while result.is_none() {
match prev_non_space(tokens, cursor) {
Some(MathToken::MathSymbol(
'\u{222B}' | '\u{222C}' | '\u{222D}' | '\u{222E}' | '\u{2211}' | '\u{220F}'
| '\u{2200}' | '\u{2203}',
))
| Some(MathToken::FunctionName(_)) => {
result = Some(true);
}
Some(MathToken::Subscript(_)) => {
let progress = (0..cursor)
.rev()
.find(|&pc| !matches!(tokens.get(pc), Some(MathToken::Space)));
cursor = progress.unwrap_or(cursor);
result = progress.is_none().then_some(false);
}
_ => {
result = Some(false);
}
}
}
result.unwrap_or(false)
};
if prev_is_quantifier_op {
engine.encode_tokens(content, result)?;
*i += 1;
if needs_quantifier_trailing_space(tokens, *i) {
result.push(0);
}
return Ok(false);
}
let force_group = is_left_subscript_position(tokens, *i);
if should_group_subscript(content) || force_group {
result.push(55);
if let [MathToken::Number(n), MathToken::Variable(v)] = content {
rule_1::encode_number_literal(n, result);
result.push(16);
result.push(crate::english::encode_english(v.to_ascii_lowercase())?);
} else if let [MathToken::Number(n), MathToken::UpperVariable(v)] = content {
rule_1::encode_number_literal(n, result);
result.push(16);
result.push(crate::english::encode_english(v.to_ascii_lowercase())?);
} else {
engine.encode_tokens(content, result)?;
}
result.push(62);
} else {
engine.encode_tokens(content, result)?;
}
*i += 1;
let prev_is_quantifier = matches!(
prev_non_space(tokens, *i - 1),
Some(MathToken::FunctionName(_))
| Some(MathToken::MathSymbol(
'\u{222B}' | '\u{222C}' | '\u{222D}' | '\u{222E}' | '\u{2211}' | '\u{220F}' | '\u{2200}' | '\u{2203}' ))
);
let needs_pad = prev_is_quantifier && needs_quantifier_trailing_space(tokens, *i);
let pad_bytes: &[u8] = if needs_pad { &[0] } else { &[] };
result.extend_from_slice(pad_bytes);
Ok(false)
}
/// Decides whether a padding byte must follow a quantifier-style subscript,
/// based on the tokens starting at `idx`.
///
/// Scanning forward from `idx`, the first decisive token wins:
/// - a space or a superscript means no padding (`false`);
/// - a variable, upper-case variable, number, opening bracket, function name
///   or math symbol means padding is needed (`true`);
/// - any other token is skipped.
///
/// Running off the end of `tokens` without a decisive token yields `false`.
fn needs_quantifier_trailing_space(tokens: &[MathToken], idx: usize) -> bool {
    tokens
        .iter()
        .skip(idx)
        .find_map(|token| match token {
            MathToken::Space | MathToken::Superscript(_) => Some(false),
            MathToken::Variable(_)
            | MathToken::UpperVariable(_)
            | MathToken::Number(_)
            | MathToken::OpenParen(_)
            | MathToken::FunctionName(_)
            | MathToken::MathSymbol(_) => Some(true),
            _ => None,
        })
        .unwrap_or(false)
}
/// Token rule that handles [`MathToken::Subscript`] tokens by delegating to
/// [`encode_subscript`].
pub struct SubscriptRule;

impl MathTokenRule for SubscriptRule {
    /// Name under which this rule identifies itself.
    fn name(&self) -> &'static str {
        "SubscriptRule"
    }

    /// Priority value reported for this rule.
    fn priority(&self) -> u16 {
        50
    }

    /// Matches only when the token at `index` exists and is a subscript.
    fn matches(&self, tokens: &[MathToken], index: usize, _state: &MathEncodeState) -> bool {
        tokens
            .get(index)
            .is_some_and(|token| matches!(token, MathToken::Subscript(_)))
    }

    /// Encodes the subscript at `index` into `result`.
    ///
    /// Returns `Skip` when the token at `index` is not a subscript; otherwise
    /// clears `state.prev_was_number` and reports how many tokens
    /// `encode_subscript` advanced past.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `encode_subscript`.
    fn apply(
        &self,
        tokens: &[MathToken],
        index: usize,
        result: &mut Vec<u8>,
        state: &mut MathEncodeState,
        engine: &MathTokenEngine,
    ) -> Result<MathTokenResult, String> {
        match tokens.get(index) {
            Some(MathToken::Subscript(content)) => {
                let mut position = index;
                // The boolean outcome is irrelevant here; only the advanced
                // position matters to the caller.
                encode_subscript(tokens, &mut position, content, result, engine)?;
                state.prev_was_number = false;
                Ok(MathTokenResult::Consumed(position - index))
            }
            _ => Ok(MathTokenResult::Skip),
        }
    }
}
// Unit tests for the subscript rule: exact-byte checks for number-base
// notation, smoke tests through the public `crate::encode` entry point, and
// direct checks of the private helper predicates.
#[cfg(test)]
mod tests {
use super::super::encoder::encode_math_expression;
use super::*;
// `1010₂` (subscript digit directly after a number) must produce this exact
// byte sequence, including the 48, 38 … 52 wrapper around the base.
#[test]
fn encodes_number_base_notation_without_explicit_subscript_parentheses() {
assert_eq!(
encode_math_expression("1010₂").expect("math encoding should succeed"),
vec![60, 1, 26, 1, 26, 48, 38, 60, 3, 52]
);
}
// Same notation with the base written inside subscript parentheses; the
// expected bytes contain no extra cells for the explicit parentheses.
#[test]
fn encodes_number_base_notation_with_explicit_subscript_parentheses() {
assert_eq!(
encode_math_expression("1101₍₂₎").expect("math encoding should succeed"),
vec![60, 1, 1, 26, 1, 48, 38, 60, 3, 52]
);
}
// Helper: runs `crate::encode` and turns any failure into an empty vector, so
// the smoke tests below can detect failure with `is_empty()`.
fn enc(input: &str) -> Vec<u8> {
crate::encode(input).unwrap_or_default()
}
// Smoke test: a single-digit subscript encodes to something non-empty.
#[test]
fn subscript_simple_digit() {
let bytes = enc("$x_2$");
assert!(!bytes.is_empty());
}
// Smoke test: a multi-token subscript (`i+1`) encodes to something non-empty.
#[test]
fn subscript_compound_index() {
let bytes = enc("$x_{i+1}$");
assert!(!bytes.is_empty());
}
// Smoke test: subscript on a summation sign with a superscript and a
// following variable.
#[test]
fn subscript_quantifier_with_following_var() {
let bytes = enc("$\\sum_{i=1}^{n} i$");
assert!(!bytes.is_empty());
}
// Smoke test: subscript on a function name followed by a parenthesised
// argument.
#[test]
fn subscript_after_function_then_paren() {
let bytes = enc("$\\log_{2}(x)$");
assert!(!bytes.is_empty());
}
// Smoke test: a multi-digit numeric subscript.
#[test]
fn subscript_multi_digit_index() {
let bytes = enc("$a_{12}$");
assert!(!bytes.is_empty());
}
// Smoke test: a subscript with a leading minus sign.
#[test]
fn subscript_with_negative_index() {
let bytes = enc("$x_{-1}$");
assert!(!bytes.is_empty());
}
// Table-driven check of `needs_quantifier_trailing_space` starting at index 0:
// spaces, superscripts, empty input and operator-only input give `false`;
// variables, numbers, function names, opening brackets and math symbols give
// `true`.
#[rstest::rstest]
#[case::space_false(vec![MathToken::Space], false)]
#[case::variable_true(vec![MathToken::Variable('x')], true)]
#[case::superscript_at_idx_false(
vec![MathToken::Superscript(vec![MathToken::Number("2".into())])],
false,
)]
#[case::number_true(vec![MathToken::Number("1".into())], true)]
#[case::empty_false(vec![], false)]
#[case::function_name_true(vec![MathToken::FunctionName("sin".into())], true)]
#[case::open_paren_true(vec![MathToken::OpenParen(BracketKind::MathParen)], true)]
#[case::math_symbol_true(vec![MathToken::MathSymbol('+')], true)]
#[case::upper_variable_true(vec![MathToken::UpperVariable('X')], true)]
#[case::operator_tail_empty_false(
vec![MathToken::Operator('+'), MathToken::Operator('+')],
false,
)]
fn needs_quantifier_trailing_space_branches(
#[case] tokens: Vec<MathToken>,
#[case] expected: bool,
) {
assert_eq!(needs_quantifier_trailing_space(&tokens, 0), expected);
}
// Pins the rule's reported priority and name.
#[test]
fn subscript_rule_priority_and_name() {
let r = SubscriptRule;
assert_eq!(r.priority(), 50);
assert_eq!(r.name(), "SubscriptRule");
}
// A subscript directly after a function name is not a left subscript, even
// though a variable follows it.
#[test]
fn left_subscript_position_blocked_by_function_name() {
let toks = vec![
MathToken::FunctionName("lim".into()),
MathToken::Subscript(vec![MathToken::Variable('n')]),
MathToken::Variable('x'),
];
assert!(!is_left_subscript_position(&toks, 1));
}
// A subscript directly after U+2200 (∀) is not a left subscript either.
#[test]
fn left_subscript_position_blocked_by_universal_quantifier() {
let toks = vec![
MathToken::MathSymbol('\u{2200}'),
MathToken::Subscript(vec![MathToken::Variable('x')]),
MathToken::Variable('y'),
];
assert!(!is_left_subscript_position(&toks, 1));
}
// Only checks that encoding this input does not panic; the output is
// discarded, and `enc` swallows encoding errors.
#[test]
fn subscript_after_substack_chain() {
let bytes = enc("$\\sum_{i=1}\\substack{j=1}$");
let _ = bytes;
}
// Smoke test: subscript content made of a number followed by an upper-case
// letter.
#[test]
fn subscript_with_number_upper_var_content() {
let bytes = enc("$a_{1X}$");
assert!(!bytes.is_empty());
}
// Smoke test: summation subscript immediately followed by a function call.
#[test]
fn quantifier_trailing_space_after_subscript() {
let bytes = enc("$\\sum_{i=1}f(x)$");
assert!(!bytes.is_empty());
}
// `should_group_subscript`: content already wrapped in math parentheses is not
// grouped again, while a multi-token non-numeric expression is.
#[rstest::rstest]
#[case::paren_wrapped_no_group(
vec![
MathToken::OpenParen(BracketKind::MathParen),
MathToken::Variable('a'),
MathToken::CloseParen(BracketKind::MathParen),
],
false,
)]
#[case::multi_token_non_numeric_groups(
vec![MathToken::Variable('a'), MathToken::Operator('+'), MathToken::Variable('b')],
true,
)]
fn should_group_subscript_paren_wrapped_content_skipped(
#[case] content: Vec<MathToken>,
#[case] expected: bool,
) {
assert_eq!(should_group_subscript(&content), expected);
}
// Only checks that a subscript / `P` / subscript sequence after a summation
// sign does not panic; the output is discarded.
#[test]
fn left_subscript_combinatorics_pattern() {
let bytes = enc("$\\sum_{n}P_{r}$");
let _ = bytes;
}
// The forward scan skips a non-decisive operator token and then stops with
// `false` on the space behind it.
#[test]
fn needs_quantifier_trailing_space_loop_encounters_space() {
let toks = vec![MathToken::Operator(','), MathToken::Space];
assert!(!needs_quantifier_trailing_space(&toks, 0));
}
// The forward scan skips a non-decisive operator token and then stops with
// `false` on the superscript behind it.
#[test]
fn needs_quantifier_trailing_space_loop_encounters_superscript() {
let toks = vec![
MathToken::Operator(','),
MathToken::Superscript(vec![MathToken::Number("2".into())]),
];
assert!(!needs_quantifier_trailing_space(&toks, 0));
}
// Calling `apply` on a token that is not a subscript returns `Skip`.
#[test]
fn subscript_rule_apply_with_non_subscript_returns_skip() {
let r = SubscriptRule;
let mut state = MathEncodeState::with_context(
false,
super::super::math_token_rule::MathContext::default(),
);
let toks = vec![MathToken::Variable('x')];
let mut result = Vec::new();
let engine =
MathTokenEngine::with_context(super::super::math_token_rule::MathContext::default());
let res = r.apply(&toks, 0, &mut result, &mut state, &engine);
assert!(matches!(res, Ok(MathTokenResult::Skip)));
}
}