use crate::rules::context::EncoderState;
use crate::rules::token::Token;
use crate::rules::token_rule::{TokenAction, TokenPhase, TokenRule};
/// Token rule for math expressions.
///
/// A field-less marker type: its `TokenRule` impl in this file reports the
/// `FractionDetection` phase and forwards the actual work to `apply::run`.
pub struct MathExpressionTokenRule;
// Private submodules backing this rule.
// `apply` provides `run`, which the `TokenRule::apply` impl in this file calls.
mod apply;
// `detect` provides `is_math_expression`, exercised by this file's unit tests.
mod detect;
// `helpers` is glob-imported by this file's unit tests; presumably it is where
// the character-class and slice-encoding helpers they call live — confirm in
// the module itself.
mod helpers;
/// Hooks [`MathExpressionTokenRule`] into the token-rule pipeline.
///
/// The impl carries no logic of its own: it reports where the rule runs and
/// hands the actual work to the `apply` submodule.
impl TokenRule for MathExpressionTokenRule {
    /// Phase in which this rule runs: [`TokenPhase::FractionDetection`].
    fn phase(&self) -> TokenPhase {
        TokenPhase::FractionDetection
    }

    /// Priority of this rule, fixed at `50`.
    ///
    /// NOTE(review): how priorities are ordered within a phase is decided by
    /// whoever consumes `TokenRule`, which is not visible from this file.
    fn priority(&self) -> u16 {
        const PRIORITY: u16 = 50;
        PRIORITY
    }

    /// Applies the rule to the token at `index` by forwarding every argument,
    /// unchanged, to `apply::run`.
    ///
    /// # Errors
    ///
    /// Returns whatever error string `apply::run` produces.
    fn apply<'a>(
        &self,
        tokens: &[Token<'a>],
        index: usize,
        state: &mut EncoderState,
    ) -> Result<TokenAction<'a>, String> {
        apply::run(tokens, index, state)
    }
}
#[cfg(test)]
mod tests {
    use super::detect::is_math_expression;
    use super::helpers::*;
    use super::*;
    use crate::rules::math::math_token_rule::MathContext;
    use crate::rules::token::WordMeta;
    use std::borrow::Cow;

    /// Runs `is_math_expression` on `input`, building the char slice from the
    /// same string so the two arguments can never drift apart.
    fn detects_math(input: &str) -> bool {
        let chars: Vec<char> = input.chars().collect();
        is_math_expression(&chars, input)
    }

    /// Encodes `input` through the crate-level encoder, collapsing any error
    /// into an empty byte vector.
    fn enc(input: &str) -> Vec<u8> {
        crate::encode(input).unwrap_or_default()
    }

    // ---- is_math_expression: positive and negative detection cases ----

    #[test]
    fn test_is_math_with_operator() {
        assert!(detects_math("ax+b=0"));
    }

    #[test]
    fn test_is_math_with_function() {
        assert!(detects_math("sin3x"));
    }

    #[test]
    fn test_is_math_with_standalone_function_name() {
        assert!(detects_math("sin"));
    }

    #[test]
    fn test_is_not_math_korean() {
        assert!(!detects_math("안녕"));
    }

    #[test]
    fn test_is_not_math_plain_english() {
        assert!(!detects_math("hello"));
    }

    #[test]
    fn test_is_math_with_superscript() {
        assert!(detects_math("x²"));
    }

    #[test]
    fn test_is_math_digit_letter_with_operator() {
        assert!(detects_math("3a+b"));
    }

    #[test]
    fn test_is_math_digit_then_letter() {
        assert!(detects_math("3ab"));
    }

    #[test]
    fn test_is_not_math_letter_then_digit() {
        assert!(!detects_math("MP3"));
    }

    #[test]
    fn test_is_math_symbol_digit_combo() {
        assert!(detects_math("≠0"));
    }

    #[test]
    fn test_decimal_starting_with_digit_is_not_math() {
        assert!(!detects_math("0.17"));
        assert!(!detects_math("96.7"));
    }

    #[test]
    fn test_decimal_starting_with_dot_is_math() {
        assert!(detects_math(".47"));
    }

    #[test]
    fn test_is_math_relation_shorthand() {
        assert!(detects_math("aRb"));
    }

    #[rstest::rstest]
    #[case("arcsinx")]
    #[case("arccosy")]
    #[case("arctanz")]
    fn test_is_math_arctrig_prefix(#[case] input: &str) {
        assert!(detects_math(input), "input={input}");
    }

    #[rstest::rstest]
    #[case::upper_force_normal("F/N", true)]
    #[case::lower_pair("a/b", true)]
    #[case::xy_pair("x/y", true)]
    #[case::pq_pair("P/Q", true)]
    #[case::trailing_slash_not_math("a/", false)]
    fn test_is_math_letter_slash_letter_fraction(#[case] input: &str, #[case] expected: bool) {
        assert_eq!(detects_math(input), expected, "input={input}");
    }

    #[rstest::rstest]
    #[case("-3")]
    #[case("-1.5")]
    #[case("−7")]
    #[case("-3x")]
    #[case("−5y")]
    fn test_is_math_signed_numeric(#[case] input: &str) {
        assert!(detects_math(input), "input={input}");
    }

    #[test]
    fn test_is_math_negative_infinity() {
        assert!(detects_math("-∞"));
    }

    #[test]
    fn test_is_math_unicode_fraction_char() {
        assert!(detects_math("⅔"));
    }

    #[test]
    fn test_is_math_base_notation() {
        assert!(detects_math("1010₂"));
    }

    // ---- split_mixed_math_word ----

    #[test]
    fn split_mixed_math_word_extracts_math_prefix() {
        let text = "tan의";
        let chars: Vec<char> = text.chars().collect();
        let word = crate::rules::token::WordToken {
            text: Cow::Borrowed(text),
            chars: chars.clone(),
            meta: WordMeta::from_chars(&chars),
        };

        let parts =
            split_mixed_math_word(&word, 2, MathContext::default()).expect("expected split");

        // Expected layout: [0, 0] bytes, a pre-encoded token, [0, 0] bytes
        // again, then the remaining Korean text as a plain word.
        assert!(matches!(parts[0], Token::PreEncoded(ref bytes) if bytes == &vec![0, 0]));
        assert!(matches!(parts[1], Token::PreEncoded(_)));
        assert!(matches!(parts[2], Token::PreEncoded(ref bytes) if bytes == &vec![0, 0]));
        assert!(matches!(&parts[3], Token::Word(w) if w.text == "의"));
    }

    #[test]
    fn split_mixed_math_word_keeps_plain_mixed_english_korean() {
        let text = "ATM에서";
        let chars: Vec<char> = text.chars().collect();
        let word = crate::rules::token::WordToken {
            text: Cow::Borrowed(text),
            chars: chars.clone(),
            meta: WordMeta::from_chars(&chars),
        };

        let outcome = split_mixed_math_word(&word, 2, MathContext::default());
        assert!(outcome.is_none());
    }

    // ---- character-class helpers ----

    #[test]
    fn is_superscript_table() {
        let superscripts = ['\u{2070}', '\u{00B9}', '\u{00B2}', '\u{00B3}'];
        for ch in superscripts {
            assert!(is_superscript(ch));
        }
        assert!(!is_superscript('1'));
        assert!(!is_superscript('a'));
    }

    #[test]
    fn is_subscript_table() {
        let subscripts = ['\u{2080}', '\u{2081}', '\u{2082}'];
        for ch in subscripts {
            assert!(is_subscript(ch));
        }
        assert!(!is_subscript('1'));
    }

    #[test]
    fn is_combining_math_mark_table() {
        assert!(is_combining_math_mark('\u{0304}'));
        assert!(is_combining_math_mark('\u{0305}'));
        assert!(!is_combining_math_mark('a'));
    }

    #[test]
    fn is_middle_dot_numeric_word_paths() {
        let is_numeric_word = |text: &str| {
            let chars: Vec<char> = text.chars().collect();
            is_middle_dot_numeric_word(&chars)
        };

        assert!(is_numeric_word("1·2"));
        assert!(!is_numeric_word("ab"));
        assert!(!is_numeric_word(""));
    }

    #[test]
    fn is_korean_char_paths() {
        assert!(is_korean_char('가'));
        assert!(!is_korean_char('a'));
        assert!(!is_korean_char('1'));
    }

    // ---- smoke tests: these only check that the calls complete ----

    #[test]
    fn is_korean_suffix_char_paths() {
        for ch in ['가', 'a'] {
            let _ = is_korean_suffix_char(ch);
        }
    }

    #[test]
    fn rule_44_space_before_korean_paths() {
        for text in ["abc가", "123", ""] {
            let _ = rule_44_requires_space_before_korean(text);
        }
    }

    #[test]
    fn is_strong_mixed_math_candidate_paths() {
        for text in ["a+b", ""] {
            let chars: Vec<char> = text.chars().collect();
            let _ = is_strong_mixed_math_candidate(&chars, text);
        }
    }

    #[test]
    fn is_rule_68_compact_notation_paths() {
        let compact: Vec<char> = "A⁺".chars().collect();
        let _ = is_rule_68_compact_notation(&compact);

        let plain: Vec<char> = "hello".chars().collect();
        assert!(!is_rule_68_compact_notation(&plain));
    }

    #[test]
    fn math_expression_diverse_inputs() {
        const INPUTS: &[&str] = &[
            "ax+b=0",
            "1+2=3",
            "x²",
            "y₂",
            "x²+y²=r²",
            "1·2",
            "3·4",
            "$x \\bar{a}$",
            "$\\overline{AB}$",
            "ATM에서",
            "1+1=2가",
            "f'(x)",
            "f''(x)",
            "x^2_n",
            "a^2 b^2",
        ];

        // The encoded output is discarded; each input just has to get
        // through the encoder.
        for &input in INPUTS {
            let _ = enc(input);
        }
    }

    #[test]
    fn build_word_token_basic() {
        let token = build_word_token(String::from("hello"));
        assert!(matches!(token, Token::Word(_)));
    }

    #[test]
    fn try_encode_math_slice_paths() {
        for text in ["1+2", "abc"] {
            let chars: Vec<char> = text.chars().collect();
            let _ = try_encode_math_slice(&chars, MathContext::default());
        }
    }

    #[test]
    fn try_encode_mixed_math_slice_paths() {
        let mixed: Vec<char> = "1+2가".chars().collect();
        let _ = try_encode_mixed_math_slice(&mixed, MathContext::default());
    }

    #[test]
    fn try_encode_mixed_math_prefix_paths() {
        let math_part: Vec<char> = "1+2".chars().collect();
        let korean_part: Vec<char> = "가".chars().collect();
        let _ = try_encode_mixed_math_prefix(&math_part, &korean_part, MathContext::default());
    }
}