use super::BracketKind;
use super::MathToken;
/// Returns `true` when `c` is a Hangul character recognised by this module.
///
/// Two ranges are accepted:
/// * `U+AC00..=U+D7A3` — precomposed Hangul syllables (`가` through `힣`).
/// * `U+3131..=U+3163` — Hangul compatibility jamo (`ㄱ` through `ㅣ`).
///
/// Anything outside those ranges, including the conjoining jamo at `U+1100`,
/// yields `false`.
pub(super) fn is_korean_char(c: char) -> bool {
    matches!(c, '\u{AC00}'..='\u{D7A3}' | '\u{3131}'..='\u{3163}')
}
/// Returns `true` when `c` is one of the Unicode superscript characters that
/// [`normalize_superscript`] can translate.
///
/// The accepted set is superscript digits `0`-`9`, `+`, `-`, `(`, `)` and the
/// superscript Latin small letters `a`-`z` except `q`. Superscript equals
/// (`U+207C`) is not part of the set.
pub(super) fn is_superscript_char(c: char) -> bool {
    // Digits, sign operators and parentheses.
    let is_digit_or_symbol = matches!(
        c,
        // Superscript 2, 3 and 1 sit in Latin-1 Supplement.
        '\u{00B2}' | '\u{00B3}' | '\u{00B9}'
        // Superscript 0, then 4-9 followed directly by plus and minus.
        | '\u{2070}'
        | '\u{2074}'..='\u{207B}'
        // Opening and closing parenthesis.
        | '\u{207D}' | '\u{207E}'
    );

    // Superscript small letters, grouped by the Unicode block they live in.
    let is_letter = matches!(
        c,
        // Spacing Modifier Letters: h, j, r, w, y, then l, s, x.
        '\u{02B0}' | '\u{02B2}' | '\u{02B3}' | '\u{02B7}' | '\u{02B8}'
        | '\u{02E1}'..='\u{02E3}'
        // Phonetic Extensions: a, (b, d, e), g, (k, m), o, (p, t, u), v.
        | '\u{1D43}'
        | '\u{1D47}'..='\u{1D49}'
        | '\u{1D4D}'
        | '\u{1D4F}'..='\u{1D50}'
        | '\u{1D52}'
        | '\u{1D56}'..='\u{1D58}'
        | '\u{1D5B}'
        // Phonetic Extensions Supplement: c, f, z.
        | '\u{1D9C}' | '\u{1DA0}' | '\u{1DBB}'
        // Superscripts and Subscripts: i, n.
        | '\u{2071}' | '\u{207F}'
    );

    is_digit_or_symbol || is_letter
}
/// Returns `true` when `c` is one of the Unicode subscript characters that
/// [`normalize_subscript`] can translate.
///
/// The accepted set is subscript digits `0`-`9`, `+`, `-`, `(`, `)` and the
/// subscript Latin small letters `a e o x h k l m n p s t i r u v`.
///
/// `U+2094` (subscript schwa) is excluded even though it lies between the
/// letter code points: [`normalize_subscript`] has no mapping for it, so
/// accepting it here would flag a character that normalizes to `None`.
/// Subscript equals (`U+208C`) is not part of the set either.
pub(super) fn is_subscript_char(c: char) -> bool {
    matches!(
        c,
        // Digits 0-9 followed directly by plus and minus.
        '\u{2080}'..='\u{208B}'
        // Opening and closing parenthesis.
        | '\u{208D}' | '\u{208E}'
        // a, e, o, x — stops short of the schwa at U+2094.
        | '\u{2090}'..='\u{2093}'
        // h, k, l, m, n, p, s, t.
        | '\u{2095}'..='\u{209C}'
        // i, r, u, v.
        | '\u{1D62}'..='\u{1D65}'
    )
}
pub(super) fn normalize_superscript(c: char) -> Option<MathToken> {
match c {
'\u{2070}' => Some(MathToken::Number("0".into())),
'\u{00B9}' => Some(MathToken::Number("1".into())),
'\u{00B2}' => Some(MathToken::Number("2".into())),
'\u{00B3}' => Some(MathToken::Number("3".into())),
'\u{2074}' => Some(MathToken::Number("4".into())),
'\u{2075}' => Some(MathToken::Number("5".into())),
'\u{2076}' => Some(MathToken::Number("6".into())),
'\u{2077}' => Some(MathToken::Number("7".into())),
'\u{2078}' => Some(MathToken::Number("8".into())),
'\u{2079}' => Some(MathToken::Number("9".into())),
'\u{207A}' => Some(MathToken::Operator('+')),
'\u{207B}' => Some(MathToken::Operator('\u{2212}')),
'\u{207D}' => Some(MathToken::OpenParen(BracketKind::MathParen)),
'\u{207E}' => Some(MathToken::CloseParen(BracketKind::MathParen)),
'\u{207F}' => Some(MathToken::Variable('n')),
'\u{1D43}' => Some(MathToken::Variable('a')),
'\u{1D47}' => Some(MathToken::Variable('b')),
'\u{1D9C}' => Some(MathToken::Variable('c')),
'\u{1D48}' => Some(MathToken::Variable('d')),
'\u{1D49}' => Some(MathToken::Variable('e')),
'\u{1DA0}' => Some(MathToken::Variable('f')),
'\u{1D4D}' => Some(MathToken::Variable('g')),
'\u{02B0}' => Some(MathToken::Variable('h')),
'\u{2071}' => Some(MathToken::Variable('i')),
'\u{02B2}' => Some(MathToken::Variable('j')),
'\u{1D4F}' => Some(MathToken::Variable('k')),
'\u{02E1}' => Some(MathToken::Variable('l')),
'\u{1D50}' => Some(MathToken::Variable('m')),
'\u{1D52}' => Some(MathToken::Variable('o')),
'\u{1D56}' => Some(MathToken::Variable('p')),
'\u{02B3}' => Some(MathToken::Variable('r')),
'\u{02E2}' => Some(MathToken::Variable('s')),
'\u{1D57}' => Some(MathToken::Variable('t')),
'\u{1D58}' => Some(MathToken::Variable('u')),
'\u{1D5B}' => Some(MathToken::Variable('v')),
'\u{02B7}' => Some(MathToken::Variable('w')),
'\u{02E3}' => Some(MathToken::Variable('x')),
'\u{02B8}' => Some(MathToken::Variable('y')),
'\u{1DBB}' => Some(MathToken::Variable('z')),
_ => None,
}
}
/// Translates a Unicode subscript character into the token for its plain
/// counterpart.
///
/// * subscript digits become `MathToken::Number` holding the ASCII digit;
/// * subscript plus becomes `Operator('+')` and subscript minus becomes
///   `Operator('\u{2212}')` (MINUS SIGN, not the ASCII hyphen);
/// * subscript parentheses become `OpenParen` / `CloseParen` with
///   `BracketKind::MathParen`;
/// * the subscript small letters `a e o x h k l m n p s t i r u v` become
///   `MathToken::Variable` holding the ASCII letter.
///
/// Returns `None` for every other character. That includes `U+2094`
/// (subscript schwa), which has no entry in the table.
pub(super) fn normalize_subscript(c: char) -> Option<MathToken> {
    // Small constructors keep the lookup table below compact.
    let digit = |text: &'static str| MathToken::Number(text.into());
    let letter = MathToken::Variable;

    let token = match c {
        // Digits.
        '\u{2080}' => digit("0"),
        '\u{2081}' => digit("1"),
        '\u{2082}' => digit("2"),
        '\u{2083}' => digit("3"),
        '\u{2084}' => digit("4"),
        '\u{2085}' => digit("5"),
        '\u{2086}' => digit("6"),
        '\u{2087}' => digit("7"),
        '\u{2088}' => digit("8"),
        '\u{2089}' => digit("9"),

        // Sign operators and parentheses.
        '\u{208A}' => MathToken::Operator('+'),
        '\u{208B}' => MathToken::Operator('\u{2212}'),
        '\u{208D}' => MathToken::OpenParen(BracketKind::MathParen),
        '\u{208E}' => MathToken::CloseParen(BracketKind::MathParen),

        // Letters at U+2090..U+209C, skipping U+2094.
        '\u{2090}' => letter('a'),
        '\u{2091}' => letter('e'),
        '\u{2092}' => letter('o'),
        '\u{2093}' => letter('x'),
        '\u{2095}' => letter('h'),
        '\u{2096}' => letter('k'),
        '\u{2097}' => letter('l'),
        '\u{2098}' => letter('m'),
        '\u{2099}' => letter('n'),
        '\u{209A}' => letter('p'),
        '\u{209B}' => letter('s'),
        '\u{209C}' => letter('t'),

        // Letters at U+1D62..U+1D65.
        '\u{1D62}' => letter('i'),
        '\u{1D63}' => letter('r'),
        '\u{1D64}' => letter('u'),
        '\u{1D65}' => letter('v'),

        // Not a subscript this table knows about.
        _ => return None,
    };
    Some(token)
}
/// Maps a styled mathematical letter or digit to its plain ASCII equivalent.
///
/// * `U+210E` maps to `'h'`.
/// * `U+1D400..=U+1D6A3` maps to `A`-`Z` / `a`-`z`.
/// * `U+1D7CE..=U+1D7FF` maps to `0`-`9`.
///
/// Every other character is returned unchanged. That includes the remaining
/// code points of the Mathematical Alphanumeric Symbols block and letterlike
/// symbols other than `U+210E`.
pub(super) fn normalize_math_alphanumeric(c: char) -> char {
    // The styled Latin alphabets are laid out back to back: 26 runs of 26 code
    // points starting at U+1D400, alternating uppercase and lowercase.
    const LETTERS_FIRST: u32 = 0x1D400;
    const LETTERS_LAST: u32 = 0x1D6A3;
    const ALPHABET_LEN: u32 = 26;

    // The styled digits are five back-to-back runs of 10 code points.
    const DIGITS_FIRST: u32 = 0x1D7CE;
    const DIGITS_LAST: u32 = 0x1D7FF;
    const DIGIT_COUNT: u32 = 10;

    let code = u32::from(c);
    match code {
        // U+210E lies outside the main block and is handled on its own.
        0x210E => 'h',
        LETTERS_FIRST..=LETTERS_LAST => {
            let distance = code - LETTERS_FIRST;
            // Even-numbered runs are uppercase, odd-numbered runs lowercase.
            let base = if (distance / ALPHABET_LEN) % 2 == 0 {
                b'A'
            } else {
                b'a'
            };
            char::from_u32(u32::from(base) + distance % ALPHABET_LEN).unwrap_or(c)
        }
        DIGITS_FIRST..=DIGITS_LAST => {
            let digit = (code - DIGITS_FIRST) % DIGIT_COUNT;
            char::from_u32(u32::from(b'0') + digit).unwrap_or(c)
        }
        _ => c,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// Plain ASCII characters are not superscripts, so no token is produced.
    #[rstest]
    #[case('a')]
    #[case('1')]
    fn normalize_superscript_none_for_non_superscript(#[case] ch: char) {
        let token = normalize_superscript(ch);
        assert!(token.is_none());
    }

    /// Plain ASCII letters are not subscripts, so no token is produced.
    #[rstest]
    #[case('a')]
    #[case('Z')]
    fn normalize_subscript_none_for_non_subscript(#[case] ch: char) {
        let token = normalize_subscript(ch);
        assert!(token.is_none());
    }

    /// Spot-checks U+210E plus the first code point of two letter blocks and
    /// two digit blocks.
    #[rstest]
    #[case('\u{210E}', 'h')]
    #[case('\u{1D400}', 'A')]
    #[case('\u{1D434}', 'A')]
    #[case('\u{1D7CE}', '0')]
    #[case('\u{1D7D8}', '0')]
    fn normalize_math_alphanumeric_table(#[case] input: char, #[case] expected: char) {
        let actual = normalize_math_alphanumeric(input);
        assert_eq!(actual, expected);
    }
}