use crate::char_struct::CharType;
use crate::english;
use crate::rules::RuleMeta;
use crate::rules::context::RuleContext;
use crate::rules::korean::rule_29::ROMAN_INDICATOR;
use crate::rules::traits::{BrailleRule, Phase, RuleResult};
pub static META: RuleMeta = RuleMeta {
section: "68",
subsection: None,
name: "superscript_subscript_symbols",
standard_ref: "2024 Korean Braille Standard, Ch.6 Art.68",
description: "Superscripts, subscripts, and selected compact unit symbols",
};
const MAPPINGS: &[(char, &str)] = &[
('㎡', "⠴⠍⠘⠼⠃"),
('㏊', "⠴⠓⠁⠲"),
('⁺', "⠘⠢"),
('⁻', "⠘⠔"),
('₆', "⠰⠼⠋"),
('₉', "⠰⠼⠊"),
];
const GRADE_MINUS: [u8; 2] = [
crate::unicode::decode_unicode('⠘'),
crate::unicode::decode_unicode('⠔'),
];
const SUPERSCRIPT_PREFIX: u8 = crate::unicode::decode_unicode('⠘');
const SUBSCRIPT_PREFIX: u8 = crate::unicode::decode_unicode('⠰');
const NUMBER_PREFIX: u8 = crate::unicode::decode_unicode('⠼');
const ENGLISH_PREFIX: u8 = crate::unicode::decode_unicode('⠴');
const UPPERCASE_PREFIX: u8 = crate::unicode::decode_unicode('⠠');
fn encode_unicode_cells(unicode: &str) -> Vec<u8> {
unicode
.chars()
.map(crate::unicode::decode_unicode)
.collect()
}
fn should_insert_separator_after_symbol(ctx: &RuleContext) -> bool {
matches!(ctx.current_char(), '㎡') && matches!(ctx.next_char(), Some('는' | '은'))
}
pub fn is_rule_68_symbol(c: char) -> bool {
MAPPINGS.iter().any(|(candidate, _)| *candidate == c)
}
fn is_superscript_symbol(c: char) -> bool {
matches!(c, '⁺' | '⁻')
}
fn is_subscript_digit(c: char) -> bool {
matches!(c, '₀'..='₉')
}
fn is_grade_notation(word: &[char], index: usize) -> bool {
matches!(word.get(index), Some(ch) if ch.is_ascii_uppercase())
&& matches!(word.get(index + 1), Some('-'))
&& word.len() == index + 2
}
fn is_compact_ascii_notation(word: &[char], index: usize) -> bool {
matches!(word.get(index), Some(ch) if ch.is_ascii_uppercase())
&& word
.get(index + 1)
.is_some_and(|next| is_superscript_symbol(*next) || is_subscript_digit(*next))
}
fn encode_compact_ascii_notation(
word: &[char],
index: usize,
needs_roman_indicator: bool,
) -> Result<Option<(Vec<u8>, usize)>, String> {
let Some(base) = word.get(index).copied() else {
return Ok(None);
};
if !base.is_ascii_uppercase() {
return Ok(None);
}
let mut encoded = Vec::new();
if needs_roman_indicator {
encoded.push(ENGLISH_PREFIX);
}
encoded.push(UPPERCASE_PREFIX);
encoded.push(english::encode_english(base)?);
let mut consumed = 1usize;
let mut cursor = index + 1;
if word.get(cursor) == Some(&'-') {
encoded.extend_from_slice(&GRADE_MINUS);
consumed += 1;
return Ok(Some((encoded, consumed)));
}
if word
.get(cursor)
.is_some_and(|ch| is_superscript_symbol(*ch))
{
encoded.push(SUPERSCRIPT_PREFIX);
while let Some(ch) = word.get(cursor).copied() {
let cell = match ch {
'⁺' => crate::unicode::decode_unicode('⠢'),
'⁻' => crate::unicode::decode_unicode('⠔'),
_ => break,
};
encoded.push(cell);
consumed += 1;
cursor += 1;
}
return Ok(Some((encoded, consumed)));
}
if word.get(cursor).is_some_and(|ch| is_subscript_digit(*ch)) {
encoded.push(SUBSCRIPT_PREFIX);
encoded.push(NUMBER_PREFIX);
while let Some(ch) = word.get(cursor).copied() {
let digit = match ch {
'₀' => '0',
'₁' => '1',
'₂' => '2',
'₃' => '3',
'₄' => '4',
'₅' => '5',
'₆' => '6',
'₇' => '7',
'₈' => '8',
'₉' => '9',
_ => break,
};
encoded.push(crate::number::encode_number(digit)?);
consumed += 1;
cursor += 1;
}
return Ok(Some((encoded, consumed)));
}
Ok(None)
}
pub struct Rule68;
impl BrailleRule for Rule68 {
fn meta(&self) -> &'static RuleMeta {
&META
}
fn phase(&self) -> Phase {
Phase::CoreEncoding
}
fn priority(&self) -> u16 {
90
}
fn matches(&self, ctx: &RuleContext) -> bool {
matches!(ctx.char_type, CharType::Symbol(c) if is_rule_68_symbol(*c))
|| matches!(ctx.char_type, CharType::English(_)
if is_compact_ascii_notation(ctx.word_chars, ctx.index)
|| is_grade_notation(ctx.word_chars, ctx.index))
|| (matches!(
ctx.char_type,
CharType::MathSymbol('+') | CharType::Symbol('+')
) && is_digit_grade_plus_notation(ctx.word_chars, ctx.index))
}
fn apply(&self, ctx: &mut RuleContext) -> Result<RuleResult, String> {
if matches!(ctx.char_type, CharType::English(_))
&& let Some((encoded, consumed)) = encode_compact_ascii_notation(
ctx.word_chars,
ctx.index,
!ctx.state.is_english && ctx.result.last().copied() != Some(ROMAN_INDICATOR),
)?
{
ctx.emit_slice(&encoded);
ctx.state.is_english = false;
ctx.state.needs_english_continuation = false;
*ctx.skip_count = consumed.saturating_sub(1);
return Ok(RuleResult::Consumed);
}
if matches!(
ctx.char_type,
CharType::MathSymbol('+') | CharType::Symbol('+')
) && is_digit_grade_plus_notation(ctx.word_chars, ctx.index)
{
ctx.emit(SUPERSCRIPT_PREFIX);
let mut cursor = ctx.index;
let mut consumed = 0usize;
while let Some(&ch) = ctx.word_chars.get(cursor) {
let cell = match ch {
'+' => crate::unicode::decode_unicode('⠢'),
'-' => crate::unicode::decode_unicode('⠔'),
_ => break,
};
ctx.emit(cell);
consumed += 1;
cursor += 1;
}
if ctx
.word_chars
.get(cursor)
.is_some_and(|c| crate::utils::is_korean_char(*c))
{
ctx.emit(0);
}
*ctx.skip_count = consumed.saturating_sub(1);
return Ok(RuleResult::Consumed);
}
let Some((_, unicode)) = MAPPINGS
.iter()
.find(|(candidate, _)| *candidate == ctx.current_char())
else {
return Ok(RuleResult::Skip);
};
let encoded = encode_unicode_cells(unicode);
ctx.emit_slice(&encoded);
if should_insert_separator_after_symbol(ctx) {
ctx.emit(0);
}
Ok(RuleResult::Consumed)
}
}
fn is_digit_grade_plus_notation(word: &[char], index: usize) -> bool {
if index == 0 {
return false;
}
if !word.get(index - 1).is_some_and(|c| c.is_ascii_digit()) {
return false;
}
let mut cursor = index;
while let Some(&ch) = word.get(cursor) {
if matches!(ch, '+' | '-') {
cursor += 1;
} else {
break;
}
}
word.get(cursor)
.is_some_and(|c| crate::utils::is_korean_char(*c))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn is_rule_68_symbol_recognises_each_entry() {
for (c, _) in MAPPINGS {
assert!(is_rule_68_symbol(*c), "should recognise {c}");
}
assert!(!is_rule_68_symbol('a'));
assert!(!is_rule_68_symbol('1'));
}
#[test]
fn is_superscript_symbol_plus_minus_only() {
assert!(is_superscript_symbol('⁺'));
assert!(is_superscript_symbol('⁻'));
assert!(!is_superscript_symbol('a'));
}
#[test]
fn is_subscript_digit_recognises_each() {
for c in ['₀', '₁', '₂', '₃', '₄', '₅', '₆', '₇', '₈', '₉'] {
assert!(is_subscript_digit(c), "should recognise {c}");
}
assert!(!is_subscript_digit('0'));
assert!(!is_subscript_digit('a'));
}
#[test]
fn is_grade_notation_paths() {
let word: Vec<char> = "A-".chars().collect();
assert!(is_grade_notation(&word, 0));
let word: Vec<char> = "A-x".chars().collect();
assert!(!is_grade_notation(&word, 0));
let word: Vec<char> = "a-".chars().collect();
assert!(!is_grade_notation(&word, 0));
let word: Vec<char> = "A".chars().collect();
assert!(!is_grade_notation(&word, 0));
}
#[test]
fn is_compact_ascii_notation_paths() {
let word: Vec<char> = "A⁺".chars().collect();
assert!(is_compact_ascii_notation(&word, 0));
let word: Vec<char> = "B₃".chars().collect();
assert!(is_compact_ascii_notation(&word, 0));
let word: Vec<char> = "a⁺".chars().collect();
assert!(!is_compact_ascii_notation(&word, 0));
let word: Vec<char> = "A".chars().collect();
assert!(!is_compact_ascii_notation(&word, 0));
let word: Vec<char> = "Ab".chars().collect();
assert!(!is_compact_ascii_notation(&word, 0));
}
#[test]
fn encode_compact_ascii_notation_grade_minus() {
let word: Vec<char> = "A-".chars().collect();
let result = encode_compact_ascii_notation(&word, 0, true).unwrap();
assert!(result.is_some());
let (_, consumed) = result.unwrap();
assert_eq!(consumed, 2);
}
#[test]
fn encode_compact_ascii_notation_superscript() {
let word: Vec<char> = "A⁺⁻".chars().collect();
let result = encode_compact_ascii_notation(&word, 0, false).unwrap();
assert!(result.is_some());
let (_, consumed) = result.unwrap();
assert_eq!(consumed, 3);
}
#[test]
fn encode_compact_ascii_notation_subscript() {
let word: Vec<char> = "B₃".chars().collect();
let result = encode_compact_ascii_notation(&word, 0, true).unwrap();
assert!(result.is_some());
let (_, consumed) = result.unwrap();
assert_eq!(consumed, 2);
}
#[test]
fn encode_compact_ascii_notation_non_ascii_returns_none() {
let word: Vec<char> = "가".chars().collect();
let result = encode_compact_ascii_notation(&word, 0, false).unwrap();
assert!(result.is_none());
}
#[test]
fn encode_compact_ascii_notation_lowercase_returns_none() {
let word: Vec<char> = "a⁺".chars().collect();
let result = encode_compact_ascii_notation(&word, 0, false).unwrap();
assert!(result.is_none());
}
#[test]
fn encode_compact_ascii_notation_out_of_bounds() {
let word: Vec<char> = "A".chars().collect();
let result = encode_compact_ascii_notation(&word, 0, false).unwrap();
assert!(result.is_none());
let result = encode_compact_ascii_notation(&word, 99, false).unwrap();
assert!(result.is_none());
}
#[test]
fn is_digit_grade_plus_notation_paths() {
let word: Vec<char> = "1+등급".chars().collect();
assert!(is_digit_grade_plus_notation(&word, 1));
let word: Vec<char> = "1++등급".chars().collect();
assert!(is_digit_grade_plus_notation(&word, 1));
assert!(!is_digit_grade_plus_notation(&word, 0));
let word: Vec<char> = "1++x".chars().collect();
assert!(!is_digit_grade_plus_notation(&word, 1));
let word: Vec<char> = "a+등급".chars().collect();
assert!(!is_digit_grade_plus_notation(&word, 1));
}
#[test]
fn rule68_meta_phase_priority() {
let r = Rule68;
assert_eq!(r.meta().section, "68");
assert!(matches!(r.phase(), Phase::CoreEncoding));
assert_eq!(r.priority(), 90);
}
#[test]
fn rule68_apply_emits_for_known_symbol() {
use crate::char_struct::CharType;
let word: Vec<char> = "㎡".chars().collect();
let ct = CharType::Symbol('㎡');
let mut skip = 0usize;
let mut state = crate::rules::context::EncoderState::new(false);
let mut out = Vec::new();
let mut ctx = RuleContext {
word_chars: &word,
index: 0,
char_type: &ct,
prev_word: "",
remaining_words: &[],
has_korean_char: false,
is_all_uppercase: false,
ascii_starts_at_beginning: false,
skip_count: &mut skip,
state: &mut state,
result: &mut out,
};
let res = Rule68.apply(&mut ctx).unwrap();
assert!(matches!(res, RuleResult::Consumed));
assert!(!out.is_empty());
}
#[test]
fn rule68_apply_digit_grade_plus_followed_by_korean() {
let word: Vec<char> = "1+등급".chars().collect();
let ct = CharType::MathSymbol('+');
let mut skip = 0usize;
let mut state = crate::rules::context::EncoderState::new(false);
let mut out = Vec::new();
let mut ctx = RuleContext {
word_chars: &word,
index: 1,
char_type: &ct,
prev_word: "",
remaining_words: &[],
has_korean_char: true,
is_all_uppercase: false,
ascii_starts_at_beginning: false,
skip_count: &mut skip,
state: &mut state,
result: &mut out,
};
let outcome = Rule68.apply(&mut ctx).unwrap();
assert!(matches!(outcome, RuleResult::Consumed));
assert_eq!(out[0], SUPERSCRIPT_PREFIX);
assert_eq!(*out.last().unwrap(), 0);
}
#[test]
fn rule68_apply_compact_ascii_uppercase_superscript() {
let word: Vec<char> = "A⁺".chars().collect();
let ct = CharType::English('A');
let mut skip = 0usize;
let mut state = crate::rules::context::EncoderState::new(false);
let mut out = Vec::new();
let mut ctx = RuleContext {
word_chars: &word,
index: 0,
char_type: &ct,
prev_word: "",
remaining_words: &[],
has_korean_char: false,
is_all_uppercase: true,
ascii_starts_at_beginning: true,
skip_count: &mut skip,
state: &mut state,
result: &mut out,
};
let outcome = Rule68.apply(&mut ctx).unwrap();
assert!(matches!(outcome, RuleResult::Consumed));
assert!(!out.is_empty());
}
#[test]
fn should_insert_separator_after_symbol_only_for_specific_pattern() {
use crate::char_struct::CharType;
let word: Vec<char> = "㎡는".chars().collect();
let ct = CharType::Symbol('㎡');
let mut skip = 0usize;
let mut state = crate::rules::context::EncoderState::new(false);
let mut out = Vec::new();
let ctx = RuleContext {
word_chars: &word,
index: 0,
char_type: &ct,
prev_word: "",
remaining_words: &[],
has_korean_char: false,
is_all_uppercase: false,
ascii_starts_at_beginning: false,
skip_count: &mut skip,
state: &mut state,
result: &mut out,
};
assert!(should_insert_separator_after_symbol(&ctx));
}
#[test]
fn rule68_digit_grade_with_minus_in_chain() {
let _ = crate::encode("1+-가");
let _ = crate::encode("5-+나");
let _ = crate::encode("3--다");
}
#[test]
fn rule68_superscript_block_breaks_on_non_super_direct() {
let word: Vec<char> = "A\u{207A}x".chars().collect();
let result = encode_compact_ascii_notation(&word, 0, false).unwrap();
assert!(result.is_some());
let (_, consumed) = result.unwrap();
assert_eq!(consumed, 2);
}
#[test]
fn rule68_apply_falls_through_to_mappings_skip() {
let word: Vec<char> = "AB".chars().collect();
let ct = CharType::English('B');
let mut skip = 0usize;
let mut state = crate::rules::context::EncoderState::new(false);
let mut out = Vec::new();
let mut ctx = RuleContext {
word_chars: &word,
index: 1,
char_type: &ct,
prev_word: "",
remaining_words: &[],
has_korean_char: false,
is_all_uppercase: true,
ascii_starts_at_beginning: true,
skip_count: &mut skip,
state: &mut state,
result: &mut out,
};
let outcome = Rule68.apply(&mut ctx).unwrap();
assert!(matches!(outcome, RuleResult::Skip));
}
}