braillify 2.0.0

use crate::char_struct::CharType;
use crate::english;
use crate::rules::RuleMeta;
use crate::rules::context::RuleContext;
use crate::rules::korean::rule_29::ROMAN_INDICATOR;
use crate::rules::traits::{BrailleRule, Phase, RuleResult};

/// Static metadata describing this rule: its section number in the cited
/// standard, an internal name, the standard reference, and a short description.
/// Returned by [`Rule68::meta`].
pub static META: RuleMeta = RuleMeta {
    section: "68",
    subsection: None,
    name: "superscript_subscript_symbols",
    standard_ref: "2024 Korean Braille Standard, Ch.6 Art.68",
    description: "Superscripts, subscripts, and selected compact unit symbols",
};

/// Direct lookup table for single characters handled by this rule.
///
/// Each entry pairs a source character with the braille output for it, written
/// as a string of Unicode braille pattern characters. The string is converted
/// to cell values with `encode_unicode_cells` when the rule is applied, and the
/// set of keys defines what `is_rule_68_symbol` accepts.
const MAPPINGS: &[(char, &str)] = &[
    // Compact unit symbols.
    ('㎡', "⠴⠍⠘⠼⠃"),
    ('㏊', "⠴⠓⠁⠲"),
    // Standalone superscript signs: superscript prefix followed by the sign cell.
    ('⁺', "⠘⠢"),
    ('⁻', "⠘⠔"),
    // Standalone subscript digits: subscript prefix, number prefix, digit cell.
    // NOTE(review): only ₆ and ₉ are listed here — confirm whether the other
    // subscript digits are intentionally handled elsewhere.
    ('₆', "⠰⠼⠋"),
    ('₉', "⠰⠼⠊"),
];

/// Cells appended after the uppercase letter when encoding grade notation such
/// as `A-`. These are the same two cells that `MAPPINGS` uses for `⁻`.
const GRADE_MINUS: [u8; 2] = [
    crate::unicode::decode_unicode('⠘'),
    crate::unicode::decode_unicode('⠔'),
];
/// Cell pushed once before a run of superscript signs.
const SUPERSCRIPT_PREFIX: u8 = crate::unicode::decode_unicode('⠘');
/// Cell pushed once before a run of subscript digits.
const SUBSCRIPT_PREFIX: u8 = crate::unicode::decode_unicode('⠰');
/// Cell pushed after `SUBSCRIPT_PREFIX` and before the encoded digits.
const NUMBER_PREFIX: u8 = crate::unicode::decode_unicode('⠼');
/// Cell pushed at the very start of a compact notation when the caller reports
/// that a roman indicator is still needed.
const ENGLISH_PREFIX: u8 = crate::unicode::decode_unicode('⠴');
/// Cell pushed immediately before the encoded uppercase base letter.
const UPPERCASE_PREFIX: u8 = crate::unicode::decode_unicode('⠠');

/// Converts a string of Unicode braille pattern characters into cell values by
/// passing every character through `crate::unicode::decode_unicode`, in order.
fn encode_unicode_cells(unicode: &str) -> Vec<u8> {
    let mut cells = Vec::new();
    for glyph in unicode.chars() {
        cells.push(crate::unicode::decode_unicode(glyph));
    }
    cells
}

/// Returns `true` only when the current character is `㎡` and the character
/// after it is `는` or `은`.
fn should_insert_separator_after_symbol(ctx: &RuleContext) -> bool {
    // Only the square-metre sign ever takes a separator; bail out before
    // looking at the following character otherwise.
    if !matches!(ctx.current_char(), '㎡') {
        return false;
    }
    matches!(ctx.next_char(), Some('는' | '은'))
}

/// Reports whether `c` is one of the characters listed as a key in `MAPPINGS`.
pub fn is_rule_68_symbol(c: char) -> bool {
    MAPPINGS
        .iter()
        .map(|&(symbol, _)| symbol)
        .any(|symbol| symbol == c)
}

/// Reports whether `c` is the superscript plus (`⁺`) or superscript minus (`⁻`).
fn is_superscript_symbol(c: char) -> bool {
    c == '⁺' || c == '⁻'
}

/// Reports whether `c` lies in the Unicode subscript digit range `₀`..=`₉`.
fn is_subscript_digit(c: char) -> bool {
    ('₀'..='₉').contains(&c)
}

/// Reports whether the word, from `index` onward, consists of exactly an ASCII
/// uppercase letter followed by `-` (for example the tail `A-`).
///
/// Returns `false` when `index` is out of range or anything follows the hyphen.
fn is_grade_notation(word: &[char], index: usize) -> bool {
    match word.get(index..) {
        // Exactly two remaining characters, the second being a hyphen.
        Some([letter, '-']) => letter.is_ascii_uppercase(),
        _ => false,
    }
}

/// Reports whether `word[index]` is an ASCII uppercase letter that is directly
/// followed by a superscript sign (`⁺`/`⁻`) or a subscript digit (`₀`..=`₉`).
fn is_compact_ascii_notation(word: &[char], index: usize) -> bool {
    let Some(base) = word.get(index) else {
        return false;
    };
    if !base.is_ascii_uppercase() {
        return false;
    }

    match word.get(index + 1) {
        Some(&follower) => is_superscript_symbol(follower) || is_subscript_digit(follower),
        None => false,
    }
}

/// Returns the braille cell for a superscript sign (`⁺` or `⁻`), or `None` for any other char.
fn superscript_sign_cell(ch: char) -> Option<u8> {
    match ch {
        '⁺' => Some(crate::unicode::decode_unicode('⠢')),
        '⁻' => Some(crate::unicode::decode_unicode('⠔')),
        _ => None,
    }
}

/// Maps a Unicode subscript digit (`₀`..=`₉`) to its ASCII digit, or `None` for any other char.
fn subscript_to_ascii_digit(ch: char) -> Option<char> {
    match ch {
        '₀'..='₉' => char::from_digit(u32::from(ch) - u32::from('₀'), 10),
        _ => None,
    }
}

/// Encodes an uppercase ASCII letter together with the compact suffix that
/// follows it inside `word`.
///
/// Three suffix shapes are recognised, checked in this order:
///
/// 1. Grade notation: a single `-` that ends the word (e.g. `A-`).
/// 2. A run of one or more superscript signs (`⁺`, `⁻`).
/// 3. A run of one or more subscript digits (`₀`..=`₉`).
///
/// # Parameters
///
/// * `word` – the characters of the word being processed.
/// * `index` – position of the base letter within `word`.
/// * `needs_roman_indicator` – when `true`, `ENGLISH_PREFIX` is emitted before
///   the uppercase prefix.
///
/// # Returns
///
/// * `Ok(Some((cells, consumed)))` – the encoded cells and the number of source
///   characters they cover (the base letter plus its suffix).
/// * `Ok(None)` – `index` is out of range, the base is not an ASCII uppercase
///   letter, or no recognised suffix follows it.
///
/// # Errors
///
/// Propagates any error returned by `english::encode_english` for the base
/// letter or by `crate::number::encode_number` for a subscript digit. The base
/// letter is encoded before the suffix is inspected, so such an error is
/// reported even when no suffix matches.
fn encode_compact_ascii_notation(
    word: &[char],
    index: usize,
    needs_roman_indicator: bool,
) -> Result<Option<(Vec<u8>, usize)>, String> {
    let Some(base) = word.get(index).copied().filter(char::is_ascii_uppercase) else {
        return Ok(None);
    };

    let mut encoded = Vec::new();
    if needs_roman_indicator {
        encoded.push(ENGLISH_PREFIX);
    }
    encoded.push(UPPERCASE_PREFIX);
    encoded.push(english::encode_english(base)?);

    // `index` is in bounds (checked above), so `index + 1 <= word.len()` and
    // this slice is always valid, possibly empty.
    let tail = &word[index + 1..];

    match tail.first().copied() {
        // Grade notation is only valid when the hyphen is the final character
        // of the word, mirroring `is_grade_notation`. Without this guard a
        // word such as `A-B` would have its hyphen encoded as a grade minus.
        Some('-') if tail.len() == 1 => {
            encoded.extend_from_slice(&GRADE_MINUS);
            Ok(Some((encoded, 2)))
        }
        Some(first) if is_superscript_symbol(first) => {
            encoded.push(SUPERSCRIPT_PREFIX);
            // The prefix is emitted once; every consecutive sign adds one cell.
            let cells_before = encoded.len();
            encoded.extend(tail.iter().map_while(|&sign| superscript_sign_cell(sign)));
            let consumed = 1 + (encoded.len() - cells_before);
            Ok(Some((encoded, consumed)))
        }
        Some(first) if is_subscript_digit(first) => {
            encoded.push(SUBSCRIPT_PREFIX);
            encoded.push(NUMBER_PREFIX);
            // Start at 1 to account for the base letter itself.
            let mut consumed = 1usize;
            for digit in tail.iter().map_while(|&sub| subscript_to_ascii_digit(sub)) {
                encoded.push(crate::number::encode_number(digit)?);
                consumed += 1;
            }
            Ok(Some((encoded, consumed)))
        }
        _ => Ok(None),
    }
}

/// Rule for section 68: superscripts, subscripts, and selected compact unit
/// symbols (see [`META`]).
pub struct Rule68;

impl BrailleRule for Rule68 {
    /// Returns the static metadata for this rule.
    fn meta(&self) -> &'static RuleMeta {
        &META
    }

    /// This rule runs in the core encoding phase.
    fn phase(&self) -> Phase {
        Phase::CoreEncoding
    }

    /// Fixed priority value of this rule within its phase.
    fn priority(&self) -> u16 {
        90
    }

    /// Matches either a symbol listed in `MAPPINGS`, or an English character
    /// that starts a compact notation (letter + superscript/subscript) or a
    /// grade notation (letter + trailing `-`).
    fn matches(&self, ctx: &RuleContext) -> bool {
        match ctx.char_type {
            CharType::Symbol(symbol) => is_rule_68_symbol(*symbol),
            CharType::English(_) => {
                is_compact_ascii_notation(ctx.word_chars, ctx.index)
                    || is_grade_notation(ctx.word_chars, ctx.index)
            }
            _ => false,
        }
    }

    /// Emits the braille cells for the current position.
    ///
    /// For an English character, the compact/grade notation is encoded as a
    /// unit; the English state flags are cleared and `skip_count` is set so
    /// the characters covered by the notation are not processed again. For
    /// any character found in `MAPPINGS`, its cells are emitted, followed by a
    /// `0` cell when `should_insert_separator_after_symbol` says so. If
    /// neither path applies the rule returns `RuleResult::Skip`.
    ///
    /// # Errors
    ///
    /// Propagates errors from `encode_compact_ascii_notation`.
    fn apply(&self, ctx: &mut RuleContext) -> Result<RuleResult, String> {
        if let CharType::English(_) = ctx.char_type {
            // A roman indicator is needed only when we are not already in
            // English mode and the previous emitted cell is not one already.
            let needs_roman_indicator =
                !ctx.state.is_english && ctx.result.last().copied() != Some(ROMAN_INDICATOR);
            let compact =
                encode_compact_ascii_notation(ctx.word_chars, ctx.index, needs_roman_indicator)?;

            if let Some((cells, consumed)) = compact {
                ctx.emit_slice(&cells);
                ctx.state.is_english = false;
                ctx.state.needs_english_continuation = false;
                // The current character is consumed by this call; skip the rest.
                *ctx.skip_count = consumed.saturating_sub(1);
                return Ok(RuleResult::Consumed);
            }
        }

        let current = ctx.current_char();
        match MAPPINGS.iter().find(|entry| entry.0 == current) {
            None => Ok(RuleResult::Skip),
            Some(&(_, unicode)) => {
                ctx.emit_slice(&encode_unicode_cells(unicode));
                if should_insert_separator_after_symbol(ctx) {
                    ctx.emit(0);
                }
                Ok(RuleResult::Consumed)
            }
        }
    }
}