braillify 2.0.0

Rust 기반 크로스플랫폼 한국어 점역 라이브러리
Documentation
//! 제41항 — 숫자 또는 로마자 구간에서 쉼표는 ⠂(2)으로 적는다.
//!
//! When a comma appears between digits (e.g., "1,000") or between ASCII letters
//! and alphanumeric characters, it uses the numeric comma ⠂ instead of the
//! standard Korean comma ⠐.
//!
//! Reference: 2024 Korean Braille Standard, Chapter 5, Section 11, Article 41

use crate::char_struct::CharType;
use crate::rules::RuleMeta;
use crate::rules::context::RuleContext;
use crate::rules::traits::{BrailleRule, Phase, RuleResult};
pub static META: RuleMeta = RuleMeta {
    section: "41",
    subsection: None,
    name: "numeric_comma",
    standard_ref: "2024 Korean Braille Standard, Ch.5 Sec.11 Art.41",
    description: "Comma between digits/letters uses ⠂ (2) instead of standard comma",
};

/// Numeric comma braille code.
const NUMERIC_COMMA: u8 = 2; //
/// Plugin struct for the rule engine.
///
/// Handles comma encoding in numeric/English context.
/// Runs before generic punctuation (rule_49) to intercept commas.
pub struct Rule41;

impl BrailleRule for Rule41 {
    fn meta(&self) -> &'static RuleMeta {
        &META
    }

    fn phase(&self) -> Phase {
        Phase::CoreEncoding
    }

    fn priority(&self) -> u16 {
        400 // Before rule_49 (500) — intercept comma before generic punctuation
    }

    fn matches(&self, ctx: &RuleContext) -> bool {
        let CharType::Symbol(c) = ctx.char_type else {
            return false;
        };
        if *c != ',' {
            return false;
        }

        let (has_numeric_prefix, has_ascii_prefix) = scan_prefix(ctx.word_chars, ctx.index);
        let next_char = get_next_char(ctx);
        let next_is_digit = next_char.is_some_and(|ch| ch.is_ascii_digit());
        let next_is_ascii = next_char.is_some_and(|ch| ch.is_ascii_alphabetic());
        let next_is_alphanumeric = next_is_digit || next_is_ascii;

        // Comma between numbers, or between ASCII and alphanumeric
        ((ctx.state.is_number || has_numeric_prefix) && next_is_digit)
            || (has_ascii_prefix && next_is_alphanumeric)
    }

    fn apply(&self, ctx: &mut RuleContext) -> Result<RuleResult, String> {
        ctx.emit(NUMERIC_COMMA);
        Ok(RuleResult::Consumed)
    }
}

/// Scan backwards from index to find if preceded by a digit or ASCII letter.
fn scan_prefix(word_chars: &[char], index: usize) -> (bool, bool) {
    let mut has_numeric_prefix = false;
    let mut has_ascii_prefix = false;
    let mut j = index;
    while j > 0 {
        let prev = word_chars[j - 1];
        if prev.is_ascii_digit() {
            has_numeric_prefix = true;
            break;
        } else if prev.is_ascii_alphabetic() {
            has_ascii_prefix = true;
            break;
        } else if prev == ' ' {
            j -= 1;
        } else {
            break;
        }
    }
    (has_numeric_prefix, has_ascii_prefix)
}

/// Get the next character (within word or from next word).
fn get_next_char(ctx: &RuleContext) -> Option<char> {
    if ctx.index + 1 < ctx.word_chars.len() {
        Some(ctx.word_chars[ctx.index + 1])
    } else {
        ctx.remaining_words.first().and_then(|w| w.chars().next())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn scan_prefix_finds_digit() {
        let chars: Vec<char> = "1,000".chars().collect();
        let (num, ascii) = scan_prefix(&chars, 1);
        assert!(num);
        assert!(!ascii);
    }

    #[test]
    fn scan_prefix_finds_ascii() {
        let chars: Vec<char> = "A,B".chars().collect();
        let (num, ascii) = scan_prefix(&chars, 1);
        assert!(!num);
        assert!(ascii);
    }

    #[test]
    fn golden_test_alignment() {
        let cases = vec![
            ("1,000", "⠼⠁⠂⠚⠚⠚"), // comma between digits → ⠂
            ("0.48", "⠼⠚⠲⠙⠓"),   // period between digits (NOT this rule)
        ];
        for (input, expected) in cases {
            let result = crate::encode_to_unicode(input).unwrap();
            assert_eq!(
                result, expected,
                "Rule 41 golden test failed for: {}",
                input
            );
        }
    }

    #[test]
    fn meta_is_correct() {
        assert_eq!(META.section, "41");
        assert_eq!(META.name, "numeric_comma");
    }
}