use crate::char_struct::CharType;
use crate::english;
use crate::rule_en::{rule_en_10_4, rule_en_10_5_whole_word, rule_en_10_6, rule_en_multi_cell};
use crate::rules::RuleMeta;
use crate::rules::context::RuleContext;
use crate::rules::traits::{BrailleRule, Phase, RuleResult};
/// Static metadata record for the English-letter encoding rule.
///
/// Handed out by `Rule28::meta`. It labels the rule as section "28" with no
/// subsection and carries the citation and description strings shown below.
pub static META: RuleMeta = RuleMeta {
section: "28",
subsection: None,
name: "english_encoding",
standard_ref: "2024 Korean Braille Standard, Ch.4 Sec.10 Art.28",
description: "English letters encoded per UEB (Unified English Braille)",
};
/// Cell value that `Rule28::apply` emits in front of uppercase English letters.
///
/// NOTE(review): 32 presumably stands for the single-dot capital indicator in
/// this crate's cell encoding — confirm against the cell bit layout.
pub const UPPERCASE_SINGLE: u8 = 32;
/// Test-only shorthand that forwards `ch` to `english::encode_english` and
/// returns its result unchanged.
///
/// This free function shares its name with the `apply` method of the
/// `BrailleRule` impl below but is unrelated to it.
#[cfg(test)]
fn apply(ch: char) -> Result<u8, String> {
english::encode_english(ch)
}
/// Test-only model of how many capital-indicator cells (value 32) precede
/// uppercase text.
///
/// Precedence, highest first:
/// 1. a run of three or more consecutive uppercase words yields three cells;
/// 2. an all-uppercase word yields two cells;
/// 3. a single uppercase letter yields one cell;
/// 4. anything else yields an empty slice.
#[cfg(test)]
fn uppercase_indicators(
    is_single_uppercase: bool,
    is_word_all_uppercase: bool,
    consecutive_uppercase_words: u8,
) -> &'static [u8] {
    let is_passage_run = consecutive_uppercase_words >= 3;
    match (is_passage_run, is_word_all_uppercase, is_single_uppercase) {
        (true, _, _) => &[32, 32, 32],
        (false, true, _) => &[32, 32],
        (false, false, true) => &[32],
        (false, false, false) => &[],
    }
}
/// Stateless unit struct that implements `BrailleRule` for English letters;
/// its metadata lives in `META`.
pub struct Rule28;
impl BrailleRule for Rule28 {
    /// Returns the static metadata record for this rule.
    fn meta(&self) -> &'static RuleMeta {
        &META
    }

    /// This rule belongs to the core-encoding phase.
    fn phase(&self) -> Phase {
        Phase::CoreEncoding
    }

    /// The rule fires only when the current character is classified as English.
    fn matches(&self, ctx: &RuleContext) -> bool {
        matches!(ctx.char_type, CharType::English(_))
    }

    /// Encodes the English letter at `ctx.index`, together with any indicator
    /// cells and contractions that start at that position.
    ///
    /// Steps, in order:
    /// 1. Emit an English indicator cell when one is requested and English
    ///    mode is not yet active.
    /// 2. Emit up to two `UPPERCASE_SINGLE` cells for a leading capital run.
    /// 3. Try the whole-word "you" shortcut, then the `rule_en_10_5_whole_word`
    ///    table, then the per-position lookups (`rule_en_10_6`,
    ///    `rule_en_10_4`, `rule_en_multi_cell`), and finally fall back to the
    ///    plain letter.
    ///
    /// Returns `RuleResult::Skip` for non-English characters and
    /// `RuleResult::Consumed` otherwise.
    ///
    /// # Errors
    ///
    /// Propagates the error string from `english::encode_english`.
    fn apply(&self, ctx: &mut RuleContext) -> Result<RuleResult, String> {
        let letter = match ctx.char_type {
            CharType::English(letter) => letter,
            _ => return Ok(RuleResult::Skip),
        };

        // Indicator cell: only when indicators are enabled, English mode is not
        // already on, and the "dominant English, no indicator" mode is off.
        // NOTE(review): 48 looks like a continuation marker and 52 like the
        // opening marker — confirm against the cell table.
        let indicator_wanted = ctx.state.english_indicator
            && !ctx.state.is_english
            && !ctx.state.english_dominant_no_indicator;
        if indicator_wanted {
            let indicator = if ctx.state.needs_english_continuation {
                48
            } else {
                52
            };
            ctx.emit(indicator);
        }

        // Capital marking is handled here unless the word is an all-uppercase
        // word of length >= 2 whose ASCII run starts at the beginning.
        let marking_applies =
            !ctx.is_all_uppercase || ctx.word_len() < 2 || !ctx.ascii_starts_at_beginning;
        if marking_applies && !ctx.state.is_big_english && letter.is_uppercase() {
            ctx.state.is_big_english = true;
            // One marker per consecutive capital, looking at no more than two
            // characters from the current position.
            let window = (ctx.word_len() - ctx.index).min(2);
            let capital_run = (0..window)
                .take_while(|&offset| ctx.word_chars[ctx.index + offset].is_uppercase())
                .count();
            for _ in 0..capital_run {
                ctx.emit(UPPERCASE_SINGLE);
            }
        }

        // Lower-cased text from the current position to the end of the word;
        // every table lookup below is keyed on this string.
        let remaining: String = ctx.word_chars[ctx.index..]
            .iter()
            .map(char::to_ascii_lowercase)
            .collect();

        let at_word_start = ctx.index == 0;
        let whole_word_is_lowercase =
            at_word_start && ctx.word_chars.iter().all(char::is_ascii_lowercase);

        // "be" / "in" immediately followed by a non-letter or by the end of the word.
        let third_is_letter = remaining
            .chars()
            .nth(2)
            .is_some_and(|ch| ch.is_ascii_alphabetic());
        let be_at_boundary = remaining.starts_with("be") && !third_is_letter;
        let in_at_boundary = remaining.starts_with("in") && !third_is_letter;

        let prev_is_ascii_word = !ctx.prev_word.is_empty()
            && ctx.prev_word.chars().all(|ch| ch.is_ascii_alphabetic());
        let next_is_ascii_word = ctx.remaining_words.first().is_some_and(|word| {
            !word.is_empty() && word.chars().all(|ch| ch.is_ascii_alphabetic())
        });

        // One uppercase ASCII letter followed by at least one letter, all lowercase.
        let word_is_title_case = at_word_start
            && !ctx.is_all_uppercase
            && matches!(
                ctx.word_chars.split_first(),
                Some((head, rest))
                    if !rest.is_empty()
                        && head.is_ascii_uppercase()
                        && rest.iter().all(char::is_ascii_lowercase)
            );

        if whole_word_is_lowercase
            && remaining == "you"
            && prev_is_ascii_word
            && next_is_ascii_word
        {
            // A lowercase "you" between two ASCII words collapses to the cell for 'y'.
            ctx.emit(english::encode_english('y')?);
            let rest_of_word = ctx.word_len().saturating_sub(1);
            *ctx.skip_count = rest_of_word;
        } else if at_word_start
            && !ctx.is_all_uppercase
            && (whole_word_is_lowercase || word_is_title_case)
            && let Some(cells) = rule_en_10_5_whole_word(&remaining)
        {
            // The whole-word table matched: emit its cells and skip the rest of the word.
            ctx.emit_slice(cells);
            let rest_of_word = ctx.word_len().saturating_sub(1);
            *ctx.skip_count = rest_of_word;
        } else {
            let wrap_active = ctx.state.english_dominant_wrap_active;
            let unwrapped = !wrap_active;

            // Outside the dominant-English wrap, a boundary "be"/"in" (or a word
            // that is exactly "be"/"in") must not use the lookups gated below.
            let be_blocked = unwrapped
                && (be_at_boundary || (whole_word_is_lowercase && remaining == "be"));
            let in_blocked = unwrapped
                && (in_at_boundary || (whole_word_is_lowercase && remaining == "in"));

            let allow_10_6 = !ctx.is_all_uppercase && !be_blocked && !in_blocked;
            let allow_10_4_cont = !in_blocked;
            let allow_10_4_entry = !ctx.is_all_uppercase && allow_10_4_cont;

            // "Entry" means English mode is just starting or we are at the word's first char.
            let at_entry = !ctx.state.is_english || at_word_start;
            let try_10_6_entry = allow_10_6 && at_entry;
            let try_10_6_middle = allow_10_6 && wrap_active && !at_entry;
            let try_10_4 = if at_entry {
                allow_10_4_entry
            } else {
                allow_10_4_cont
            };

            // Lookup order matters: 10.6 at entry, then 10.4, then multi-cell,
            // then 10.6 mid-word (wrap only), then the plain letter.
            if try_10_6_entry && let Some((cell, consumed)) = rule_en_10_6(&remaining) {
                ctx.emit(cell);
                *ctx.skip_count = consumed;
            } else if try_10_4 && let Some((cell, consumed)) = rule_en_10_4(&remaining) {
                ctx.emit(cell);
                *ctx.skip_count = consumed;
            } else if let Some((cells, consumed)) = rule_en_multi_cell(&remaining) {
                ctx.emit_slice(cells);
                *ctx.skip_count = consumed;
            } else if try_10_6_middle && let Some((cell, consumed)) = rule_en_10_6(&remaining) {
                ctx.emit(cell);
                *ctx.skip_count = consumed;
            } else {
                ctx.emit(english::encode_english(*letter)?);
            }
        }

        // Every successful English path leaves the encoder in English mode.
        ctx.state.is_english = true;
        ctx.state.needs_english_continuation = false;
        Ok(RuleResult::Consumed)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::unicode::decode_unicode;

    /// Letters map to their braille cell; an uppercase letter encodes to the
    /// same cell as its lowercase form.
    #[rstest::rstest]
    #[case::lower_a('a', '⠁')]
    #[case::lower_z('z', '⠵')]
    #[case::upper_a_as_lowercase('A', '⠁')]
    fn encodes_english_letters(#[case] ch: char, #[case] expected: char) {
        assert_eq!(apply(ch).unwrap(), decode_unicode(expected));
    }

    /// Characters that are not English letters are rejected by the encoder.
    #[rstest::rstest]
    #[case::digit('1')]
    #[case::syllable('가')]
    fn invalid_returns_error(#[case] ch: char) {
        assert!(apply(ch).is_err());
    }

    /// Covers each precedence branch of `uppercase_indicators`.
    #[rstest::rstest]
    #[case::single_letter(true, false, 0, &[32u8] as &[u8])]
    #[case::word_two_letters(false, true, 0, &[32, 32])]
    #[case::passage_run(false, true, 3, &[32, 32, 32])]
    #[case::no_indicator_lower(false, false, 0, &[] as &[u8])]
    fn uppercase_indicator_paths(
        #[case] single: bool,
        #[case] is_word: bool,
        #[case] run: u8,
        #[case] expected: &[u8],
    ) {
        assert_eq!(uppercase_indicators(single, is_word, run), expected);
    }

    /// Despite its historical name, this test drives the rule with an English
    /// letter, which `Rule28::apply` must consume rather than skip.
    #[test]
    fn apply_skips_non_korean() {
        let mut owned = crate::test_helpers::CtxOwned::for_text("A", false);
        let mut ctx = owned.ctx_at(0);
        let outcome = Rule28.apply(&mut ctx).unwrap();
        assert!(matches!(outcome, RuleResult::Consumed));
    }

    /// Smoke test: runs the full encoder over mixed English/digit text. The
    /// result is deliberately discarded; this only guards against panics.
    #[test]
    fn rule28_multi_cell_via_pyeongchang() {
        let _ = crate::encode("pyeongchang 2018");
    }

    /// Calls the rule on the 'o' of "along" (index 2) with English mode already
    /// active, so the mid-word lookup branch runs; the letter must be consumed.
    #[test]
    fn rule28_multi_cell_word_middle_direct() {
        let word: Vec<char> = "along".chars().collect();
        let ct = CharType::English('o');
        let mut skip = 0usize;
        let mut state = crate::rules::context::EncoderState::new(false);
        state.is_english = true;
        let mut out = Vec::new();
        let mut ctx = crate::rules::context::RuleContext {
            word_chars: &word,
            index: 2,
            char_type: &ct,
            prev_word: "",
            remaining_words: &[],
            has_korean_char: false,
            is_all_uppercase: false,
            ascii_starts_at_beginning: true,
            skip_count: &mut skip,
            state: &mut state,
            result: &mut out,
        };
        let outcome = Rule28.apply(&mut ctx).unwrap();
        assert!(matches!(outcome, RuleResult::Consumed));
    }

    /// A Hangul syllable is not an English character, so the rule must skip it.
    #[test]
    fn rule28_apply_skip_for_non_english_ctx() {
        let mut owned = crate::test_helpers::CtxOwned::for_text("가", false);
        let mut ctx = owned.ctx_at(0);
        let outcome = Rule28.apply(&mut ctx).unwrap();
        assert!(matches!(outcome, RuleResult::Skip));
    }
}