1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
//! 제44항 [다만] — 숫자와 혼동되는 'ㄴ, ㄷ, ㅁ, ㅋ, ㅌ, ㅍ, ㅎ'의 첫소리 글자와
//! '운'의 약자는 숫자 뒤에 붙어 나오더라도 숫자와 한글을 띄어 쓴다.
//!
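//! When a Korean syllable whose choseong is "confusable" (ㄴ, ㄷ, ㅁ, ㅋ, ㅌ, ㅍ, ㅎ),
//! or the syllable '운', follows a number, a separator cell is emitted before it so
//! that the syllable is not misread as part of the number: a blank cell (code 0)
//! normally, or the attached separator (code 8) when the character immediately
//! before the syllable is a middle dot (`·`).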
//!
//! Reference: 2024 Korean Braille Standard, Chapter 5, Section 11, Article 44 [다만]
use crate::char_struct::CharType;
use crate::rules::RuleMeta;
use crate::rules::context::RuleContext;
use crate::rules::traits::{BrailleRule, Phase, RuleResult};
/// Static metadata describing this rule: its section/subsection identifiers,
/// its machine-readable name, the citation of the standard it implements,
/// and a short human-readable description.
pub static META: RuleMeta = RuleMeta {
    name: "number_korean_spacing",
    section: "44",
    subsection: Some("b1"),
    description: "Insert space between number and confusable Korean choseong",
    standard_ref: "2024 Korean Braille Standard, Ch.5 Sec.11 Art.44 [다만]",
};
/// Initial consonants (choseong) that could be confused with digit braille
/// patterns when a syllable starting with one of them directly follows a number.
///
/// The syllable '운' is also treated as confusable, but it is matched by its
/// full character rather than through this table.
const CONFUSABLE_CHOSEONG: [char; 7] = [
    'ㄴ', // nieun
    'ㄷ', // digeut
    'ㅁ', // mieum
    'ㅋ', // kieuk
    'ㅌ', // tieut
    'ㅍ', // pieup
    'ㅎ', // hieut
];
/// Rule-engine plugin for Article 44 [proviso]: number / Korean separation.
///
/// When the encoder is in number state and the current character is a Korean
/// syllable whose choseong is in `CONFUSABLE_CHOSEONG` (or is the syllable
/// '운'), this rule emits one separator cell before the syllable:
///
/// * code 0 (blank cell) in the general case, or
/// * code 8 (attached separator) when the character immediately before the
///   syllable is a middle dot (`·`).
///
/// The rule runs in the `CoreEncoding` phase with priority 50 and returns
/// `RuleResult::Continue`, so the separator is emitted BEFORE the Korean
/// character itself is encoded by later rules.
pub struct Rule44;
impl BrailleRule for Rule44 {
    /// Returns the static metadata record for this rule.
    fn meta(&self) -> &'static RuleMeta {
        &META
    }

    /// This rule takes part in the core encoding phase.
    fn phase(&self) -> Phase {
        Phase::CoreEncoding
    }

    /// Priority value 50: very high, so the separator is emitted before any
    /// rule encodes the Korean character itself.
    fn priority(&self) -> u16 {
        50
    }

    /// Returns `true` when the encoder is in number state and the current
    /// character is a Korean syllable that either starts with a confusable
    /// choseong or is exactly '운'.
    fn matches(&self, ctx: &RuleContext) -> bool {
        match ctx.char_type {
            // Only Korean syllables seen while in number state are candidates.
            CharType::Korean(syllable) if ctx.state.is_number => {
                CONFUSABLE_CHOSEONG.contains(&syllable.cho) || ctx.current_char() == '운'
            }
            _ => false,
        }
    }

    /// Emits a single separator cell ahead of the Korean syllable and lets
    /// rule processing continue.
    ///
    /// The attached separator ⠈ (8) is used only when the character directly
    /// before the syllable is a middle dot (`·`). In every other case —
    /// including a syllable that follows a number inside a middle-dot
    /// enumeration — the ordinary blank cell ⠀ (0) is used.
    ///
    /// Basis: Article 44 [proviso] — Korean text that could be confused with
    /// a number is written with a space. The attached separator of the
    /// Article 50 middle-dot enumeration applies only where the syllable is
    /// attached directly after `·`.
    fn apply(&self, ctx: &mut RuleContext) -> Result<RuleResult, String> {
        let separator = match ctx.prev_char() {
            Some('·') => 8, // attached separator
            _ => 0,         // space separator
        };
        ctx.emit(separator);

        // Hand over to the Korean encoding rules for the syllable itself.
        Ok(RuleResult::Continue)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins `CONFUSABLE_CHOSEONG` against an independent literal list.
    ///
    /// The previous version iterated over the constant and asserted that the
    /// constant contained its own elements, which could never fail.
    #[test]
    fn identifies_confusable_choseong() {
        let expected = ['ㄴ', 'ㄷ', 'ㅁ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ'];
        assert_eq!(CONFUSABLE_CHOSEONG.len(), expected.len());
        for &cho in &expected {
            assert!(
                CONFUSABLE_CHOSEONG.contains(&cho),
                "Missing confusable: {}",
                cho
            );
        }

        // Choseong outside the Article 44 [proviso] list must not be flagged.
        for &cho in &['ㄱ', 'ㄹ', 'ㅂ'] {
            assert!(
                !CONFUSABLE_CHOSEONG.contains(&cho),
                "Unexpected confusable: {}",
                cho
            );
        }
    }

    /// End-to-end check: '운' after a digit is separated by a blank cell,
    /// while '기' (choseong not in the confusable list) follows its digit
    /// directly.
    #[test]
    fn golden_test_alignment() {
        // "5운6기" → ⠼⠑ + blank cell + 운 (⠛) + ⠼⠋ + 기 (⠈⠕)
        let result = crate::encode_to_unicode("5운6기").unwrap();
        assert_eq!(result, "⠼⠑⠀⠛⠼⠋⠈⠕");
    }

    /// The metadata record identifies this rule by section and name.
    #[test]
    fn meta_is_correct() {
        assert_eq!(META.section, "44");
        assert_eq!(META.name, "number_korean_spacing");
    }

    /// Article 44 [proviso] in a middle-dot enumeration: when a confusable
    /// Korean syllable comes immediately after a middle dot (`·`), `apply`
    /// emits the attached separator ⠈ (8).
    #[test]
    fn rule44_apply_emits_attached_separator_after_middle_dot() {
        // "·" followed by "하" — confusable choseong ㅎ right after a middle dot.
        let word: Vec<char> = "·하".chars().collect();
        let ct = CharType::new(word[1]).unwrap();
        let mut skip = 0usize;
        let mut state = crate::rules::context::EncoderState::new(false);
        state.is_number = true;
        let mut out = Vec::new();
        let mut ctx = RuleContext {
            word_chars: &word,
            index: 1,
            char_type: &ct,
            prev_word: "",
            remaining_words: &[],
            has_korean_char: true,
            is_all_uppercase: false,
            ascii_starts_at_beginning: false,
            skip_count: &mut skip,
            state: &mut state,
            result: &mut out,
        };
        let outcome = Rule44.apply(&mut ctx).unwrap();
        assert!(matches!(outcome, RuleResult::Continue));
        assert_eq!(out, vec![8]); // attached separator
    }

    /// Article 44 [proviso], general case: when the character before the
    /// confusable Korean syllable is not a middle dot, `apply` emits the
    /// ordinary blank cell ⠀ (0).
    #[test]
    fn rule44_apply_emits_space_without_middle_dot() {
        // "5" followed by "하" — confusable choseong ㅎ right after a digit.
        let word: Vec<char> = "5하".chars().collect();
        let ct = CharType::new(word[1]).unwrap();
        let mut skip = 0usize;
        let mut state = crate::rules::context::EncoderState::new(false);
        state.is_number = true;
        let mut out = Vec::new();
        let mut ctx = RuleContext {
            word_chars: &word,
            index: 1,
            char_type: &ct,
            prev_word: "",
            remaining_words: &[],
            has_korean_char: true,
            is_all_uppercase: false,
            ascii_starts_at_beginning: false,
            skip_count: &mut skip,
            state: &mut state,
            result: &mut out,
        };
        let outcome = Rule44.apply(&mut ctx).unwrap();
        assert!(matches!(outcome, RuleResult::Continue));
        assert_eq!(out, vec![0]); // space separator
    }
}