use std::borrow::Cow;
use crate::rules::token::{Token, WordToken};
use crate::rules::token_rule::{TokenAction, TokenPhase, TokenRule};
use crate::unicode::decode_unicode;
/// Token rule for words that carry the combining marks U+030A (ring above)
/// or U+0307 (dot above); its behaviour is defined by the `TokenRule`
/// implementation in this file.
pub struct EmphasisRingRule;
/// Returns `true` when `ch` is one of the two combining marks this rule
/// recognises: U+030A (combining ring above) or U+0307 (combining dot above).
fn is_ring_mark(ch: char) -> bool {
    const RING_ABOVE: char = '\u{030A}';
    const DOT_ABOVE: char = '\u{0307}';
    ch == RING_ABOVE || ch == DOT_ABOVE
}
fn is_ring_mark_only(text: &str) -> bool {
!text.is_empty() && text.chars().all(is_ring_mark)
}
/// Returns `true` when `text` contains at least one ring mark *and* at least
/// one character accepted by `crate::utils::is_korean_char`.
///
/// The Korean-character scan only runs when a ring mark was found.
fn is_emphasis_word(text: &str) -> bool {
    let has_ring_mark = text.chars().any(is_ring_mark);
    has_ring_mark && text.chars().any(crate::utils::is_korean_char)
}
/// Returns a copy of `text` with every ring mark removed, wherever it occurs
/// in the string (not just at the ends). All other characters keep their
/// original order.
fn trim_ring_marks(text: &str) -> String {
    let mut kept = String::new();
    for ch in text.chars() {
        if !is_ring_mark(ch) {
            kept.push(ch);
        }
    }
    kept
}
impl TokenRule for EmphasisRingRule {
    /// This rule reports the `Normalization` phase.
    fn phase(&self) -> TokenPhase {
        TokenPhase::Normalization
    }

    /// Fixed priority value of this rule.
    fn priority(&self) -> u16 {
        120
    }

    /// Dispatches on the kind of token at `index`:
    ///
    /// * a word token is handled by `apply_word_arm`,
    /// * a space token is handled by `apply_space_arm`,
    /// * anything else (including an out-of-range `index`) yields
    ///   `TokenAction::Noop`.
    ///
    /// The encoder state is not consulted.
    fn apply<'a>(
        &self,
        tokens: &[Token<'a>],
        index: usize,
        _state: &mut crate::rules::context::EncoderState,
    ) -> Result<TokenAction<'a>, String> {
        match tokens.get(index) {
            Some(Token::Word(word)) => apply_word_arm(word),
            Some(Token::Space(_)) => Ok(apply_space_arm(tokens, index)),
            _ => Ok(TokenAction::Noop),
        }
    }
}
/// Decides what to do with a single word token.
///
/// * A word made up only of ring marks is removed (`ReplaceMany` with no
///   tokens).
/// * A word that is not an emphasis word (see `is_emphasis_word`) is left
///   untouched (`Noop`).
/// * An emphasis word is replaced by three tokens: the pre-encoded opening
///   marker `⠠⠤`, the word with its ring marks stripped, and the pre-encoded
///   closing marker `⠤⠄`.
///
/// This function never returns `Err`.
fn apply_word_arm<'a>(word: &WordToken<'_>) -> Result<TokenAction<'a>, String> {
    let source: &str = word.text.as_ref();

    if is_ring_mark_only(source) {
        return Ok(TokenAction::ReplaceMany(Vec::new()));
    } else if !is_emphasis_word(source) {
        return Ok(TokenAction::Noop);
    }

    let stripped = trim_ring_marks(source);
    // An emphasis word contains at least one Korean character, so removing
    // the marks cannot leave it empty.
    debug_assert!(!stripped.is_empty());
    let stripped_chars: Vec<char> = stripped.chars().collect();
    let meta = crate::rules::token::WordMeta::from_chars(&stripped_chars);

    let replacement = vec![
        Token::PreEncoded(vec![decode_unicode('⠠'), decode_unicode('⠤')]),
        Token::Word(WordToken {
            text: Cow::Owned(stripped),
            chars: stripped_chars,
            meta,
        }),
        Token::PreEncoded(vec![decode_unicode('⠤'), decode_unicode('⠄')]),
    ];
    Ok(TokenAction::ReplaceMany(replacement))
}
fn apply_space_arm<'a>(tokens: &[Token<'a>], index: usize) -> TokenAction<'a> {
let prev = index.checked_sub(1).and_then(|i| tokens.get(i));
let next = tokens.get(index + 1);
let prev_word = prev.and_then(token_word_text);
let next_word = next.and_then(token_word_text);
let prev_is_emphasis_close = prev.is_some_and(is_emphasis_close_marker);
if prev_is_emphasis_close && next_word.is_some_and(|w| !is_ring_mark_only(w)) {
return TokenAction::ReplaceMany(vec![]);
}
if prev_word.is_some_and(is_ring_mark_only) || next_word.is_some_and(is_ring_mark_only) {
return TokenAction::ReplaceMany(vec![]);
}
if prev_word.is_some_and(|w| is_emphasis_word(w) || is_ring_mark_only(w))
&& next_word.is_some_and(|w| !is_ring_mark_only(w))
{
let close_marker = vec![decode_unicode('⠤'), decode_unicode('⠄')];
return TokenAction::Replace(Token::PreEncoded(close_marker));
}
TokenAction::Noop
}
/// Returns the text of `tok` when it is a word token, and `None` for every
/// other token kind.
fn token_word_text<'a>(tok: &'a Token<'_>) -> Option<&'a str> {
    match tok {
        Token::Word(word) => Some(word.text.as_ref()),
        _ => None,
    }
}
/// Returns `true` when `tok` is a pre-encoded token whose contents are exactly
/// the two decoded cells of the closing marker `⠤⠄`.
fn is_emphasis_close_marker(tok: &Token<'_>) -> bool {
    let expected = [decode_unicode('⠤'), decode_unicode('⠄')];
    match tok {
        Token::PreEncoded(cells) => cells.as_slice() == expected.as_slice(),
        _ => false,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::rules::context::EncoderState;
    use crate::rules::token::{SpaceKind, WordMeta};

    /// Builds a borrowed word token for `text`, deriving its chars and
    /// metadata the same way the rule does for rewritten words.
    fn word(text: &str) -> Token<'_> {
        let chars: Vec<char> = text.chars().collect();
        let meta = WordMeta::from_chars(&chars);
        Token::Word(WordToken {
            text: Cow::Borrowed(text),
            chars,
            meta,
        })
    }

    // The Latin case spells the combining mark as an explicit escape so the
    // input really contains U+030A regardless of how the file is normalized.
    #[rstest::rstest]
    #[case::korean_with_mark("훈민정음\u{030A}", true)]
    #[case::latin_with_mark_only("A\u{030A}", false)]
    #[case::korean_without_mark("훈민정음", false)]
    #[case::empty("", false)]
    fn is_emphasis_word_table(#[case] text: &str, #[case] expected: bool) {
        assert_eq!(is_emphasis_word(text), expected);
    }

    #[rstest::rstest]
    #[case::single_ring("\u{030A}", true)]
    #[case::single_dot_above("\u{0307}", true)]
    #[case::ring_then_dot("\u{030A}\u{0307}", true)]
    #[case::empty("", false)]
    #[case::ascii_letter("a", false)]
    #[case::ring_then_letter("\u{030A}a", false)]
    fn is_ring_mark_only_table(#[case] text: &str, #[case] expected: bool) {
        assert_eq!(is_ring_mark_only(text), expected);
    }

    /// An emphasis word becomes: opening marker, stripped word, closing marker.
    #[test]
    fn apply_word_emphasis_emits_triple() {
        let tokens = vec![word("훈민정음\u{030A}")];
        let mut state = EncoderState::new(false);
        let action = EmphasisRingRule.apply(&tokens, 0, &mut state).unwrap();
        match action {
            TokenAction::ReplaceMany(replacement) => {
                assert_eq!(replacement.len(), 3);
                assert!(matches!(&replacement[0], Token::PreEncoded(cells) if cells.len() == 2));
                assert!(matches!(
                    &replacement[1],
                    Token::Word(w) if !w.text.is_empty() && !w.text.chars().any(is_ring_mark)
                ));
                assert!(is_emphasis_close_marker(&replacement[2]));
            }
            _ => panic!("expected ReplaceMany(3 tokens)"),
        }
    }

    /// `trim_ring_marks` removes marks anywhere in the string.
    #[test]
    fn trim_ring_marks_strips_every_mark() {
        assert_eq!(trim_ring_marks("\u{030A}\u{0307}"), "");
        assert_eq!(trim_ring_marks("a\u{030A}b"), "ab");
    }

    #[test]
    fn apply_space_between_emphasis_and_real_word_closes() {
        let tokens = vec![
            word("훈민정음\u{030A}"),
            Token::Space(SpaceKind::Regular),
            word("이다"),
        ];
        let mut state = EncoderState::new(false);
        let action = EmphasisRingRule.apply(&tokens, 1, &mut state).unwrap();
        match action {
            TokenAction::Replace(Token::PreEncoded(bytes)) => {
                assert_eq!(bytes.len(), 2);
            }
            _ => panic!("expected close-emphasis PreEncoded"),
        }
    }

    /// A space next to a ring-mark-only word is dropped entirely.
    #[test]
    fn apply_space_adjacent_ring_mark_only_removes_spacing() {
        let tokens = vec![
            word("훈민정음"),
            Token::Space(SpaceKind::Regular),
            word("\u{030A}"),
        ];
        let mut state = EncoderState::new(false);
        let action = EmphasisRingRule.apply(&tokens, 1, &mut state).unwrap();
        assert!(matches!(action, TokenAction::ReplaceMany(ts) if ts.is_empty()));
    }

    #[test]
    fn apply_non_word_non_space_falls_through() {
        let tokens = vec![Token::PreEncoded(vec![1])];
        let mut state = EncoderState::new(false);
        let action = EmphasisRingRule.apply(&tokens, 0, &mut state).unwrap();
        assert!(matches!(action, TokenAction::Noop));
    }

    /// A word consisting only of ring marks is replaced by nothing.
    #[test]
    fn apply_word_only_ring_marks_replaces_with_empty() {
        let tokens = vec![word("\u{030A}\u{030A}")];
        let mut state = EncoderState::new(false);
        let action = EmphasisRingRule.apply(&tokens, 0, &mut state).unwrap();
        assert!(matches!(action, TokenAction::ReplaceMany(ts) if ts.is_empty()));
    }

    #[test]
    fn apply_space_after_emphasis_close_marker() {
        let close_marker = Token::PreEncoded(vec![
            crate::unicode::decode_unicode('⠤'),
            crate::unicode::decode_unicode('⠄'),
        ]);
        let tokens = vec![close_marker, Token::Space(SpaceKind::Regular), word("이다")];
        let mut state = EncoderState::new(false);
        let action = EmphasisRingRule.apply(&tokens, 1, &mut state).unwrap();
        assert!(matches!(action, TokenAction::ReplaceMany(ts) if ts.is_empty()));
    }

    #[test]
    fn apply_space_no_emphasis_neighbors_returns_noop() {
        let tokens = vec![
            word("hello"),
            Token::Space(SpaceKind::Regular),
            word("world"),
        ];
        let mut state = EncoderState::new(false);
        let action = EmphasisRingRule.apply(&tokens, 1, &mut state).unwrap();
        assert!(matches!(action, TokenAction::Noop));
    }
}