use crate::{symbol_shortcut, utils};
/// Reports whether `symbol` belongs to the skip-terminator punctuation set.
///
/// The set contains sentence-final marks, straight and closing curly quotes,
/// closing corner/angle brackets, both ASCII parentheses, the closing square
/// and curly brackets, clause separators and the horizontal bar. Any other
/// character yields `false`.
pub(crate) fn should_skip_terminator_for_symbol(symbol: char) -> bool {
    const SKIP_TERMINATOR_SYMBOLS: &[char] = &[
        // Sentence-final marks and ellipses.
        '.', '?', '!', '…', '⋯',
        // Straight quotes and closing curly quotes.
        '"', '\'', '”', '’',
        // Closing corner and angle brackets.
        '」', '』', '〉', '》',
        // ASCII brackets; note that the opening parenthesis is part of this set.
        '(', ')', ']', '}',
        // Clause separators and the horizontal bar.
        ',', ':', ';', '―',
    ];

    SKIP_TERMINATOR_SYMBOLS.contains(&symbol)
}
/// Reports whether `symbol` belongs to the request-continuation punctuation set.
///
/// The set contains sentence-final marks, straight and closing curly quotes,
/// closing corner/angle brackets, the three ASCII closing brackets, clause
/// separators and the horizontal bar. The opening parenthesis `(` is *not*
/// a member; any character outside the set yields `false`.
pub(crate) fn should_request_continuation(symbol: char) -> bool {
    const CONTINUATION_SYMBOLS: &[char] = &[
        // Sentence-final marks and ellipses.
        '.', '?', '!', '…', '⋯',
        // Straight quotes and closing curly quotes.
        '"', '\'', '”', '’',
        // Closing corner and angle brackets.
        '」', '』', '〉', '》',
        // ASCII closing brackets only.
        ')', ']', '}',
        // Clause separators and the horizontal bar.
        ',', ':', ';', '―',
    ];

    CONTINUATION_SYMBOLS.contains(&symbol)
}
/// Reports whether `symbol` is one of the force-terminator characters:
/// the slash `/`, the ASCII tilde `~`, or the tilde operator `∼`.
pub(crate) fn should_force_terminator_before_symbol(symbol: char) -> bool {
    ['/', '~', '∼'].contains(&symbol)
}
/// Reports whether `symbol` is classified as an English symbol.
///
/// This is a thin wrapper that forwards to
/// `symbol_shortcut::is_english_symbol_char`; the actual membership table
/// lives in that module. The unit tests in this file expect `(`, `)` and `,`
/// to be accepted and `?` to be rejected.
pub(crate) fn is_english_symbol(symbol: char) -> bool {
symbol_shortcut::is_english_symbol_char(symbol)
}
/// Reports whether `letter` is an ASCII letter other than `a`, `i` or `o`.
///
/// The comparison is case-insensitive, so `A`, `I` and `O` are excluded as
/// well. Non-ASCII and non-alphabetic characters always yield `false`.
pub(crate) fn requires_single_letter_continuation(letter: char) -> bool {
    match letter.to_ascii_lowercase() {
        // The three exempt letters, in either case.
        'a' | 'i' | 'o' => false,
        // `to_ascii_lowercase` only alters `A..=Z`, so the folded value is an
        // ASCII lowercase letter exactly when the input was an ASCII letter.
        folded => folded.is_ascii_lowercase(),
    }
}
/// Returns `true` only when `ch` holds a character that is an ASCII letter or
/// ASCII digit; `None` yields `false`.
fn is_ascii_letter_or_digit(ch: Option<char>) -> bool {
    matches!(ch, Some(inner) if inner.is_ascii_alphanumeric())
}
/// Reports whether `symbol` is one of the six digital-notation punctuation
/// characters: `/`, `@`, `#`, `.`, `_` or `:`.
fn is_digital_notation_symbol(symbol: char) -> bool {
    "/@#._:".contains(symbol)
}
/// Reports whether `word_chars` carries a "digital notation" signature.
///
/// A word qualifies when either
/// * it contains a strong marker: two consecutive slashes (`//`), an `@`, or
///   a `#`; or
/// * it contains an underscore together with at least one of `.`, `/` or `:`.
///
/// The checks scan the slice directly, so no intermediate `String` is
/// allocated per call.
fn has_digital_notation_signature(word_chars: &[char]) -> bool {
    let has = |needle: char| word_chars.contains(&needle);

    // Strong markers are sufficient on their own.
    let has_double_slash = word_chars
        .windows(2)
        .any(|pair| matches!(pair, ['/', '/']));
    if has_double_slash || has('@') || has('#') {
        return true;
    }

    // An underscore alone is too weak; it needs a path/host-like companion.
    has('_') && (has('.') || has('/') || has(':'))
}
/// Looks backwards from `index` (exclusive) for an ASCII letter or digit.
///
/// Walking towards the start of `word_chars`, characters accepted by
/// `symbol_shortcut::is_english_symbol_char` are skipped. The search succeeds
/// on the first ASCII alphanumeric character and fails on the first character
/// that is neither alphanumeric nor such a symbol, or when the start of the
/// slice is reached.
///
/// # Panics
///
/// Panics if `index` is greater than `word_chars.len()`.
pub(crate) fn prev_ascii_letter_or_digit(word_chars: &[char], index: usize) -> bool {
    for &candidate in word_chars[..index].iter().rev() {
        if candidate.is_ascii_alphanumeric() {
            return true;
        }
        // Anything that is not a skippable English symbol ends the search.
        if !symbol_shortcut::is_english_symbol_char(candidate) {
            return false;
        }
    }
    false
}
pub(crate) fn next_ascii_letter_or_digit(
word_chars: &[char],
index: usize,
remaining_words: &[&str],
) -> bool {
let mut j = index + 1;
while j < word_chars.len() {
let ch = word_chars[j];
if ch.is_ascii_alphanumeric() {
return true;
}
if symbol_shortcut::is_english_symbol_char(ch) {
j += 1;
continue;
}
return false;
}
for word in remaining_words {
for ch in word.chars() {
if ch.is_ascii_alphanumeric() {
return true;
}
if symbol_shortcut::is_english_symbol_char(ch) {
continue;
}
return false;
}
}
false
}
/// Decides whether `symbol`, located at `word_chars[index]`, should be
/// rendered as an English symbol.
///
/// The answer is always `false` when `english_indicator` is off. Otherwise
/// the rule depends on the symbol:
///
/// * `(` — the next character (in this word, or else the first character of
///   the first remaining word) must be an ASCII letter or digit, and the
///   previous character must not be Korean per `utils::is_korean_char`.
/// * `)` — the value on top of `parenthesis_stack`; `false` if it is empty.
/// * `,` — requires `is_english` plus an ASCII letter or digit on both sides
///   (as found by `prev_ascii_letter_or_digit` / `next_ascii_letter_or_digit`).
/// * `/ @ # . _ : -` — an ASCII letter or digit on both sides; a `/` that is
///   directly adjacent to another `/` only needs one on the far side.
/// * anything else — `false`.
///
/// # Panics
///
/// Panics if `index` is greater than `word_chars.len()`.
#[allow(clippy::too_many_arguments)]
pub(crate) fn should_render_symbol_as_english(
    english_indicator: bool,
    is_english: bool,
    parenthesis_stack: &[bool],
    symbol: char,
    word_chars: &[char],
    index: usize,
    remaining_words: &[&str],
) -> bool {
    if !english_indicator {
        return false;
    }

    // Immediate neighbours; the right-hand one may come from the next word.
    let prev_char = index.checked_sub(1).map(|before| word_chars[before]);
    let next_char = word_chars
        .get(index + 1)
        .copied()
        .or_else(|| remaining_words.first().and_then(|word| word.chars().next()));

    // Nearest ASCII letter/digit on each side, skipping English symbols.
    let ascii_neighbours = || {
        let before = prev_ascii_letter_or_digit(word_chars, index);
        let after = next_ascii_letter_or_digit(word_chars, index, remaining_words);
        (before, after)
    };

    match symbol {
        '(' => {
            if !is_ascii_letter_or_digit(next_char) {
                return false;
            }
            !prev_char.is_some_and(utils::is_korean_char)
        }
        ')' => parenthesis_stack.last() == Some(&true),
        ',' => {
            if !is_english {
                return false;
            }
            let (before, after) = ascii_neighbours();
            before && after
        }
        '/' | '@' | '#' | '.' | '_' | ':' | '-' => {
            let (before, after) = ascii_neighbours();
            if before && after {
                return true;
            }
            // Only a slash touching another slash may qualify with ASCII on
            // just one side (e.g. the two slashes in `scheme://host`).
            symbol == '/'
                && ((prev_char == Some('/') && after) || (next_char == Some('/') && before))
        }
        _ => false,
    }
}
/// Decides whether English mode should be kept across `symbol` at
/// `word_chars[index]`.
///
/// Returns `true` only when all of the following hold:
/// * `symbol` is a digital-notation symbol (`is_digital_notation_symbol`);
/// * the word has a digital-notation signature
///   (`has_digital_notation_signature`);
/// * `should_render_symbol_as_english` accepts the symbol when called with the
///   English indicator on, English mode on, and an empty parenthesis stack.
pub(crate) fn should_keep_english_mode_for_symbol(
    symbol: char,
    word_chars: &[char],
    index: usize,
    remaining_words: &[&str],
) -> bool {
    let looks_digital =
        is_digital_notation_symbol(symbol) && has_digital_notation_signature(word_chars);

    looks_digital
        && should_render_symbol_as_english(
            true,
            true,
            &[],
            symbol,
            word_chars,
            index,
            remaining_words,
        )
}
/// Unit tests for the symbol classification helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    // `a`, `i`, `o` are exempt in either case; other ASCII letters are not.
    #[rstest::rstest]
    #[case::lowercase_b_requires('b', true)]
    #[case::lowercase_a_excluded('a', false)]
    #[case::uppercase_excluded('A', false)]
    fn requires_single_letter_continuation_distinguishes_letters(
        #[case] ch: char,
        #[case] expected: bool,
    ) {
        assert_eq!(requires_single_letter_continuation(ch), expected);
    }

    // The skip set and the force set must not overlap.
    #[test]
    fn skip_and_force_terminator_sets_are_separate() {
        for symbol in ['.', '?', '!', ')', ']', ','] {
            assert!(should_skip_terminator_for_symbol(symbol));
        }
        for symbol in ['/', '~'] {
            assert!(should_force_terminator_before_symbol(symbol));
            assert!(!should_skip_terminator_for_symbol(symbol));
        }
        assert!(!should_force_terminator_before_symbol('-'));
        assert!(should_request_continuation('.'));
        assert!(!should_request_continuation('('));
    }

    #[rstest::rstest]
    #[case('(', true)]
    #[case(')', true)]
    #[case(',', true)]
    #[case('?', false)]
    fn english_symbol_detection_matches_lookup_table(#[case] ch: char, #[case] expected: bool) {
        assert_eq!(is_english_symbol(ch), expected);
    }

    #[rstest::rstest]
    #[case::skip_english_symbol_to_ascii("A(,B", 2, true)]
    #[case::korean_neighbor_blocks("가,", 1, false)]
    fn prev_ascii_letter_or_digit_skips_english_symbols(
        #[case] input: &str,
        #[case] idx: usize,
        #[case] expected: bool,
    ) {
        let word: Vec<char> = input.chars().collect();
        assert_eq!(prev_ascii_letter_or_digit(&word, idx), expected);
    }

    #[rstest::rstest]
    #[case::contiguous_ascii("A,B", 1, &[], true)]
    #[case::skip_english_symbol("A,(B", 1, &[], true)]
    #[case::remaining_word_ascii("A,", 1, &["B"], true)]
    #[case::hangul_following("A,가", 1, &[], false)]
    #[case::remaining_word_with_symbol_then_ascii("A,", 1, &["(B"], true)]
    #[case::remaining_word_only_symbols("A,", 1, &["()"], false)]
    fn next_ascii_letter_or_digit_checks_future_ascii(
        #[case] input: &str,
        #[case] idx: usize,
        #[case] remaining: &[&str],
        #[case] expected: bool,
    ) {
        let word: Vec<char> = input.chars().collect();
        assert_eq!(next_ascii_letter_or_digit(&word, idx, remaining), expected);
    }

    #[test]
    fn should_render_symbol_as_english_for_parentheses() {
        // Opening parenthesis directly followed by an ASCII letter.
        let opener: Vec<char> = "(Hello".chars().collect();
        assert!(should_render_symbol_as_english(
            true,
            false,
            &[],
            '(',
            &opener,
            0,
            &[]
        ));

        // A Korean character right before the parenthesis blocks it.
        let korean_before: Vec<char> = "가(".chars().collect();
        assert!(!should_render_symbol_as_english(
            true,
            false,
            &[],
            '(',
            &korean_before,
            1,
            &["A"]
        ));

        // With the English indicator off nothing is rendered as English.
        assert!(!should_render_symbol_as_english(
            false,
            false,
            &[],
            '(',
            &opener,
            0,
            &[]
        ));
    }

    // A closing parenthesis mirrors whatever is on top of the stack.
    #[rstest::rstest]
    #[case::stack_top_true(true, true)]
    #[case::stack_top_false(false, false)]
    fn should_render_symbol_as_english_for_closing_parenthesis(
        #[case] stack_top: bool,
        #[case] expected: bool,
    ) {
        let closer: Vec<char> = ")".chars().collect();
        assert_eq!(
            should_render_symbol_as_english(true, true, &[stack_top], ')', &closer, 0, &[]),
            expected,
        );
    }

    #[rstest::rstest]
    #[case::both_ascii_in_english_mode("A,B", true, true)]
    #[case::not_in_english_mode("A,B", false, false)]
    #[case::korean_neighbor("가,B", true, false)]
    fn should_render_symbol_as_english_for_comma_requires_ascii_neighbors(
        #[case] input: &str,
        #[case] is_english: bool,
        #[case] expected: bool,
    ) {
        let word: Vec<char> = input.chars().collect();
        assert_eq!(
            should_render_symbol_as_english(true, is_english, &[], ',', &word, 1, &[]),
            expected
        );
    }

    #[rstest::rstest]
    #[case::double_slash("http://example.com", true)]
    #[case::at_sign("user@host", true)]
    #[case::hash("tag#name", true)]
    #[case::underscore_plus_dot("a_b.c", true)]
    #[case::pure_underscore("a_b", false)]
    fn digital_notation_signature_strong_markers(#[case] input: &str, #[case] expected: bool) {
        let chars: Vec<char> = input.chars().collect();
        assert_eq!(
            super::has_digital_notation_signature(&chars),
            expected,
            "input={input:?}"
        );
    }

    #[test]
    fn should_keep_english_mode_for_symbol_passes_through() {
        // `@` sits between ASCII letters in a word that has a digital
        // signature, so English mode must be kept.
        let chars: Vec<char> = "user@host.com".chars().collect();
        assert!(super::should_keep_english_mode_for_symbol(
            '@',
            &chars,
            4,
            &[]
        ));

        // A lone underscore is not a digital-notation signature, so the
        // helper bails out before consulting the rendering rules.
        let weak: Vec<char> = "a_b".chars().collect();
        assert!(!super::should_keep_english_mode_for_symbol(
            '_',
            &weak,
            1,
            &[]
        ));
    }
}