use crate::rules::token::Token;
/// Reports whether the content next to `tokens[index]`, looking in direction `dir`,
/// is a Korean word.
///
/// The answer is `true` in exactly two situations:
///
/// * the directly adjacent token is a `Token::Word` whose `meta.has_korean` flag is set, or
/// * the directly adjacent token is a `Token::Space` and the token immediately beyond
///   that space is such a word (only a single space token is looked through).
///
/// Any position that falls outside `tokens` — including one that cannot even be
/// represented because stepping would underflow or overflow `usize` — is treated as
/// "no neighbour" and yields `false`.
fn neighbor_is_korean(tokens: &[Token<'_>], index: usize, dir: NeighborDir) -> bool {
    // One step in the requested direction. Both directions use checked arithmetic so
    // that an extreme `index` can neither panic nor wrap around to the other end of
    // the slice.
    let step = |from: usize| match dir {
        NeighborDir::Prev => from.checked_sub(1),
        NeighborDir::Next => from.checked_add(1),
    };
    let is_korean_word = |pos: usize| {
        matches!(tokens.get(pos), Some(Token::Word(word)) if word.meta.has_korean)
    };

    let Some(adjacent) = step(index) else {
        return false;
    };
    if is_korean_word(adjacent) {
        return true;
    }

    // Not a Korean word itself: the only remaining way to succeed is a space with a
    // Korean word right behind it.
    matches!(tokens.get(adjacent), Some(Token::Space(_)))
        && step(adjacent).is_some_and(is_korean_word)
}
/// Direction in which `neighbor_is_korean` looks for a neighbouring token, relative to
/// the index it is given.
#[derive(Clone, Copy)]
enum NeighborDir {
/// Look toward lower indices (the token before the given position).
Prev,
/// Look toward higher indices (the token after the given position).
Next,
}
/// Returns how many padding entries should be emitted in front of the token at `index`,
/// judged from the content that precedes it.
///
/// * `0` — there is no preceding token (`index` is 0 or `index - 1` is out of range), or
///   the preceding token is a space that is not itself preceded by a Korean word.
/// * `1` — the preceding token is a space and the token before that space is a
///   `Token::Word` with `meta.has_korean` set.
/// * `2` — the preceding token is a word, pre-encoded, fraction or mode token.
pub(super) fn previous_content_needs_math_spacing(tokens: &[Token<'_>], index: usize) -> usize {
    let Some(prev_pos) = index.checked_sub(1) else {
        return 0;
    };
    let Some(prev_token) = tokens.get(prev_pos) else {
        return 0;
    };

    match prev_token {
        Token::Space(_) => {
            // A space is already present; one more entry is requested only when the
            // content on the far side of that space is a Korean word.
            let korean_before_space = prev_pos
                .checked_sub(1)
                .and_then(|pos| tokens.get(pos))
                .is_some_and(|token| matches!(token, Token::Word(w) if w.meta.has_korean));
            usize::from(korean_before_space)
        }
        // Directly abutting content of any other kind gets the full two entries.
        Token::Word(_) | Token::PreEncoded(_) | Token::Fraction(_) | Token::Mode(_) => 2,
    }
}
/// Returns how many padding entries should be emitted after the token at `index`,
/// judged from the content that follows it.
///
/// * `0` — there is no following token (`index + 1` is out of range or not
///   representable), or the following token is a space that is not itself followed by
///   a Korean word.
/// * `1` — the following token is a space and the token after that space is a
///   `Token::Word` with `meta.has_korean` set.
/// * `2` — the following token is a word, pre-encoded, fraction or mode token.
///
/// Index arithmetic is checked, mirroring `previous_content_needs_math_spacing`, so an
/// arbitrarily large `index` returns `0` instead of panicking or wrapping around to the
/// start of `tokens`.
pub(super) fn next_content_needs_math_spacing(tokens: &[Token<'_>], index: usize) -> usize {
    let Some(next_index) = index.checked_add(1) else {
        return 0;
    };
    match tokens.get(next_index) {
        Some(Token::Space(_)) => {
            // A space is already present; one more entry is requested only when the
            // content on the far side of that space is a Korean word.
            let right = next_index
                .checked_add(1)
                .and_then(|right_index| tokens.get(right_index));
            match right {
                Some(Token::Word(word)) if word.meta.has_korean => 1,
                _ => 0,
            }
        }
        // Directly abutting content of any other kind gets the full two entries.
        Some(Token::Word(_) | Token::PreEncoded(_) | Token::Fraction(_) | Token::Mode(_)) => 2,
        None => 0,
    }
}
/// Builds the tokens that stand in for a LaTeX math fragment located at `tokens[index]`.
///
/// `bytes` is the already-encoded form of the fragment and `inner` is its source text.
/// One of three output shapes is produced:
///
/// * **Letter list next to Korean text** — `inner` is a comma-separated list of single
///   ASCII letters (for example `a, b`) and `neighbor_is_korean` reports a Korean word on
///   either side. `bytes` is discarded and the letters are re-encoded one at a time with
///   `crate::english::encode_english`: cell 52 opens the run, every later letter is
///   preceded by cells 0 and 48, an uppercase letter is preceded by cell 32 and encoded
///   in lowercase, cell 16 follows every letter but the last, and cell 50 closes the run.
///   A letter for which `encode_english` returns `Err` contributes no cell of its own;
///   its surrounding markers are still emitted.
/// * **One or two letters next to Korean text** — `bytes` is emitted between cell 52 and
///   cell 50.
/// * **Anything else** — `bytes` is emitted unchanged, with runs of 0 cells before and
///   after it as requested by `previous_content_needs_math_spacing` and
///   `next_content_needs_math_spacing`. No padding is added when `inner` is non-empty and
///   made up solely of ASCII digits, `-`, `+`, U+2212, `.` and `,`.
///
/// Each run of cells is returned as its own `Token::PreEncoded`.
pub(crate) fn wrap_latex_math_tokens_with_inner<'a>(
    tokens: &[Token<'a>],
    index: usize,
    bytes: Vec<u8>,
    inner: &str,
) -> Vec<Token<'a>> {
    // NOTE(review): the names below describe the role each value plays in this function
    // (where it is emitted); what each cell means in the braille table is not visible
    // here — confirm against the encoding reference before relying on the names.
    const BLANK: u8 = 0;
    const OPEN_MARK: u8 = 52;
    const CLOSE_MARK: u8 = 50;
    const ITEM_PREFIX: u8 = 48;
    const ITEM_SEPARATOR: u8 = 16;
    const UPPERCASE_MARK: u8 = 32;

    // Plain numbers (digits plus sign / decimal / grouping characters) never get padding.
    let numeric_only = !inner.is_empty()
        && inner
            .chars()
            .all(|c| matches!(c, '0'..='9' | '-' | '+' | '\u{2212}' | '.' | ','));

    // One or two ASCII letters, e.g. a variable name mentioned in running text.
    let short_letters = matches!(inner.chars().count(), 1 | 2)
        && inner.chars().all(|c| c.is_ascii_alphabetic());

    // `Some(letters)` only when `inner` contains a comma and every comma-separated part,
    // once trimmed, is exactly one ASCII letter.
    let list_letters: Option<Vec<char>> = if inner.contains(',') {
        inner
            .split(',')
            .map(|part| {
                let mut chars = part.trim().chars();
                match (chars.next(), chars.next()) {
                    (Some(letter), None) if letter.is_ascii_alphabetic() => Some(letter),
                    _ => None,
                }
            })
            .collect()
    } else {
        None
    };

    let in_korean_prose = (short_letters || list_letters.is_some())
        && (neighbor_is_korean(tokens, index, NeighborDir::Prev)
            || neighbor_is_korean(tokens, index, NeighborDir::Next));

    if !in_korean_prose {
        // Ordinary math: pass the encoded bytes through, padded according to what
        // surrounds the fragment.
        let (lead, trail) = if numeric_only {
            (0, 0)
        } else {
            (
                previous_content_needs_math_spacing(tokens, index),
                next_content_needs_math_spacing(tokens, index),
            )
        };

        let mut out = Vec::new();
        if lead > 0 {
            out.push(Token::PreEncoded(vec![BLANK; lead]));
        }
        out.push(Token::PreEncoded(bytes));
        if trail > 0 {
            out.push(Token::PreEncoded(vec![BLANK; trail]));
        }
        return out;
    }

    // Letters embedded in Korean text: wrap them in the open/close markers and add no
    // padding of our own.
    let cells = match list_letters {
        Some(letters) => {
            let total = letters.len();
            let mut cells = Vec::new();
            for (pos, letter) in letters.into_iter().enumerate() {
                if pos == 0 {
                    cells.push(OPEN_MARK);
                } else {
                    cells.push(BLANK);
                    cells.push(ITEM_PREFIX);
                }

                let to_encode = if letter.is_ascii_uppercase() {
                    cells.push(UPPERCASE_MARK);
                    letter.to_ascii_lowercase()
                } else {
                    letter
                };
                // An encoding failure is deliberately non-fatal: the letter cell is
                // simply left out.
                if let Ok(code) = crate::english::encode_english(to_encode) {
                    cells.push(code);
                }

                let is_last = pos + 1 >= total;
                cells.push(if is_last { CLOSE_MARK } else { ITEM_SEPARATOR });
            }
            cells
        }
        None => {
            let mut cells = Vec::with_capacity(bytes.len() + 2);
            cells.push(OPEN_MARK);
            cells.extend(bytes);
            cells.push(CLOSE_MARK);
            cells
        }
    };

    vec![Token::PreEncoded(cells)]
}
// Unit tests for the neighbour detection and spacing helpers defined above.
#[cfg(test)]
mod tests {
use super::*;
use crate::rules::token::{SpaceKind, WordMeta, WordToken};
use std::borrow::Cow;
// Builds an owned `Token::Word` from `text`, with its metadata derived from the
// word's characters via `WordMeta::from_chars`.
fn word(text: &str) -> Token<'static> {
let chars: Vec<char> = text.chars().collect();
Token::Word(WordToken {
text: Cow::Owned(text.to_string()),
chars: chars.clone(),
meta: WordMeta::from_chars(&chars),
})
}
// A Hangul word directly adjacent to the probed position is detected in both directions.
#[test]
fn neighbor_is_korean_direct_korean_word() {
let tokens = vec![word("한국"), Token::PreEncoded(vec![])];
assert!(neighbor_is_korean(&tokens, 1, NeighborDir::Prev));
let tokens = vec![Token::PreEncoded(vec![]), word("한국")];
assert!(neighbor_is_korean(&tokens, 0, NeighborDir::Next));
}
// A directly adjacent word made of Latin letters does not count as a Korean neighbour.
#[test]
fn neighbor_is_korean_direct_english_word() {
let tokens = vec![word("hello"), Token::PreEncoded(vec![])];
assert!(!neighbor_is_korean(&tokens, 1, NeighborDir::Prev));
}
// A single space between the probed position and a Hangul word is looked through.
#[test]
fn neighbor_is_korean_space_then_korean() {
let tokens = vec![
word("한국"),
Token::Space(SpaceKind::Regular),
Token::PreEncoded(vec![]),
];
assert!(neighbor_is_korean(&tokens, 2, NeighborDir::Prev));
}
// Looking backwards from index 0 has no neighbour at all and must yield `false`.
#[test]
fn neighbor_is_korean_no_neighbour() {
let tokens = vec![Token::PreEncoded(vec![])];
assert!(!neighbor_is_korean(&tokens, 0, NeighborDir::Prev));
}
// With a one-element slice and index 2, both the previous position (1) and the next
// position (3) are out of range, so each helper must take its `None` arm and return 0.
#[rstest::rstest]
#[case::prev_oob(true, 2)]
#[case::next_oob(false, 2)]
fn content_needs_math_spacing_none_arm_returns_zero(
#[case] is_prev: bool,
#[case] index: usize,
) {
let tokens: Vec<Token<'_>> = vec![Token::PreEncoded(vec![])];
let result = if is_prev {
previous_content_needs_math_spacing(&tokens, index)
} else {
next_content_needs_math_spacing(&tokens, index)
};
assert_eq!(result, 0);
}
// A directly following non-space token requests two padding entries. Only the
// `PreEncoded` variant is exercised here, despite the test name mentioning words.
#[test]
fn next_content_needs_math_spacing_word_or_preencoded_returns_2() {
let tokens: Vec<Token<'_>> = vec![Token::PreEncoded(vec![]), Token::PreEncoded(vec![])];
assert_eq!(next_content_needs_math_spacing(&tokens, 0), 2);
}
}