use crate::unicode_constants::{combining_marks, format_chars, halfwidth_fullwidth, prepend_chars};
/// Per-character classification record: a code point plus three boolean flags.
///
/// `CharacterInfo::new` derives every field from a single `char`. The fields
/// are public, so values constructed by hand elsewhere are not guaranteed to
/// follow that derivation.
#[derive(Debug, Clone)]
pub struct CharacterInfo {
/// Numeric value of the character (`ch as u32` when built by `CharacterInfo::new`).
pub code_point: u32,
/// Set by `CharacterInfo::new` when the code point is found in one of the
/// `combining_marks` tables.
pub is_combining: bool,
/// Set by `CharacterInfo::new` when the code point matches one of the
/// format-character constants or ranges.
pub is_format: bool,
/// Set by `CharacterInfo::new` when the code point is found in one of the
/// `prepend_chars` tables.
pub is_prepend: bool,
}
impl CharacterInfo {
    /// Classifies `ch` and returns the resulting record.
    ///
    /// The character is converted to its numeric code point once, and that
    /// value is run through the combining-mark, format-character and prepend
    /// checks, in that order.
    pub fn new(ch: char) -> Self {
        let scalar = u32::from(ch);
        let is_combining = Self::is_combining_mark(scalar);
        let is_format = Self::is_format_character(scalar);
        let is_prepend = Self::is_prepend_character(scalar);

        Self {
            code_point: scalar,
            is_combining,
            is_format,
            is_prepend,
        }
    }

    /// Returns `true` when `code` is contained in any of the five
    /// `combining_marks` constants (`DIACRITICAL`, `DIACRITICAL_EXTENDED`,
    /// `DIACRITICAL_SUPPLEMENT`, `DIACRITICAL_SYMBOLS`, `HALF_MARKS`).
    fn is_combining_mark(code: u32) -> bool {
        let probe = &code;

        // The tables are consulted in the listed order and the search stops
        // at the first one that contains the code point.
        let in_diacritical_tables = combining_marks::DIACRITICAL.contains(probe)
            || combining_marks::DIACRITICAL_EXTENDED.contains(probe)
            || combining_marks::DIACRITICAL_SUPPLEMENT.contains(probe)
            || combining_marks::DIACRITICAL_SYMBOLS.contains(probe);

        in_diacritical_tables || combining_marks::HALF_MARKS.contains(probe)
    }

    /// Returns `true` when `code` is one of the format characters this module
    /// recognises: the named `format_chars` constants, or one of three
    /// hard-coded code point ranges.
    fn is_format_character(code: u32) -> bool {
        // Code points identified through the named `format_chars` constants.
        let is_named_control = matches!(
            code,
            format_chars::SOFT_HYPHEN
                | format_chars::ZERO_WIDTH_NO_BREAK_SPACE
                | format_chars::ZERO_WIDTH_SPACE..=format_chars::RIGHT_TO_LEFT_MARK
        );

        // NOTE(review): these literal ranges look like the Unicode bidi
        // embedding/override controls, the invisible operator / bidi isolate
        // block, and the interlinear annotation controls -- confirm against
        // the Unicode tables before extending them.
        let in_literal_range =
            matches!(code, 0x202A..=0x202E | 0x2060..=0x206F | 0xFFF9..=0xFFFB);

        is_named_control || in_literal_range
    }

    /// Returns `true` when `code` is contained in any of the three
    /// `prepend_chars` constants (`ARABIC_PREPEND`, `ARABIC_DIACRITICS`,
    /// `ARABIC_SUPPLEMENT`).
    fn is_prepend_character(code: u32) -> bool {
        let probe = &code;

        let in_primary_table = prepend_chars::ARABIC_PREPEND.contains(probe);

        in_primary_table
            || prepend_chars::ARABIC_DIACRITICS.contains(probe)
            || prepend_chars::ARABIC_SUPPLEMENT.contains(probe)
    }
}
/// Reports whether the code point of `ch` is contained in
/// `halfwidth_fullwidth::BLOCK`.
pub fn is_halfwidth_fullwidth(ch: char) -> bool {
    let scalar = u32::from(ch);
    halfwidth_fullwidth::BLOCK.contains(&scalar)
}
/// Picks the character of `segment` that should represent it.
///
/// Characters are classified left to right with `CharacterInfo::new`; the
/// first one that is not flagged as prepend, combining or format is returned.
/// If every character carries at least one of those flags, the classification
/// of the first character is returned instead. An empty `segment` yields
/// `None`.
pub fn find_main_character_optimized(segment: &str) -> Option<CharacterInfo> {
    for ch in segment.chars() {
        let candidate = CharacterInfo::new(ch);
        let is_modifier =
            candidate.is_prepend || candidate.is_combining || candidate.is_format;
        if !is_modifier {
            return Some(candidate);
        }
    }

    // Nothing qualified: fall back to whatever comes first, if anything.
    segment.chars().next().map(CharacterInfo::new)
}