use std::sync::OnceLock;
use unicode_width::UnicodeWidthChar;
/// Number of code points in the Basic Multilingual Plane (U+0000..=U+FFFF),
/// and therefore the number of entries in the lookup table.
const BMP_LEN: usize = 1 << 16;

/// Table byte meaning "no width available" for that code point.
///
/// Assumes no real width ever equals 255; otherwise it would be
/// indistinguishable from this marker.
const SENTINEL_NONE: u8 = u8::MAX;

/// Process-wide cache for the BMP width table; empty until first initialised.
static BMP_TABLE: OnceLock<Box<[u8]>> = OnceLock::new();
/// Returns the shared BMP width table as a slice.
///
/// The table is produced by `build_bmp_table` the first time this is called;
/// later calls return the value already stored in `BMP_TABLE`.
#[inline]
fn bmp_table() -> &'static [u8] {
    // Full-range slice makes the `Box<[u8]>` -> `[u8]` borrow explicit.
    &BMP_TABLE.get_or_init(build_bmp_table)[..]
}
/// Builds the `BMP_LEN`-entry width table, indexed by code point.
///
/// Each entry holds the value of `UnicodeWidthChar::width` narrowed to `u8`.
/// Entries stay at `SENTINEL_NONE` when the value is not a valid `char`
/// (the surrogate range) or when `width` returns `None`.
fn build_bmp_table() -> Box<[u8]> {
    (0..BMP_LEN as u32)
        .map(|code| {
            char::from_u32(code)
                .and_then(UnicodeWidthChar::width)
                .map_or(SENTINEL_NONE, |cells| cells as u8)
        })
        .collect()
}
/// Looks up the width of the code point `cp`, as reported by
/// `UnicodeWidthChar::width`, narrowed to `u8`.
///
/// Code points below `BMP_LEN` are answered from the cached table; anything
/// at or above it is computed directly on each call.
///
/// Returns `None` when `cp` is not a valid `char` (a surrogate, or a value
/// `char::from_u32` rejects) or when `UnicodeWidthChar::width` itself
/// returns `None` for that character.
#[inline]
pub fn codepoint_width(cp: u32) -> Option<u8> {
    // Outside the table: fall back to the direct computation.
    if cp >= BMP_LEN as u32 {
        return char::from_u32(cp)
            .and_then(UnicodeWidthChar::width)
            .map(|cells| cells as u8);
    }

    match bmp_table()[cp as usize] {
        SENTINEL_NONE => None,
        cells => Some(cells),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Width taken straight from `unicode_width`, bypassing the lookup table.
    fn reference_width(cp: u32) -> Option<u8> {
        let ch = char::from_u32(cp)?;
        UnicodeWidthChar::width(ch).map(|w| w as u8)
    }

    /// Every printable ASCII character is reported as width 1.
    #[test]
    fn ascii_printable_is_one() {
        (0x20u32..=0x7E).for_each(|cp| {
            assert_eq!(codepoint_width(cp), Some(1), "cp = U+{cp:04X}");
        });
    }

    /// NUL, ESC and DEL get whatever `unicode_width` reports for them.
    #[test]
    fn ascii_control_matches_unicode_width() {
        for cp in [0x00u32, 0x1B, 0x7F] {
            let scalar = reference_width(cp);
            assert_eq!(codepoint_width(cp), scalar, "cp = U+{cp:04X}");
        }
    }

    /// Two CJK ideographs (U+4E2D, U+65E5) are reported as width 2.
    #[test]
    fn cjk_ideograph_is_wide() {
        assert_eq!(codepoint_width(0x4E2D), Some(2));
        assert_eq!(codepoint_width(0x65E5), Some(2));
    }

    /// Variation selectors 15 and 16 are reported as width 0.
    #[test]
    fn vs15_vs16_zero_width() {
        assert_eq!(codepoint_width(0xFE0E), Some(0));
        assert_eq!(codepoint_width(0xFE0F), Some(0));
    }

    /// A code point above the BMP (U+1F389) is reported as width 2.
    #[test]
    fn supplementary_plane_emoji_wide() {
        assert_eq!(codepoint_width(0x1F389), Some(2));
    }

    /// Both ends of the surrogate range yield `None`.
    #[test]
    fn surrogate_is_none() {
        assert_eq!(codepoint_width(0xD800), None);
        assert_eq!(codepoint_width(0xDFFF), None);
    }

    /// The first value past U+10FFFF yields `None`.
    #[test]
    fn invalid_codepoint_is_none() {
        assert_eq!(codepoint_width(0x11_0000), None);
    }

    /// Every seventh BMP code point from U+0020 agrees with `unicode_width`.
    #[test]
    fn matches_unicode_width_crate_for_bmp_sample() {
        for cp in (0x20u32..0xFFFF).step_by(7) {
            let table = codepoint_width(cp);
            let scalar = reference_width(cp);
            assert_eq!(table, scalar, "cp = U+{cp:04X}");
        }
    }
}