/// Code point ranges above the ASCII range that [`is_unicode_whitespace`]
/// treats as whitespace.
///
/// Each entry is a `(start, end)` pair; a single code point is written with
/// `start == end`. The unit tests in this file expect both ends to be
/// inclusive (e.g. U+200A must be reported as whitespace) and require the
/// entries to be in ascending order without overlap.
///
/// ASCII whitespace (U+0009..=U+000D, U+0020) is deliberately absent: it is
/// handled inline by [`is_unicode_whitespace`].
#[rustfmt::skip]
pub const NON_ASCII_WHITESPACE_RANGES: &[(u32, u32)] = &[
(0x0085, 0x0085), // U+0085 NEXT LINE (NEL)
(0x00A0, 0x00A0), // U+00A0 NO-BREAK SPACE
(0x1680, 0x1680), // U+1680 OGHAM SPACE MARK
(0x2000, 0x200A), // U+2000 EN QUAD ..= U+200A HAIR SPACE
(0x2028, 0x2028), // U+2028 LINE SEPARATOR
(0x2029, 0x2029), // U+2029 PARAGRAPH SEPARATOR (kept as its own entry, adjacent to U+2028)
(0x202F, 0x202F), // U+202F NARROW NO-BREAK SPACE
(0x205F, 0x205F), // U+205F MEDIUM MATHEMATICAL SPACE
(0x3000, 0x3000), // U+3000 IDEOGRAPHIC SPACE
];
/// Returns the static [`NON_ASCII_WHITESPACE_RANGES`] table as a slice.
///
/// Function form of the constant; it returns the constant itself, with no
/// copying or filtering.
pub fn non_ascii_whitespace_ranges() -> &'static [(u32, u32)] {
NON_ASCII_WHITESPACE_RANGES
}
/// Serializes a table of `(u32, u32)` pairs into a byte vector.
///
/// This is a direct pass-through to `super::encode_u32_pair_table`; the byte
/// layout (endianness, headers, padding) is defined by that helper and is not
/// visible from this file.
pub fn encode_ranges_bytes(table: &[(u32, u32)]) -> Vec<u8> {
super::encode_u32_pair_table(table)
}
/// Returns the encoded size of `table`, as computed by
/// `super::encoded_u32_pair_table_size` from the number of entries.
///
/// Only the entry count is consulted; the pair values themselves do not
/// influence the result. The unit of the returned size is defined by the
/// parent-module helper (presumably bytes, matching [`encode_ranges_bytes`] —
/// confirm against that helper).
pub fn encoded_ranges_size(table: &[(u32, u32)]) -> usize {
    let pair_count = table.len();
    super::encoded_u32_pair_table_size(pair_count)
}
/// Reports whether the code point `cp` is considered whitespace.
///
/// ASCII whitespace — U+0009..=U+000D and U+0020 — is answered directly
/// without touching the table. Every other value is looked up in
/// [`NON_ASCII_WHITESPACE_RANGES`] through `super::cp_in_ranges`.
///
/// `cp` is a raw `u32`, so values that are not valid Unicode scalar values
/// are accepted as input; they simply go through the same table lookup.
pub fn is_unicode_whitespace(cp: u32) -> bool {
    match cp {
        // Fast path: tab, LF, VT, FF, CR and the ASCII space.
        0x09..=0x0D | 0x20 => true,
        _ => super::cp_in_ranges(cp, NON_ASCII_WHITESPACE_RANGES),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The table must be usable for ordered lookup: every range is
    /// well-formed and the ranges are strictly ascending without overlap.
    #[test]
    fn ranges_sorted_non_overlapping() {
        // Check every entry on its own. A `windows(2)` pass alone never
        // validates the first entry, and validates nothing at all for a
        // table with a single entry.
        for &(start, end) in NON_ASCII_WHITESPACE_RANGES {
            assert!(start <= end, "range {start:#x}..={end:#x} is inverted");
        }
        for win in NON_ASCII_WHITESPACE_RANGES.windows(2) {
            let (_, prev_end) = win[0];
            let (next_start, _) = win[1];
            // Adjacent ranges (e.g. U+2028 / U+2029) are allowed; overlap and
            // out-of-order entries are not.
            assert!(
                prev_end < next_start,
                "range ending at {prev_end:#x} is not strictly before range starting at {next_start:#x}"
            );
        }
    }

    /// The ASCII fast path covers U+0009..=U+000D and U+0020.
    #[test]
    fn ascii_whitespace_detected() {
        for cp in [0x09u32, 0x0A, 0x0B, 0x0C, 0x0D, 0x20] {
            assert!(is_unicode_whitespace(cp), "cp {cp:#x} should be whitespace");
        }
    }

    /// Every table entry is detected, including U+0085 and both ends plus an
    /// interior point of the only multi-code-point range.
    #[test]
    fn non_ascii_whitespace_detected() {
        for cp in [
            0x0085u32, 0x00A0, 0x1680, 0x2000, 0x2005, 0x200A, 0x2028, 0x2029, 0x202F, 0x205F,
            0x3000,
        ] {
            assert!(is_unicode_whitespace(cp), "cp {cp:#x} should be whitespace");
        }
    }

    /// Ordinary letters and symbols, inside and outside the BMP, are rejected.
    #[test]
    fn letters_not_whitespace() {
        for cp in [b'a' as u32, b'Z' as u32, 0x00E9, 0x4F60, 0x1F30D] {
            assert!(
                !is_unicode_whitespace(cp),
                "cp {cp:#x} must not be whitespace"
            );
        }
    }

    /// Code points immediately around the accepted ranges, and space-like
    /// characters such as ZERO WIDTH SPACE, must not be reported as
    /// whitespace. This guards against off-by-one range boundaries.
    #[test]
    fn near_miss_code_points_not_whitespace() {
        for cp in [
            0x08u32, 0x0E, 0x1F, 0x21, 0x0084, 0x0086, 0x009F, 0x00A1, 0x167F, 0x1681, 0x1FFF,
            0x200B, 0x2027, 0x202A, 0x202E, 0x2030, 0x205E, 0x2060, 0x2FFF, 0x3001, 0xFEFF,
        ] {
            assert!(
                !is_unicode_whitespace(cp),
                "cp {cp:#x} must not be whitespace"
            );
        }
    }

    /// Exhaustively compares against `char::is_whitespace` for every Unicode
    /// scalar value, not just the BMP, so a stray entry in a supplementary
    /// plane would also be caught.
    #[test]
    fn matches_rust_char_is_whitespace() {
        for cp in 0u32..=u32::from(char::MAX) {
            // Surrogates are not scalar values; `from_u32` yields `None` for
            // them and they are skipped.
            if let Some(ch) = char::from_u32(cp) {
                assert_eq!(
                    is_unicode_whitespace(cp),
                    ch.is_whitespace(),
                    "mismatch at cp {cp:#x}"
                );
            }
        }
    }
}