/// A named script together with the code-point ranges that belong to it.
///
/// The type holds only `'static` borrowed data, so it is cheap to copy and
/// can be compared and hashed directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Script {
    /// Name reported by [`script_from_codepoint`] when one of `blocks` matches.
    pub name: &'static str,
    /// Code-point ranges stored as `[first, last]` pairs; the lookup functions
    /// treat both ends as inclusive.
    pub blocks: &'static [[u32; 2]],
}
/// Table of the supported scripts and the code-point ranges assigned to each.
///
/// Every range is a `[first, last]` pair that the lookup functions compare
/// with both ends inclusive. [`script_from_codepoint`] walks the entries in
/// the order written here and returns the first script with a matching range.
pub const SCRIPT_DATA: &[Script] = &[
    Script {
        name: "latin",
        blocks: &[[0x0100, 0x024F], [0x0300, 0x036F]],
    },
    Script {
        name: "cyrillic",
        blocks: &[[0x0400, 0x04FF]],
    },
    Script {
        name: "armenian",
        blocks: &[[0x0530, 0x058F]],
    },
    Script {
        name: "brahmic",
        blocks: &[[0x0900, 0x109F]],
    },
    Script {
        name: "georgian",
        blocks: &[[0x10A0, 0x10FF]],
    },
    Script {
        name: "cjk",
        blocks: &[[0x3000, 0x30FF], [0x4E00, 0x9FAF], [0xFF00, 0xFF60]],
    },
    Script {
        name: "hangul",
        blocks: &[[0xAC00, 0xD7AF]],
    },
];
/// Returns the length of the flattened range table: two `u32` slots (first
/// and last code point) for every block listed in [`SCRIPT_DATA`].
///
/// Uses a `while` loop so the function stays usable as a `const fn`, which
/// lets the result appear as an array length.
#[must_use]
pub const fn all_blocks_len() -> usize {
    let mut block_count = 0;
    let mut script_idx = 0;
    while script_idx < SCRIPT_DATA.len() {
        block_count += SCRIPT_DATA[script_idx].blocks.len();
        script_idx += 1;
    }
    // Each block contributes both of its bounds to the flattened table.
    block_count * 2
}
/// Flattens every block of [`SCRIPT_DATA`] into one array laid out as
/// `[first0, last0, first1, last1, ...]`, preserving table order.
///
/// The array length comes from [`all_blocks_len`], so every slot is written
/// exactly once.
#[must_use]
pub const fn build_all_blocks() -> [u32; all_blocks_len()] {
    let mut flat = [0u32; all_blocks_len()];
    let mut write_at = 0;
    let mut script_idx = 0;
    while script_idx < SCRIPT_DATA.len() {
        let ranges = SCRIPT_DATA[script_idx].blocks;
        let mut range_idx = 0;
        while range_idx < ranges.len() {
            let [first, last] = ranges[range_idx];
            flat[write_at] = first;
            flat[write_at + 1] = last;
            write_at += 2;
            range_idx += 1;
        }
        script_idx += 1;
    }
    flat
}
/// Flattened `[first, last, first, last, ...]` range table, produced by
/// [`build_all_blocks`] as a constant and scanned by [`supported_codepoint`].
pub const ALL_BLOCKS: [u32; all_blocks_len()] = build_all_blocks();
/// Looks up the script whose block list contains `codepoint`.
///
/// Scripts are checked in [`SCRIPT_DATA`] order and block bounds are
/// inclusive on both ends. Returns the name of the first matching script, or
/// `None` when no block contains the code point.
#[must_use]
pub fn script_from_codepoint(codepoint: u32) -> Option<&'static str> {
    SCRIPT_DATA
        .iter()
        .find(|script| {
            script
                .blocks
                .iter()
                .any(|&[first, last]| (first..=last).contains(&codepoint))
        })
        .map(|script| script.name)
}
/// Reports whether `codepoint` lies inside any range stored in
/// [`ALL_BLOCKS`] (bounds inclusive).
///
/// Usable in constant contexts, which is why it scans the flattened table
/// with a plain `while` loop instead of iterators.
#[must_use]
pub const fn supported_codepoint(codepoint: u32) -> bool {
    let table = &ALL_BLOCKS;
    // The table holds consecutive (first, last) pairs, so walk it pair by pair.
    let pair_count = table.len() / 2;
    let mut pair = 0;
    while pair < pair_count {
        let first = table[2 * pair];
        let last = table[2 * pair + 1];
        if first <= codepoint && codepoint <= last {
            return true;
        }
        pair += 1;
    }
    false
}
/// `char` convenience wrapper around [`script_from_codepoint`].
///
/// Converts `ch` to its scalar value and returns the matching script name,
/// or `None` when no script covers it.
#[must_use]
pub fn script_from_char(ch: char) -> Option<&'static str> {
    let codepoint = u32::from(ch);
    script_from_codepoint(codepoint)
}
/// `char` convenience wrapper around [`supported_codepoint`].
///
/// Returns `true` when the scalar value of `ch` falls in a supported range.
#[must_use]
pub const fn supported_char(ch: char) -> bool {
    // An `as` cast keeps this callable in constant contexts; `char` to `u32`
    // is lossless.
    let codepoint = ch as u32;
    supported_codepoint(codepoint)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that each listed code point resolves to the script `expected`.
    fn assert_script(expected: &str, codepoints: &[u32]) {
        for &cp in codepoints {
            assert_eq!(script_from_codepoint(cp), Some(expected), "U+{:04X}", cp);
        }
    }

    /// Asserts that each listed code point is reported as supported.
    fn assert_supported(codepoints: &[u32]) {
        for &cp in codepoints {
            assert!(supported_codepoint(cp), "U+{:04X}", cp);
        }
    }

    /// Asserts that each listed code point is reported as unsupported.
    fn assert_unsupported(codepoints: &[u32]) {
        for &cp in codepoints {
            assert!(!supported_codepoint(cp), "U+{:04X}", cp);
        }
    }

    #[test]
    fn test_latin_characters() {
        assert_script("latin", &[0x0100, 0x024f, 0x0300, 0x036f]);
        assert_supported(&[0x0100, 0x0300]);
    }

    #[test]
    fn test_cyrillic_characters() {
        assert_script("cyrillic", &[0x0400, 0x04ff]);
        assert_supported(&[0x0400]);
    }

    #[test]
    fn test_armenian_characters() {
        assert_script("armenian", &[0x0530, 0x058F]);
        assert_supported(&[0x0530]);
    }

    #[test]
    fn test_brahmic_characters() {
        assert_script("brahmic", &[0x0900, 0x109F]);
        assert_supported(&[0x0900]);
    }

    #[test]
    fn test_georgian_characters() {
        assert_script("georgian", &[0x10A0, 0x10ff]);
        assert_supported(&[0x10A0]);
    }

    #[test]
    fn test_cjk_characters() {
        assert_script("cjk", &[0x3000, 0x30FF, 0x4E00, 0x9FAF, 0xFF00, 0xFF60]);
        assert_supported(&[0x3000, 0x4E00, 0xFF00]);
    }

    #[test]
    fn test_hangul_characters() {
        assert_script("hangul", &[0xAC00, 0xD7AF]);
        assert_supported(&[0xAC00]);
    }

    #[test]
    fn test_unsupported_characters() {
        for &cp in &[0x0041u32, 0x2000] {
            assert_eq!(script_from_codepoint(cp), None, "U+{:04X}", cp);
        }
        assert_unsupported(&[0x0041, 0x2000]);
    }

    #[test]
    fn test_char_helpers() {
        assert_eq!(script_from_char('\u{100}'), Some("latin"));
        assert!(supported_char('\u{100}'));

        assert_eq!(script_from_char('\u{410}'), Some("cyrillic"));
        assert!(supported_char('\u{410}'));

        assert_eq!(script_from_char('A'), None);
        assert!(!supported_char('A'));
    }

    #[test]
    fn test_boundary_conditions() {
        // Both ends of the first latin range are inside; their neighbours are not.
        assert_supported(&[0x0100, 0x024f]);
        assert_unsupported(&[0x00FF, 0x0250]);
    }
}