use std::env;
use std::fs;
use std::path::PathBuf;
use unicode_bidi::BidiClass;
use unicode_joining_type::{JoiningType, get_joining_type};
use unicode_normalization::char::canonical_combining_class;
use unicode_script::{Script, UnicodeScript};
// One past the largest Unicode code point (U+10FFFF); the length of every flat table.
const UNICODE_LIMIT: usize = 0x110000;
// Code points per page. The generated file declares a page shift of 8 and a page
// mask of 0xFF, which correspond to this value of 256.
const PAGE_SIZE: usize = 256;
// Number of pages needed to cover the whole code space.
const PAGE_COUNT: usize = UNICODE_LIMIT / PAGE_SIZE;
// Joining-class codes stored in the joining table. The generated file declares
// constants with the same names and values.
const JOIN_NONE: u8 = 0;
const JOIN_RIGHT: u8 = 1;
const JOIN_LEFT: u8 = 2;
// Also used for join-causing characters (see `map_joining_class`).
const JOIN_DUAL: u8 = 3;
const JOIN_TRANSPARENT: u8 = 4;
// Script-class codes stored in the script table. The generated file declares
// constants with the same names and values.
// `SCRIPT_COMMON` also covers the Inherited and Unknown scripts (see `map_script_class`).
const SCRIPT_COMMON: u8 = 0;
const SCRIPT_LATIN: u8 = 1;
const SCRIPT_ARABIC: u8 = 2;
const SCRIPT_HEBREW: u8 = 3;
const SCRIPT_DEVANAGARI: u8 = 4;
// Every script without a dedicated code above.
const SCRIPT_OTHER: u8 = 5;
// Bidi-class codes stored in the bidi table. The generated file declares
// constants with the same names and values.
// `BIDI_OTHER_NEUTRAL` is the catch-all for every bidi class without its own code.
const BIDI_OTHER_NEUTRAL: u8 = 0;
const BIDI_L: u8 = 1;
const BIDI_R: u8 = 2;
const BIDI_AL: u8 = 3;
const BIDI_EN: u8 = 4;
const BIDI_AN: u8 = 5;
const BIDI_NSM: u8 = 6;
// Covers the WS, B and S bidi classes (see `map_bidi_class`).
const BIDI_WS: u8 = 7;
/// Compresses a flat per-code-point table into a two-level page table.
///
/// The first `PAGE_COUNT * PAGE_SIZE` entries of `values` are split into pages of
/// `PAGE_SIZE` bytes, and identical pages are stored only once. Any entries past
/// that point are ignored.
///
/// Returns `(page_index, page_data)`:
/// * `page_index` has `PAGE_COUNT` entries; entry `p` is the number of the unique
///   page that holds the contents of input page `p`;
/// * `page_data` is the concatenation of the unique pages, `PAGE_SIZE` bytes each.
///
/// Unique page 0 is always the page filled with `default`, whether or not any input
/// page equals it. The remaining unique pages appear in first-seen order.
///
/// # Panics
///
/// Panics if `values` holds fewer than `PAGE_COUNT * PAGE_SIZE` entries, or if the
/// number of unique pages does not fit in a `u16`.
fn build_sparse(values: &[u8], default: u8) -> (Vec<u16>, Vec<u8>) {
    use std::collections::HashMap;

    let needed = PAGE_COUNT * PAGE_SIZE;
    assert!(
        values.len() >= needed,
        "build_sparse needs at least {} values, got {}",
        needed,
        values.len()
    );

    // Pages are kept as borrowed slices, so deduplication never copies page
    // contents; the only copy happens when the unique pages are concatenated.
    let default_page = vec![default; PAGE_SIZE];
    let mut unique_pages: Vec<&[u8]> = vec![default_page.as_slice()];
    let mut page_ids: HashMap<&[u8], u16> = HashMap::new();
    page_ids.insert(default_page.as_slice(), 0);

    let mut page_index = Vec::with_capacity(PAGE_COUNT);
    for chunk in values[..needed].chunks_exact(PAGE_SIZE) {
        let id = match page_ids.get(chunk).copied() {
            Some(known) => known,
            None => {
                let next = unique_pages.len();
                // Guard the narrowing below instead of truncating silently.
                assert!(
                    next <= usize::from(u16::MAX),
                    "too many unique pages for a u16 page index"
                );
                let fresh = next as u16;
                unique_pages.push(chunk);
                page_ids.insert(chunk, fresh);
                fresh
            }
        };
        page_index.push(id);
    }

    (page_index, unique_pages.concat())
}
/// Renders `values` as the Rust source of a `pub(crate) static` `u16` array.
///
/// The declaration is named `name`, its length is `values.len()`, and the
/// elements are laid out 32 per row, each followed by a comma. The returned text
/// ends with a blank line.
fn render_u16_array(name: &str, values: &[u16]) -> String {
    let header = format!("pub(crate) static {name}: [u16; {}] = [\n", values.len());
    let rows: String = values
        .chunks(32)
        .map(|row| {
            let cells: String = row.iter().map(|value| format!("{value},")).collect();
            format!(" {cells}\n")
        })
        .collect();
    header + &rows + "];\n\n"
}
/// Renders `values` as the Rust source of a `pub(crate) static` `u8` array.
///
/// The declaration is named `name`, its length is `values.len()`, and the
/// elements are laid out 64 per row, each followed by a comma. The returned text
/// ends with a blank line.
fn render_u8_array(name: &str, values: &[u8]) -> String {
    let mut text = format!("pub(crate) static {name}: [u8; {}] = [\n", values.len());
    for row in values.chunks(64) {
        text += " ";
        text.extend(row.iter().map(|byte| format!("{byte},")));
        text += "\n";
    }
    text += "];\n\n";
    text
}
/// Collapses the joining type reported by `get_joining_type` for `ch` into one
/// of the `JOIN_*` codes.
///
/// Join-causing characters share `JOIN_DUAL` with dual-joining ones. Non-joining
/// characters, and any joining type not named below, map to `JOIN_NONE`.
#[inline]
fn map_joining_class(ch: char) -> u8 {
    let joining = get_joining_type(ch);
    match joining {
        JoiningType::DualJoining | JoiningType::JoinCausing => JOIN_DUAL,
        JoiningType::LeftJoining => JOIN_LEFT,
        JoiningType::RightJoining => JOIN_RIGHT,
        JoiningType::Transparent => JOIN_TRANSPARENT,
        // `NonJoining` and everything else.
        _ => JOIN_NONE,
    }
}
/// Collapses the script of `ch` into one of the `SCRIPT_*` codes.
///
/// Latin, Arabic, Hebrew and Devanagari each get a dedicated code. Common,
/// Inherited and Unknown share `SCRIPT_COMMON`, and every other script maps to
/// `SCRIPT_OTHER`.
#[inline]
fn map_script_class(ch: char) -> u8 {
    let script = ch.script();
    match script {
        Script::Common | Script::Inherited | Script::Unknown => SCRIPT_COMMON,
        Script::Latin => SCRIPT_LATIN,
        Script::Arabic => SCRIPT_ARABIC,
        Script::Hebrew => SCRIPT_HEBREW,
        Script::Devanagari => SCRIPT_DEVANAGARI,
        _ => SCRIPT_OTHER,
    }
}
/// Collapses the bidi class of `ch` into one of the `BIDI_*` codes.
///
/// L, R, AL, EN, AN and NSM each get a dedicated code. WS, B and S share
/// `BIDI_WS`, and every remaining class maps to `BIDI_OTHER_NEUTRAL`.
#[inline]
fn map_bidi_class(ch: char) -> u8 {
    let class = unicode_bidi::bidi_class(ch);
    match class {
        BidiClass::WS | BidiClass::B | BidiClass::S => BIDI_WS,
        BidiClass::NSM => BIDI_NSM,
        BidiClass::AN => BIDI_AN,
        BidiClass::EN => BIDI_EN,
        BidiClass::AL => BIDI_AL,
        BidiClass::R => BIDI_R,
        BidiClass::L => BIDI_L,
        _ => BIDI_OTHER_NEUTRAL,
    }
}
fn main() {
println!("cargo:rerun-if-changed=build.rs");
let mut joining_values = vec![JOIN_NONE; UNICODE_LIMIT];
let mut combining_values = vec![0u8; UNICODE_LIMIT];
let mut script_values = vec![SCRIPT_COMMON; UNICODE_LIMIT];
let mut bidi_values = vec![BIDI_OTHER_NEUTRAL; UNICODE_LIMIT];
for cp in 0..UNICODE_LIMIT {
let Some(ch) = char::from_u32(cp as u32) else {
continue;
};
joining_values[cp] = map_joining_class(ch);
combining_values[cp] = canonical_combining_class(ch);
script_values[cp] = map_script_class(ch);
bidi_values[cp] = map_bidi_class(ch);
}
let (joining_page_index, joining_page_data) = build_sparse(&joining_values, JOIN_NONE);
let (combining_page_index, combining_page_data) = build_sparse(&combining_values, 0);
let (script_page_index, script_page_data) = build_sparse(&script_values, SCRIPT_COMMON);
let (bidi_page_index, bidi_page_data) = build_sparse(&bidi_values, BIDI_OTHER_NEUTRAL);
let mut out = String::new();
out.push_str("// @generated by build.rs\n");
out.push_str("pub(crate) const UNICODE_PAGE_SHIFT: u32 = 8;\n");
out.push_str("pub(crate) const UNICODE_PAGE_MASK: u32 = 0xFF;\n");
out.push_str("pub(crate) const JOIN_NONE: u8 = 0;\n");
out.push_str("pub(crate) const JOIN_RIGHT: u8 = 1;\n");
out.push_str("pub(crate) const JOIN_LEFT: u8 = 2;\n");
out.push_str("pub(crate) const JOIN_DUAL: u8 = 3;\n");
out.push_str("pub(crate) const JOIN_TRANSPARENT: u8 = 4;\n");
out.push_str("pub(crate) const SCRIPT_COMMON: u8 = 0;\n");
out.push_str("pub(crate) const SCRIPT_LATIN: u8 = 1;\n");
out.push_str("pub(crate) const SCRIPT_ARABIC: u8 = 2;\n");
out.push_str("pub(crate) const SCRIPT_HEBREW: u8 = 3;\n");
out.push_str("pub(crate) const SCRIPT_DEVANAGARI: u8 = 4;\n");
out.push_str("pub(crate) const SCRIPT_OTHER: u8 = 5;\n");
out.push_str("pub(crate) const BIDI_OTHER_NEUTRAL: u8 = 0;\n");
out.push_str("pub(crate) const BIDI_L: u8 = 1;\n");
out.push_str("pub(crate) const BIDI_R: u8 = 2;\n");
out.push_str("pub(crate) const BIDI_AL: u8 = 3;\n");
out.push_str("pub(crate) const BIDI_EN: u8 = 4;\n");
out.push_str("pub(crate) const BIDI_AN: u8 = 5;\n");
out.push_str("pub(crate) const BIDI_NSM: u8 = 6;\n");
out.push_str("pub(crate) const BIDI_WS: u8 = 7;\n\n");
out.push_str(&render_u16_array("JOINING_PAGE_INDEX", &joining_page_index));
out.push_str(&render_u8_array("JOINING_PAGE_DATA", &joining_page_data));
out.push_str(&render_u16_array("COMBINING_PAGE_INDEX", &combining_page_index));
out.push_str(&render_u8_array("COMBINING_PAGE_DATA", &combining_page_data));
out.push_str(&render_u16_array("SCRIPT_PAGE_INDEX", &script_page_index));
out.push_str(&render_u8_array("SCRIPT_PAGE_DATA", &script_page_data));
out.push_str(&render_u16_array("BIDI_PAGE_INDEX", &bidi_page_index));
out.push_str(&render_u8_array("BIDI_PAGE_DATA", &bidi_page_data));
let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR"));
let out_path = out_dir.join("layout_unicode_tables.rs");
fs::write(out_path, out).expect("write generated unicode tables");
}