lumentype 0.0.3

The fastest feature-complete no-std font parser, rasterizer, and blitter, with LTR/RTL text, color emoji, and advanced layout features.
Documentation
use std::env;
use std::fs;
use std::path::PathBuf;

use unicode_bidi::BidiClass;
use unicode_joining_type::{JoiningType, get_joining_type};
use unicode_normalization::char::canonical_combining_class;
use unicode_script::{Script, UnicodeScript};

// Size of the flat per-code-point tables built in `main`: one slot for every
// value in 0..=0x10FFFF.
const UNICODE_LIMIT: usize = 0x110000;
// Number of consecutive code points grouped into one page by `build_sparse`.
const PAGE_SIZE: usize = 256;
// Number of pages needed to cover `UNICODE_LIMIT` code points.
const PAGE_COUNT: usize = UNICODE_LIMIT / PAGE_SIZE;

// Joining codes produced by `map_joining_class`. The same names and values are
// emitted into the generated file by `main`.
const JOIN_NONE: u8 = 0;
const JOIN_RIGHT: u8 = 1;
const JOIN_LEFT: u8 = 2;
// Also used for join-causing characters (see `map_joining_class`).
const JOIN_DUAL: u8 = 3;
const JOIN_TRANSPARENT: u8 = 4;

// Script codes produced by `map_script_class`. Only a handful of scripts get a
// dedicated code; every other named script collapses into `SCRIPT_OTHER`.
// `SCRIPT_COMMON` also covers inherited and unknown scripts.
const SCRIPT_COMMON: u8 = 0;
const SCRIPT_LATIN: u8 = 1;
const SCRIPT_ARABIC: u8 = 2;
const SCRIPT_HEBREW: u8 = 3;
const SCRIPT_DEVANAGARI: u8 = 4;
const SCRIPT_OTHER: u8 = 5;

// Bidi codes produced by `map_bidi_class`. Classes without a dedicated code
// fall back to `BIDI_OTHER_NEUTRAL`.
const BIDI_OTHER_NEUTRAL: u8 = 0;
const BIDI_L: u8 = 1;
const BIDI_R: u8 = 2;
const BIDI_AL: u8 = 3;
const BIDI_EN: u8 = 4;
const BIDI_AN: u8 = 5;
const BIDI_NSM: u8 = 6;
// Shared by the WS, B and S bidi classes (see `map_bidi_class`).
const BIDI_WS: u8 = 7;

/// Compresses a flat per-code-point table into a two-level page table.
///
/// The first `PAGE_COUNT * PAGE_SIZE` entries of `values` are split into
/// consecutive pages of `PAGE_SIZE` bytes, and identical pages are stored once.
///
/// Returns `(page_index, page_data)`:
/// * `page_index[p]` is the slot of page `p` inside `page_data`
///   (the page starts at byte `slot * PAGE_SIZE`);
/// * `page_data` is the concatenation of the unique pages in first-seen order.
///   Slot 0 is always a page filled with `default`, even if no input page
///   matches it.
///
/// # Panics
///
/// Panics if `values` holds fewer than `PAGE_COUNT * PAGE_SIZE` entries, or if
/// the number of unique pages does not fit in a `u16` slot number.
fn build_sparse(values: &[u8], default: u8) -> (Vec<u16>, Vec<u8>) {
    assert!(
        values.len() >= PAGE_COUNT * PAGE_SIZE,
        "build_sparse needs at least {} values, got {}",
        PAGE_COUNT * PAGE_SIZE,
        values.len()
    );

    // Unique pages are borrowed rather than copied; slot 0 is reserved for the
    // all-default page so sparse regions share a single entry.
    let default_page = vec![default; PAGE_SIZE];
    let mut unique_pages: Vec<&[u8]> = vec![default_page.as_slice()];
    let mut page_index = Vec::with_capacity(PAGE_COUNT);

    for chunk in values.chunks_exact(PAGE_SIZE).take(PAGE_COUNT) {
        let slot = match unique_pages.iter().position(|known| *known == chunk) {
            Some(existing) => existing,
            None => {
                unique_pages.push(chunk);
                unique_pages.len() - 1
            }
        };
        // A checked conversion: a silent `as u16` wrap would point lookups at
        // the wrong page if the page constants were ever retuned.
        page_index.push(u16::try_from(slot).expect("unique page count exceeds u16 slot range"));
    }

    let mut page_data = Vec::with_capacity(unique_pages.len() * PAGE_SIZE);
    for page in &unique_pages {
        page_data.extend_from_slice(page);
    }

    (page_index, page_data)
}

/// Renders `values` as Rust source for a `pub(crate) static {name}: [u16; N]` item.
///
/// The declared array length is `values.len()`. Elements are written 32 per
/// line, each followed by a comma with no space, and the item is followed by a
/// blank line.
fn render_u16_array(name: &str, values: &[u16]) -> String {
    use std::fmt::Write as _;

    // A u16 prints as at most five digits plus a comma; sizing the buffer up
    // front avoids repeated regrowth for tables with thousands of entries.
    let mut out = String::with_capacity(values.len() * 6 + name.len() + 64);

    // Formatting straight into `out` avoids one temporary `String` per element.
    writeln!(out, "pub(crate) static {name}: [u16; {}] = [", values.len())
        .expect("writing to a String cannot fail");
    for row in values.chunks(32) {
        out.push_str("    ");
        for value in row {
            write!(out, "{value},").expect("writing to a String cannot fail");
        }
        out.push('\n');
    }
    out.push_str("];\n\n");
    out
}

/// Renders `values` as Rust source for a `pub(crate) static {name}: [u8; N]` item.
///
/// The declared array length is `values.len()`. Elements are written 64 per
/// line, each followed by a comma with no space, and the item is followed by a
/// blank line.
fn render_u8_array(name: &str, values: &[u8]) -> String {
    use std::fmt::Write as _;

    // A u8 prints as at most three digits plus a comma; sizing the buffer up
    // front avoids repeated regrowth for tables with thousands of entries.
    let mut out = String::with_capacity(values.len() * 4 + name.len() + 64);

    // Formatting straight into `out` avoids one temporary `String` per element.
    writeln!(out, "pub(crate) static {name}: [u8; {}] = [", values.len())
        .expect("writing to a String cannot fail");
    for row in values.chunks(64) {
        out.push_str("    ");
        for value in row {
            write!(out, "{value},").expect("writing to a String cannot fail");
        }
        out.push('\n');
    }
    out.push_str("];\n\n");
    out
}

/// Collapses the joining type of `ch` into one of the `JOIN_*` codes.
///
/// Join-causing characters share `JOIN_DUAL` with dual-joining ones; non-joining
/// characters and any joining type not listed explicitly map to `JOIN_NONE`.
#[inline]
fn map_joining_class(ch: char) -> u8 {
    let joining = get_joining_type(ch);
    match joining {
        JoiningType::Transparent => JOIN_TRANSPARENT,
        JoiningType::JoinCausing | JoiningType::DualJoining => JOIN_DUAL,
        JoiningType::LeftJoining => JOIN_LEFT,
        JoiningType::RightJoining => JOIN_RIGHT,
        JoiningType::NonJoining => JOIN_NONE,
        // Anything the crate reports beyond the types above is treated as non-joining.
        _ => JOIN_NONE,
    }
}

/// Collapses the script of `ch` into one of the `SCRIPT_*` codes.
///
/// Latin, Arabic, Hebrew and Devanagari get dedicated codes. Common, inherited
/// and unknown scripts map to `SCRIPT_COMMON`; every other script maps to
/// `SCRIPT_OTHER`.
#[inline]
fn map_script_class(ch: char) -> u8 {
    let script = ch.script();
    match script {
        Script::Unknown | Script::Inherited | Script::Common => SCRIPT_COMMON,
        Script::Devanagari => SCRIPT_DEVANAGARI,
        Script::Hebrew => SCRIPT_HEBREW,
        Script::Arabic => SCRIPT_ARABIC,
        Script::Latin => SCRIPT_LATIN,
        // Every remaining named script shares one bucket.
        _ => SCRIPT_OTHER,
    }
}

/// Collapses the bidi class of `ch` into one of the `BIDI_*` codes.
///
/// L, R, AL, EN, AN and NSM keep dedicated codes. WS, B and S share `BIDI_WS`;
/// every other class maps to `BIDI_OTHER_NEUTRAL`.
#[inline]
fn map_bidi_class(ch: char) -> u8 {
    let class = unicode_bidi::bidi_class(ch);
    match class {
        BidiClass::S | BidiClass::B | BidiClass::WS => BIDI_WS,
        BidiClass::NSM => BIDI_NSM,
        BidiClass::AN => BIDI_AN,
        BidiClass::EN => BIDI_EN,
        BidiClass::AL => BIDI_AL,
        BidiClass::R => BIDI_R,
        BidiClass::L => BIDI_L,
        // Classes without a dedicated code fall into the neutral bucket.
        _ => BIDI_OTHER_NEUTRAL,
    }
}

fn main() {
    println!("cargo:rerun-if-changed=build.rs");

    let mut joining_values = vec![JOIN_NONE; UNICODE_LIMIT];
    let mut combining_values = vec![0u8; UNICODE_LIMIT];
    let mut script_values = vec![SCRIPT_COMMON; UNICODE_LIMIT];
    let mut bidi_values = vec![BIDI_OTHER_NEUTRAL; UNICODE_LIMIT];

    for cp in 0..UNICODE_LIMIT {
        let Some(ch) = char::from_u32(cp as u32) else {
            continue;
        };
        joining_values[cp] = map_joining_class(ch);
        combining_values[cp] = canonical_combining_class(ch);
        script_values[cp] = map_script_class(ch);
        bidi_values[cp] = map_bidi_class(ch);
    }

    let (joining_page_index, joining_page_data) = build_sparse(&joining_values, JOIN_NONE);
    let (combining_page_index, combining_page_data) = build_sparse(&combining_values, 0);
    let (script_page_index, script_page_data) = build_sparse(&script_values, SCRIPT_COMMON);
    let (bidi_page_index, bidi_page_data) = build_sparse(&bidi_values, BIDI_OTHER_NEUTRAL);

    let mut out = String::new();
    out.push_str("// @generated by build.rs\n");
    out.push_str("pub(crate) const UNICODE_PAGE_SHIFT: u32 = 8;\n");
    out.push_str("pub(crate) const UNICODE_PAGE_MASK: u32 = 0xFF;\n");
    out.push_str("pub(crate) const JOIN_NONE: u8 = 0;\n");
    out.push_str("pub(crate) const JOIN_RIGHT: u8 = 1;\n");
    out.push_str("pub(crate) const JOIN_LEFT: u8 = 2;\n");
    out.push_str("pub(crate) const JOIN_DUAL: u8 = 3;\n");
    out.push_str("pub(crate) const JOIN_TRANSPARENT: u8 = 4;\n");
    out.push_str("pub(crate) const SCRIPT_COMMON: u8 = 0;\n");
    out.push_str("pub(crate) const SCRIPT_LATIN: u8 = 1;\n");
    out.push_str("pub(crate) const SCRIPT_ARABIC: u8 = 2;\n");
    out.push_str("pub(crate) const SCRIPT_HEBREW: u8 = 3;\n");
    out.push_str("pub(crate) const SCRIPT_DEVANAGARI: u8 = 4;\n");
    out.push_str("pub(crate) const SCRIPT_OTHER: u8 = 5;\n");
    out.push_str("pub(crate) const BIDI_OTHER_NEUTRAL: u8 = 0;\n");
    out.push_str("pub(crate) const BIDI_L: u8 = 1;\n");
    out.push_str("pub(crate) const BIDI_R: u8 = 2;\n");
    out.push_str("pub(crate) const BIDI_AL: u8 = 3;\n");
    out.push_str("pub(crate) const BIDI_EN: u8 = 4;\n");
    out.push_str("pub(crate) const BIDI_AN: u8 = 5;\n");
    out.push_str("pub(crate) const BIDI_NSM: u8 = 6;\n");
    out.push_str("pub(crate) const BIDI_WS: u8 = 7;\n\n");
    out.push_str(&render_u16_array("JOINING_PAGE_INDEX", &joining_page_index));
    out.push_str(&render_u8_array("JOINING_PAGE_DATA", &joining_page_data));
    out.push_str(&render_u16_array("COMBINING_PAGE_INDEX", &combining_page_index));
    out.push_str(&render_u8_array("COMBINING_PAGE_DATA", &combining_page_data));
    out.push_str(&render_u16_array("SCRIPT_PAGE_INDEX", &script_page_index));
    out.push_str(&render_u8_array("SCRIPT_PAGE_DATA", &script_page_data));
    out.push_str(&render_u16_array("BIDI_PAGE_INDEX", &bidi_page_index));
    out.push_str(&render_u8_array("BIDI_PAGE_DATA", &bidi_page_data));

    let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR"));
    let out_path = out_dir.join("layout_unicode_tables.rs");
    fs::write(out_path, out).expect("write generated unicode tables");
}