rustybuzz 0.20.1

A complete harfbuzz shaping algorithm port to Rust.
Documentation
#!/usr/bin/env python3

# Based on harfbuzz/src/gen-arabic-table.py

import os
import urllib.request

# Unicode Character Database files this script reads. Missing ones are
# downloaded into the current working directory below.
DEPENDENCIES = [
    "ArabicShaping.txt",
    "UnicodeData.txt",
    "Blocks.txt",
]

# Fetch every dependency that is not already present locally.
for dep in DEPENDENCIES:
    if not os.path.exists(dep):
        urllib.request.urlretrieve("https://unicode.org/Public/UCD/latest/ucd/" + dep, dep)

# One open text handle per dependency, in DEPENDENCIES order:
# files[0] = ArabicShaping.txt, files[1] = UnicodeData.txt, files[2] = Blocks.txt.
files = [open(x, encoding="utf-8") for x in DEPENDENCIES]

# Reading the headers also advances the file positions: the first two lines of
# ArabicShaping.txt and of Blocks.txt are consumed here. Note that the entries
# are not in DEPENDENCIES order (index 1 holds the Blocks.txt header).
headers = [
    [files[0].readline(), files[0].readline()],
    [files[2].readline(), files[2].readline()],
    ["UnicodeData.txt does not have a header."],
]

# Skip the remainder of the ArabicShaping.txt preamble, up to and including
# the first line that contains a long run of '#'. readline() returns "" at end
# of file, so stop with an error there instead of spinning forever.
while True:
    preamble_line = files[0].readline()
    if not preamble_line:
        raise RuntimeError(
            "ArabicShaping.txt: reached end of file without finding the "
            "'##################' separator line"
        )
    if "##################" in preamble_line:
        break

# Maps a code point (int) to the name of its block; filled by read_blocks().
blocks = {}


def read_blocks(f):
    """Populate the module-level ``blocks`` mapping from block-range lines.

    ``f`` is an iterable of text lines. Everything from the first ``#`` on a
    line is ignored, and lines without a ``;`` separator are skipped. On the
    remaining lines the first field is either one hexadecimal code point or an
    inclusive ``START..END`` range, and the second field is the block name.
    Every code point in the range is mapped to that name in ``blocks``.
    """
    for raw in f:
        # Strip the trailing comment, if any.
        data = raw.split("#", 1)[0]

        fields = [part.strip() for part in data.split(";")]
        if len(fields) == 1:
            continue

        # "XXXX" describes a single code point, "XXXX..YYYY" an inclusive range.
        first, *rest = fields[0].split("..")
        start = int(first, 16)
        end = int(rest[0], 16) if rest else start

        name = fields[1]

        blocks.update(dict.fromkeys(range(start, end + 1), name))


def print_joining_table(f):
    """Print the Rust ``JOINING_TABLE`` and ``joining_type`` lookup to stdout.

    ``f`` is an iterable of ``;``-separated text lines. Lines starting with
    ``#`` and lines without any ``;`` are skipped. On the remaining lines
    field 0 is the code point in hexadecimal, field 2 the joining type and
    field 3 the joining group. For the groups ``ALAPH`` and ``DALATH RISH``
    the group is recorded; for everything else the joining type is recorded.

    Code points that are at most ``1 + 16 * 5`` apart are merged into one
    contiguous range. Every code point of a range gets a table entry; code
    points inside a range that were not listed in ``f`` are emitted as ``X``.

    The function reads the module-level ``blocks`` mapping, which must contain
    every code point listed in ``f`` (a ``KeyError`` is raised otherwise).
    """
    values = {}
    for line in f:
        # Whole-line comments carry no data.
        if line.startswith("#"):
            continue

        fields = [x.strip() for x in line.split(";")]
        if len(fields) == 1:
            continue

        u = int(fields[0], 16)

        if fields[3] in ("ALAPH", "DALATH RISH"):
            value = "JOINING_GROUP_" + fields[3].replace(" ", "_")
        else:
            value = "JOINING_TYPE_" + fields[2]
        values[u] = value

    # Abbreviate each value to the initials of the words after the
    # "JOINING_TYPE_" / "JOINING_GROUP_" prefix, e.g. "DALATH_RISH" -> "DR".
    short_value = {}
    for value in sorted(set(values.values()) | {"JOINING_TYPE_X"}):
        short = "".join(x[0] for x in value.split("_")[2:])
        assert short not in short_value.values(), (
            "abbreviation %r is not unique" % short
        )

        short_value[value] = short

    uu = sorted(values)
    all_blocks = {blocks[u] for u in uu}

    # Merge nearby code points into [start, end] ranges so the table stays
    # dense; a larger gap starts a new range.
    last = -100000
    ranges = []
    for u in uu:
        if u - last <= 1 + 16 * 5:
            ranges[-1][-1] = u
        else:
            ranges.append([u, u])
        last = u

    print("#[rustfmt::skip]")
    print("pub const JOINING_TABLE: &[hb_arabic_joining_type_t] = &[")
    last_block = None
    offset = 0

    join_offsets = []

    for start, end in ranges:
        # Index of this range's first entry inside JOINING_TABLE.
        join_offsets.append(
            "const JOINING_OFFSET_0X%04X: usize = %d;" % (start, offset)
        )

        for u in range(start, end + 1):
            block = blocks.get(u, last_block)
            value = values.get(u, "JOINING_TYPE_X")

            # Start a new labelled section at every range start and whenever
            # the block changes.
            if block != last_block or u == start:
                if u != start:
                    print()
                if block in all_blocks:
                    print("\n  /* %s */" % block)
                else:
                    print("\n  /* FILLER */")
                last_block = block
                if u % 32 != 0:
                    # Pad a row that starts mid-way so columns stay aligned.
                    print()
                    print("  /* %04X */" % (u // 32 * 32), "  " * (u % 32), end="")

            # 32 entries per row, each row prefixed with its first code point.
            if u % 32 == 0:
                print()
                print("  /* %04X */ " % u, end="")

            val = short_value[value]

            # "C" is written out as "D"; the `use` line this script emits
            # imports no `C` alias.
            if val == "C":
                val = "D"

            print("%s," % val, end="")
        print()

        offset += end - start + 1
    print("];")
    print()

    for declaration in join_offsets:
        print(declaration)

    page_bits = 12
    print()
    print("pub fn joining_type(u: char) -> hb_arabic_joining_type_t {")
    print("    let u = u as u32;")
    print("    match u >> %d {" % page_bits)
    # Every page a range touches needs a match arm, including pages that lie
    # strictly between the range's first and last page; otherwise code points
    # on those pages would fall through to the `_` arm and yield X.
    pages = {
        p
        for start, end in ranges
        for p in range(start >> page_bits, (end >> page_bits) + 1)
    }
    for p in sorted(pages):
        print("        0x%0X => {" % p)
        for start, end in ranges:
            if p < (start >> page_bits) or p > (end >> page_bits):
                continue
            offset_name = "JOINING_OFFSET_0X%04X" % start
            print("            if (0x%04X..=0x%04X).contains(&u) {" % (start, end))
            print(
                "                return JOINING_TABLE[u as usize - 0x%04X + %s]"
                % (start, offset_name)
            )
            print("            }")
        print("        }")
    print("        _ => {}")
    print("    }")
    print()
    print("    X")
    print("}")
    print()


# Emit the generated Rust file on stdout: a provenance notice, the `use` line
# that brings the short variant aliases into scope, then the table itself.
try:
    print("// WARNING: this file was generated by scripts/gen-arabic-table.py")
    print()
    print(
        "use super::ot_shaper_arabic::hb_arabic_joining_type_t::{\n"
        "    self, GroupAlaph as A, GroupDalathRish as DR, D, L, R, T, U, X,\n"
        "};"
    )
    print()

    # Block names must be loaded first: print_joining_table() looks them up.
    read_blocks(files[2])
    print_joining_table(files[0])
finally:
    # Release the data-file handles whether or not generation succeeded.
    for handle in files:
        handle.close()