icu-data 0.1.0

International Components for Unicode (ICU) data in Rust structures
Documentation
import glob
import os
import re

# Matches one mapping line of a .ucm file, for example:
#   <U0041> \x41 |0
# Group 1 is the code point with its leading "U", group 2 is the run of
# "\xNN" byte escapes, group 3 is the single digit (0-3) after the "|".
# Compiled once because it is applied to every line of every input file.
MAPPING_RE = re.compile(
    r"^<(U[a-fA-F0-9]+)>\s*((?:\\x[a-fA-F0-9]+)+)\s*\|([0123])\s*$"
)

# Skeleton of the generated Rust file. Doubled braces are literal braces in
# the output; the two "{}" slots receive the comma-separated table entries.
RUST_TEMPLATE = """use crate::Codepoint::{{U0, U1, U2, U3}};

static CODEPOINTS: &[Codepoint] = &[{}];

static BYTESTRINGS: &[&[u8]] = &[{}];"""


def ucm_to_rust(lines):
    """Return the Rust source text generated from the lines of one .ucm file.

    The leading run of lines that start with "#" is copied through as Rust
    "//" comments. Only the leading "#" of each such line is rewritten, so
    any "#" inside the comment text is preserved. After the first line that
    does not start with "#", every line matching ``MAPPING_RE`` contributes
    one entry to each of the two tables; all other lines are ignored.

    Args:
        lines: Iterable of text lines, each normally ending in a newline
            (as produced by ``readlines()``).

    Returns:
        The translated header comments followed by the filled-in
        ``RUST_TEMPLATE``. The result has no trailing newline.
    """
    header = []
    codepoints = []
    bytestrings = []
    in_header = True

    for line in lines:
        if in_header:
            if line.startswith("#"):
                header.append("//" + line[1:])
                continue
            # The first non-comment line ends the header for good; later
            # "#" lines simply fail the mapping regex and are dropped.
            in_header = False

        match = MAPPING_RE.search(line)
        if match is None:
            continue

        uni, bytestring, kind = match.groups()
        # The digit after "|" selects the Codepoint variant (U0..U3); the
        # "U" prefix of the code point is stripped to form a Rust \u{...}
        # character escape.
        codepoints.append(r"U{}('\u{{{}}}')".format(kind, uni[1:]))
        # The "\xNN" escapes are valid inside a Rust byte-string literal
        # as-is, so they are copied through unchanged.
        bytestrings.append('b"{}"'.format(bytestring))

    tables = RUST_TEMPLATE.format(", ".join(codepoints), ", ".join(bytestrings))
    return "".join(header) + tables


# Base names of every .ucm file in the sibling "ucm" directory.
# os.path.basename copes with whichever separator glob returns.
ucms = [os.path.basename(path) for path in glob.glob("../ucm/*.ucm")]

for ucm_name in ucms:
    # NOTE(review): files are read and written with the platform default
    # encoding, as before -- confirm whether an explicit encoding is wanted.
    with open(os.path.join("..", "ucm", ucm_name), "r") as src:
        lines = src.readlines()

    # The .rs file is written to the current working directory under the
    # same base name as the input.
    rs = os.path.splitext(ucm_name)[0] + ".rs"
    with open(rs, "w") as out:
        out.write(ucm_to_rust(lines))