import glob
import os
import re
# Convert every ``.ucm`` mapping table in ``../ucm`` into a Rust source file
# of the same base name (``foo.ucm`` -> ``foo.rs``) in the current directory.
# Each generated file carries the table's leading ``#`` comment header as
# ``//`` comments, followed by two parallel static arrays: the code points and
# the byte strings they map to.

# One mapping line: ``<UXXXX> \xNN[\xNN...] |K`` where K is a digit 0-3.
# Compiled once because it is applied to every line of every table.
_MAPPING_RE = re.compile(
    r"^<(U[a-fA-F0-9]+)>\s*((?:\\x[a-fA-F0-9]+)+)\s*\|([0123])\s*$"
)

# Trailer written after the comment header; the two slots receive the
# comma-separated code point entries and byte string entries respectively.
_RUST_TEMPLATE = """use crate::Codepoint::{{U0, U1, U2, U3}};
static CODEPOINTS: &[Codepoint] = &[{}];
static BYTESTRINGS: &[&[u8]] = &[{}];"""


def _render_rust(lines):
    """Return the Rust source text generated from the lines of one table.

    Args:
        lines: Iterable of text lines (line endings included) from a
            ``.ucm`` file.

    Returns:
        The leading comment header with every ``#`` turned into ``//``,
        followed by the ``CODEPOINTS`` / ``BYTESTRINGS`` declarations built
        from the mapping lines. Lines that are neither part of the leading
        header nor a mapping line are ignored.
    """
    header = []
    codepoints = []
    bytestrings = []
    in_header = True
    for line in lines:
        if in_header:
            if line.startswith("#"):
                header.append(line.replace("#", "//"))
                continue
            # The first non-comment line ends the header; it may itself be a
            # mapping line, so it falls through to the match below.
            in_header = False
        match = _MAPPING_RE.search(line)
        if match is None:
            continue
        uni, bytestring, kind = match.groups()
        # uni[1:] drops the leading "U" so only the hex digits remain.
        codepoints.append(r"U{}('\u{{{}}}')".format(kind, uni[1:]))
        bytestrings.append('b"{}"'.format(bytestring))
    trailer = _RUST_TEMPLATE.format(
        ", ".join(codepoints), ", ".join(bytestrings)
    )
    return "".join(header) + trailer


for ucm_path in glob.glob("../ucm/*.ucm"):
    with open(ucm_path, "r") as ucm_file:
        rust_source = _render_rust(ucm_file)
    # basename/splitext handle any path separator glob returns and replace
    # only the real extension, not every ".ucm" occurrence in the name.
    rs_name = os.path.splitext(os.path.basename(ucm_path))[0] + ".rs"
    # The output is opened only after rendering succeeded, so a failure
    # never leaves a truncated .rs file behind.
    with open(rs_name, "w") as rs_file:
        rs_file.write(rust_source)