from pathlib import Path
import struct
# Absolute directory containing this script; the relative "../data/..." input
# and output paths used in this file are joined onto it.
SCRIPT_DIR = Path(__file__).resolve().parent
def generate():
    """Build the Rust source text holding the AGL lookup tables.

    Reads ``../data/glyphlist.txt`` (relative to this script), skipping empty
    lines and lines starting with ``#``.  Every remaining line is split on
    ``;`` into a glyph name and a space-separated list of hexadecimal code
    points.  Names with exactly one code point are inserted into a
    ``StringNode`` trie that is serialized into the ``AGL_TRIE`` byte array;
    names with several code points are listed, sorted by name, in
    ``AGL_MULTIS``.

    Returns:
        The complete generated Rust source as a single string.
    """
    source_text = Path(SCRIPT_DIR.joinpath("../data/glyphlist.txt")).read_text(encoding="utf-8")

    single_entries = []
    multi_entries = []
    longest_name = 0
    for raw in source_text.split("\n"):
        if not raw or raw.startswith("#"):
            continue
        parts = raw.split(';')
        name = parts[0]
        codepoints = parts[1].split(" ")
        longest_name = max(longest_name, len(name))
        if len(codepoints) == 1:
            single_entries.append((name, int(parts[1], 16)))
        else:
            multi_entries.append((name, ["0x" + cp for cp in codepoints]))

    root = StringNode("", 0)
    for name, codepoint in single_entries:
        root.add(name, codepoint)
    root = root.optimize()
    # locate() must run before store(): it assigns the byte offset of every
    # node, which store() writes out as child pointers.
    root.locate(0)
    table = root.store(b"")

    out = [
        "// THIS FILE IS AUTOGENERATED.\n",
        "// Any changes to this file will be overwritten.\n",
        "// Use ../scripts/gen_agl.py to regenerate.\n\n",
        "/// Maximum length of an AGL glyph name.\n",
        "pub const MAX_NAME_LEN: usize = {};\n\n".format(longest_name + 1),
        "#[rustfmt::skip]\n",
        "static AGL_TRIE: [u8; {}] = [\n".format(len(table)),
        " ",
    ]

    # Emit the table bytes 32 per row, each right-aligned to width 3.
    cells = [str(byte).rjust(3, ' ') + "," for byte in table]
    rows = [" ".join(cells[start:start + 32]) for start in range(0, len(cells), 32)]
    out.append("\n ".join(rows))
    out.append("\n];\n\n")

    multi_entries.sort(key=lambda entry: entry[0])
    out.append("#[rustfmt::skip]\n")
    out.append("static AGL_MULTIS: [(&str, &[u16]); {}] = [\n".format(len(multi_entries)))
    for name, hex_codes in multi_entries:
        out.append(" (\"{}\", &[{}]),\n".format(name, ", ".join(hex_codes)))
    out.append("];\n")

    return "".join(out)
class StringNode:
    """Node of a character trie that can be flattened into a byte table.

    Attributes:
        letter: Label of the node.  A single character when created by
            ``add``; ``optimize`` may extend it to several characters.  The
            root created by callers uses the empty string.
        value: Integer payload; ``0`` means the node carries no value.
        children: Child nodes keyed by the first character of their label.
        index: Byte offset of the node in the serialized table.  Only set
            once ``locate`` has been called.

    Serialized layout of one node, as written by ``store``:
        * the label, one byte per character, with 128 added to every
          character except the last (a single zero byte for an empty label);
        * one byte holding the child count, plus 128 when a value follows;
        * the value as a big-endian unsigned 16-bit integer (only if nonzero);
        * one big-endian unsigned 16-bit offset per child, in sorted order;
        * the serialized children themselves, in the same order.
    """

    def __init__(self, letter, value):
        self.letter = letter
        self.value = value
        self.children = {}

    def __cmp__(self, other):
        # Three-way comparison on the first label character.  Python 3 does
        # not consult __cmp__ when sorting; __lt__ below is what is used.
        return ord(self.letter[0]) - ord(other.letter[0])

    def __lt__(self, other):
        # Nodes are ordered by the first character of their label.
        return self.letter[0] < other.letter[0]

    def add(self, word, value):
        """Insert ``word`` below this node and assign ``value`` to its end.

        Missing nodes along the path are created with a value of ``0``.
        """
        node = self
        for char in word:
            following = node.children.get(char)
            if following is None:
                following = StringNode(char, 0)
                node.children[char] = following
            node = following
        node.value = value

    def optimize(self):
        """Collapse value-less single-child chains into multi-character labels.

        Children are optimized first.  A node that has no value and exactly
        one child then absorbs that child's label, value and children.

        Returns:
            This node, modified in place.
        """
        previous = list(self.children.values())
        rebuilt = {}
        for node in previous:
            optimized = node.optimize()
            rebuilt[node.letter[0]] = optimized
        self.children = rebuilt

        if self.value == 0 and len(previous) == 1:
            only = previous[0]
            self.letter += only.letter
            self.value = only.value
            self.children = only.children
        return self

    def dump_debug(self, write, margin):
        """Write an indented text rendering of this subtree via ``write``.

        ``write`` is called with one complete line (including the newline)
        per node; ``margin`` is the prefix for this node's line.
        """
        label = self.letter if self.letter else "<NOLETTER>"
        suffix = " => " + repr(self.value) if self.value else ""
        write(margin + "+-" + label + suffix + "\n")
        if not self.children:
            return
        child_margin = margin + "| "
        for child in self.children.values():
            child.dump_debug(write, child_margin)

    def locate(self, index):
        """Assign serialized byte offsets to this node and its descendants.

        Args:
            index: Offset at which this node starts.

        Returns:
            The offset just past the last byte of this subtree.
        """
        self.index = index
        # Label bytes plus the count byte; an empty label still occupies one
        # (zero) byte, hence 2.
        size = len(self.letter) + 1 if self.letter else 2
        if self.value != 0:
            size += 2
        ordered = sorted(self.children.values())
        # Two bytes of offset per child follow the header.
        index += size + 2 * len(ordered)
        for child in ordered:
            index = child.locate(index)
        return index

    def store(self, storage):
        """Append the serialized subtree to ``storage`` and return the result.

        Uses the ``index`` attributes assigned by ``locate``, which therefore
        has to be called on the tree beforehand.
        """
        label = self.letter
        if label:
            last = len(label) - 1
            for position, char in enumerate(label):
                # Every character except the final one is flagged with +128.
                code = ord(char) + (128 if position < last else 0)
                storage += struct.pack("B", code)
        else:
            storage += struct.pack("B", 0)

        ordered = sorted(self.children.values())
        if self.value != 0:
            # +128 on the count marks that a 16-bit value follows.
            header = struct.pack("!BH", len(ordered) + 128, self.value)
        else:
            header = struct.pack("B", len(ordered))
        storage += header

        storage += b"".join(struct.pack("!H", child.index) for child in ordered)
        for child in ordered:
            storage = child.store(storage)
        return storage
if __name__ == "__main__":
    # Script entry point: regenerate the Rust table file from the glyph list.
    rust_source = generate()
    target = Path(SCRIPT_DIR.joinpath("../data/generated/generated_agl.rs"))
    target.write_text(rust_source, encoding="utf-8")