import re
import sys
def parse_version(lines):
    """Read the header line from *lines* and extract the version numbers.

    The first line is expected to start with
    ``# CaseFolding-<major>.<minor>.<patch>.txt``. Exactly one item is
    consumed from *lines*; the remaining items are left for the caller.

    Args:
        lines: An iterator of text lines (it is advanced with ``next()``).

    Returns:
        A 3-tuple of digit strings ``(major, minor, patch)``.

    Raises:
        ValueError: If *lines* is exhausted or the first line does not
            match the expected header format.
    """
    # A default keeps an empty input from leaking a bare StopIteration.
    header = next(lines, None)
    if header is None:
        raise ValueError('input is empty: expected a CaseFolding header line')
    found = re.match(r'# CaseFolding-(\d+)\.(\d+)\.(\d+)\.txt', header)
    if found is None:
        # Without this check the failure surfaces as an opaque
        # AttributeError on `None.groups()`.
        raise ValueError('unrecognized header line: {!r}'.format(header))
    return found.groups()
def strip_comments(line):
    """Return the part of *line* that precedes the first ``#``.

    If *line* contains no ``#`` it is returned unchanged. Surrounding
    whitespace is not trimmed.
    """
    hash_at = line.find('#')
    if hash_at < 0:
        return line
    return line[:hash_at]
def parse_tables(lines):
    """Parse the data lines into one mapping table per status letter.

    Each non-blank line (after comment removal) must consist of four
    ``;``-separated fields: a hexadecimal code, a status letter, the
    mapping, and a trailing field that is ignored. Entries with status
    ``F`` map to a list of ints (one per whitespace-separated hex value);
    all other entries map to a single int.

    Args:
        lines: An iterable of text lines.

    Returns:
        A dict with keys ``'C'``, ``'F'``, ``'S'`` and ``'T'``, each
        holding a dict from code (int) to its mapping.

    Raises:
        Exception: If the ``'T'`` table differs from the two entries this
            script was written against.
    """
    tables = {key: {} for key in ('C', 'F', 'S', 'T')}

    for raw in lines:
        entry = strip_comments(raw).strip()
        if entry:
            code_field, status_field, mapping_field, _trailing = entry.split(';')
            codepoint = int(code_field, 16)
            kind = status_field.strip()
            if kind != 'F':
                target = int(mapping_field, 16)
            else:
                target = [int(token, 16) for token in mapping_field.split()]
            tables[kind][codepoint] = target

    # The 'T' entries are not emitted by this script; the check guards
    # against the data changing underneath that assumption.
    expected_turkic = {0x0049: 0x0131, 0x0130: 0x0069}
    if tables['T'] == expected_turkic:
        return tables
    raise Exception('Turkic tables have changed -- please update code')
def render(codepoint):
    """Format *codepoint* as a quoted ``'\\u{...}'`` escape in lowercase hex."""
    hex_digits = format(codepoint, 'x')
    return "'\\u{" + hex_digits + "}'"
# Template for the `Buffer` variant, keyed by the length of a full mapping.
_BUFFER_TEMPLATES = {
    1: 'Buffer::Zero',
    2: 'Buffer::One({})',
    3: 'Buffer::Two({}, {})',
}


def _print_pair_rows(pairs):
    """Print one `(char, char)` row per entry of *pairs*, sorted, then `];`."""
    for source, target in sorted(pairs.items()):
        row = """ ({}, {}),""".format(render(source), render(target))
        print(row)
    print("""];""")


def _buffer_variant(code, mapping):
    """Return the `Buffer` expression holding every element of *mapping* after the first."""
    template = _BUFFER_TEMPLATES.get(len(mapping))
    if template is None:
        raise Exception('code {} maps to a string of length {}'.format(code, len(mapping)))
    return template.format(*[render(extra) for extra in mapping[1:]])


def main(lines):
    """Read case-folding data from *lines* and print the generated source.

    Output goes to standard output via ``print``, in this order: a
    "generated code" notice, the ``UNICODE_VERSION`` constant, the
    ``Buffer`` enum, then ``COMMON_TABLE``, ``FULL_TABLE`` and
    ``SIMPLE_TABLE``. The notice is printed before the header is parsed
    and the enum before the tables are parsed.

    Args:
        lines: An iterator of text lines; the first must be the version
            header, the rest are the data lines.

    Raises:
        Exception: If a full mapping has a length other than 1, 2 or 3.
    """
    print('// NOTE: the following code was generated by `scripts/generate.py`; do not edit directly')

    version_parts = parse_version(lines)
    version_block = """
/// The version of [Unicode](http://www.unicode.org/) that this version
/// of `unicode-casefold` is based on.
pub const UNICODE_VERSION: (u64, u64, u64) = ({}, {}, {});
""".format(*version_parts)
    print(version_block)

    buffer_enum = """
#[derive(Copy, Clone, Debug)]
pub enum Buffer {
Zero,
One(char),
Two(char, char),
}
"""
    print(buffer_enum)

    tables = parse_tables(lines)

    # Common table.
    print("""/// Common mappings shared by both the full and simple mappings.""")
    print("""pub static COMMON_TABLE: &'static [(char, char)] = &[""")
    _print_pair_rows(tables['C'])
    print()

    # Full table: the first mapped char is stored directly, the rest in a Buffer.
    print("""/// Full mappings, which cause strings to grow in length.""")
    print("""pub static FULL_TABLE: &'static [(char, (char, Buffer))] = &[""")
    for code, mapping in sorted(tables['F'].items()):
        variant = _buffer_variant(code, mapping)
        row = """ ({}, ({}, {})),""".format(render(code), render(mapping[0]), variant)
        print(row)
    print("""];""")
    print()

    # Simple table.
    print("""/// Simple mappings, which differ from those in the `FULL_TABLE`.""")
    print("""pub static SIMPLE_TABLE: &'static [(char, char)] = &[""")
    _print_pair_rows(tables['S'])
# Script entry point: when executed directly (rather than imported), feed
# standard input to main(), which prints the generated code to standard output.
if __name__ == '__main__':
    main(sys.stdin)