import collections
import io
import json
def assert_boring_ascii(text):
    """Raise ``AssertionError`` unless *text* is plain, lowercase ASCII.

    The text must be ASCII-only, already lowercase, printable, and free of
    double quotes, single quotes, backslashes and forward slashes.  The
    generator interpolates these strings unescaped into its output (for
    example between double quotes), so anything outside this set is rejected.

    The checks are explicit ``raise`` statements rather than ``assert``
    statements so that they still run under ``python -O``; the exception
    type is kept as ``AssertionError``.

    Args:
        text: The string to validate.

    Raises:
        AssertionError: If *text* violates any of the rules above.  The
            message names the violated rule and the offending value.
    """
    if not text.isascii():
        raise AssertionError(f"not ASCII: {text!r}")
    if text.lower() != text:
        raise AssertionError(f"not lowercase: {text!r}")
    if not text.isprintable():
        raise AssertionError(f"not printable: {text!r}")
    for forbidden in ('"', "'", "\\", "/"):
        if forbidden in text:
            raise AssertionError(
                f"forbidden character {forbidden!r} in {text!r}"
            )
# Build the generated lookup data from the mime-db JSON database.
#
# Input:  mime-db/db.json
# Output: src/raw_data - every subtype immediately followed by its extension,
#                        concatenated with no separators;
#         src/lookup   - a nested "&[ ... ]" listing, grouped by top-level
#                        type, whose Entry(offset, subtype_len, extension_len)
#                        items index into src/raw_data.

# JSON text is UTF-8; do not depend on the platform's locale encoding.
with open("mime-db/db.json", encoding="utf-8") as f:
    db = json.load(f)

# type -> {subtype: extension}.  Only the first listed extension is kept, and
# MIME types without any extension are skipped.  Iterating in sorted order
# makes the generated output deterministic.
by_type = collections.defaultdict(dict)
for mime, info in sorted(db.items()):
    if extensions := info.get("extensions"):
        type_, subtype = mime.split("/")
        by_type[type_][subtype] = extensions[0]

raw_data = io.StringIO()
lookup_text = io.StringIO()
lookup_text.write(
    """// This file is generated by build.py
// Do not edit manually
&[
"""
)

# Position in raw_data where the next subtype starts.  Tracked explicitly
# instead of via raw_data.tell(), whose value is documented as opaque for
# text streams.
offset = 0
for type_, subtypes in by_type.items():
    assert_boring_ascii(type_)
    lookup_text.write(f"""("{type_}", &[\n""")
    for subtype, extension in subtypes.items():
        assert_boring_ascii(subtype)
        assert_boring_ascii(extension)
        # Explicit raise (not ``assert``) so the check survives ``python -O``.
        if "." in extension:
            raise AssertionError(f"extension contains a dot: {extension!r}")
        lookup_text.write(
            f"""// {type_}/{subtype}: {extension}
Entry({offset}, {len(subtype)}, {len(extension)}),
"""
        )
        raw_data.write(subtype)
        raw_data.write(extension)
        offset += len(subtype) + len(extension)
    lookup_text.write("]),\n")
lookup_text.write("]\n")

# The offsets and lengths above are counted in characters; writing as ASCII
# guarantees one byte per character and fails loudly if anything non-ASCII
# ever slips through.
with open("src/raw_data", "w", encoding="ascii") as f:
    f.write(raw_data.getvalue())
with open("src/lookup", "w", encoding="utf-8") as f:
    f.write(lookup_text.getvalue())