mime2ext 0.1.54

Given a MIME type, suggest a file extension.
Documentation
#!/usr/bin/env python3
"""This file generates the lookup table from mime-db/db.json.

You only need to run this when updating mime-db.
"""

import collections
import io
import json


def assert_boring_ascii(text):
    """Check that it's safe to do code generation with this string.

    A string is accepted only if it is pure ASCII, contains no uppercase
    characters, is entirely printable, and contains none of the characters
    ``"``, ``'``, ``\\`` or ``/``.

    The checks are performed with explicit raises rather than ``assert``
    statements so that they still run under ``python -O``, and so that the
    error names the offending string and every rule it broke.

    Args:
        text: The string about to be embedded in generated output.

    Raises:
        AssertionError: If ``text`` violates any of the rules above.
    """
    problems = []
    # If there's unicode we'll get incorrect offsets
    # If mime-db ever starts containing unicode (unlikely!), process strings
    # with .encode("utf8") first
    if not text.isascii():
        problems.append("contains non-ASCII characters")
    if text.lower() != text:
        problems.append("contains uppercase characters")
    if not text.isprintable():
        problems.append("contains non-printable characters")
    for forbidden in ('"', "'", "\\", "/"):
        if forbidden in text:
            problems.append(f"contains {forbidden!r}")
    if problems:
        raise AssertionError(
            f"unsafe string for code generation {text!r}: " + "; ".join(problems)
        )


# Load mime-db's database: a JSON object keyed by MIME type ("type/subtype").
# JSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's locale encoding.
with open("mime-db/db.json", encoding="utf-8") as f:
    db = json.load(f)

# by_type[type][subtype] -> the first extension listed for that MIME type.
by_type = collections.defaultdict(dict)

# Sorted iteration keeps the grouping order independent of the key order in
# the JSON file.
for mime, info in sorted(db.items()):
    # Entries with no "extensions" key (or an empty list) are skipped.
    if extensions := info.get("extensions"):
        type_, subtype = mime.split("/")
        by_type[type_][subtype] = extensions[0]

# raw_data accumulates every subtype immediately followed by its extension,
# with no separators; lookup_text is the generated table that indexes into it.
raw_data = io.StringIO()
lookup_text = io.StringIO()
lookup_text.write(
    "// This file is generated by build.py\n"
    "// Do not edit manually\n"
    "\n"
    "&[\n"
)

# Track the position in raw_data ourselves: tell() on a text stream is
# documented as an opaque number, so it should not be used as an offset.
# Every string written is checked to be ASCII, so characters == bytes.
offset = 0
for type_, extensions in by_type.items():
    assert_boring_ascii(type_)
    lookup_text.write(f'("{type_}", &[\n')
    for subtype, extension in extensions.items():
        assert_boring_ascii(subtype)
        assert_boring_ascii(extension)
        # Explicit raise (not `assert`) so the check survives `python -O`
        # and the failure identifies the offending entry.
        if "." in extension:
            raise AssertionError(
                f"extension {extension!r} for {type_}/{subtype} contains '.'"
            )
        # Each Entry records (offset into raw_data, subtype length,
        # extension length); the preceding comment line is for human readers.
        lookup_text.write(
            f"// {type_}/{subtype}: {extension}\n"
            f"Entry({offset}, {len(subtype)}, {len(extension)}),\n"
        )
        raw_data.write(subtype)
        raw_data.write(extension)
        offset += len(subtype) + len(extension)
    lookup_text.write("]),\n")
lookup_text.write("]\n")

# Write both generated artifacts with a fixed encoding and without newline
# translation, so the files are byte-identical whichever platform runs this
# script (the default would use the locale encoding and, on Windows, CRLF).
with open("src/raw_data", "w", encoding="utf-8", newline="\n") as f:
    f.write(raw_data.getvalue())

with open("src/lookup", "w", encoding="utf-8", newline="\n") as f:
    f.write(lookup_text.getvalue())