import argparse
import gzip
import msgpack
import zstandard as zstd
def cB_to_freq(cB: int) -> float:
    """Convert a frequency expressed in centibels into a plain proportion.

    The value is treated as hundredths of a base-10 logarithmic unit, so the
    result is ``10 ** (cB / 100)``; for example, 0 cB maps to 1.0.

    Args:
        cB: Frequency in centibels; must not be positive.

    Returns:
        The corresponding frequency as a float (at most 1.0 for valid input).

    Raises:
        ValueError: If ``cB`` is greater than zero.
    """
    if cB > 0:
        raise ValueError("A frequency cannot be a positive number of centibels.")
    # Scale centibels down to whole log10 units before exponentiating.
    exponent = cB / 100
    return 10 ** exponent
def main():
    """Convert a gzipped msgpack centibel word list to zstd-compressed text.

    Command-line arguments:
        input:  path to a gzip-compressed msgpack file. Its first element must
                be the header ``{'format': 'cB', 'version': 1}``; each element
                after it is a bucket of words, where the bucket at offset ``i``
                (counting from 0 after the header) has a frequency of ``-i``
                centibels.
        output: path of the file to write. It receives a zstd stream
                (compression level 19) of lines of the form ``word freq``.

    Raises:
        ValueError: If the header of the input file is not the expected one.
    """
    cli = argparse.ArgumentParser()
    for positional in ("input", "output"):
        cli.add_argument(positional, type=str)
    options = cli.parse_args()

    # The whole payload is unpacked into memory before any output is opened.
    with gzip.open(options.input, "rb") as packed:
        payload = msgpack.load(packed, raw=False)

    # Validate the header first so a bad input never truncates the output file.
    expected_header = {'format': 'cB', 'version': 1}
    header = payload[0]
    if header != expected_header:
        raise ValueError(f"Unexpected header: {header}")

    compression_ctx = zstd.ZstdCompressor(level=19)
    with open(options.output, "wb") as raw_out:
        sink = compression_ctx.stream_writer(raw_out)
        for centibels_below_zero, words in enumerate(payload[1:]):
            # Every word in a bucket shares one frequency; compute it once.
            frequency = cB_to_freq(-centibels_below_zero)
            for word in words:
                line = f"{word} {frequency}\n"
                sink.write(line.encode())
        # Flush with FLUSH_FRAME so the compressed frame is completed before
        # the underlying file is closed.
        sink.flush(zstd.FLUSH_FRAME)
# Run the conversion only when this file is executed as a script, so that
# importing the module (e.g. to reuse cB_to_freq) has no side effects.
if __name__ == "__main__":
    main()