import fileinput
import itertools
import json
import multiprocessing
import os
import pathlib
from typing import Callable, Iterable, Sized, TypeVar
Cmp = TypeVar("Cmp", bound=Sized)
Key = Callable[[str], Cmp]
Filter = Callable[[Iterable[str]], Iterable[str]]
def main():
cleaner = Cleaner(most_recent_modification_date, yield_all_except_first)
sequential(fileinput.input(), cleaner)
def sequential(ldjson: Iterable[str], cleaner: "Cleaner"):
for line in ldjson:
cleaner(line)
def parallel(ldjson: Iterable[str], cleaner: "Cleaner"):
with multiprocessing.Pool() as pool:
pool.imap_unordered(cleaner, ldjson)
class Cleaner:
def __init__(self, key: Key = None, filter: Filter = lambda f: f):
self.key = key
self.filter = filter
def __call__(self, line: str):
files: list[str] = json.loads(line)
files.sort(key=self.key)
for filename in self.filter(files):
pass
def most_recent_modification_date(filename: str) -> float:
return pathlib.Path(filename).stat().st_mtime
def yield_all_except_first(files: Iterable[str]) -> Iterable[str]:
return itertools.islice(files, 1, None)
if __name__ == "__main__":
main()