from __future__ import annotations
import hashlib
import json
import pathlib
import sys
def _recorded_rel_paths(meta: dict) -> dict[str, str]:
out: dict[str, str] = {}
for entity in meta.get("@graph", []):
rel = entity.get("@id", "")
want = entity.get("sha256")
if not want or not isinstance(rel, str):
continue
if rel.startswith(("http://", "https://", "#")):
continue
out[rel] = want
return out
def _provenance_warnings(meta: dict) -> list[str]:
warns: list[str] = []
for entity in meta.get("@graph", []):
if entity.get("@id") == "#tilezz":
ver = str(entity.get("softwareVersion", ""))
if ver == "unknown":
warns.append(
"provenance: producing tool reports softwareVersion "
"'unknown' (built outside a git checkout); the source "
"commit is NOT recorded."
)
elif ver.endswith("-dirty"):
warns.append(
f"provenance: producing tool was built -dirty ({ver}); "
"the working tree had uncommitted changes, so the "
"recorded commit does not fully describe the binary."
)
return warns
def verify(asset_dir: pathlib.Path, strict: bool = False) -> int:
crate_path = asset_dir / "ro-crate-metadata.json"
if not crate_path.exists():
sys.stderr.write(
f"no ro-crate-metadata.json in {asset_dir}; "
"pass the asset directory as the first argument\n"
)
return 2
meta = json.loads(crate_path.read_text())
recorded = _recorded_rel_paths(meta)
errs = 0
checked = 0
for rel, want in sorted(recorded.items()):
path = asset_dir / rel
if not path.is_file():
print(f"{rel}: MISSING (manifest expected sha256={want[:12]}...)")
errs += 1
continue
got = hashlib.sha256(path.read_bytes()).hexdigest()
if got != want:
print(f"{rel}: MISMATCH (got {got[:12]}, want {want[:12]})")
errs += 1
else:
checked += 1
recorded_set = set(recorded)
for path in sorted(asset_dir.rglob("*")):
if not path.is_file():
continue
rel = path.relative_to(asset_dir).as_posix()
if rel == "ro-crate-metadata.json":
continue
if "__pycache__" in path.parts:
continue
if rel not in recorded_set:
print(f"{rel}: UNEXPECTED (present on disk but not in the manifest)")
errs += 1
prov = _provenance_warnings(meta)
for w in prov:
print(f"{'PROVENANCE FAILURE' if strict else 'WARNING'}: {w}")
if strict:
errs += len(prov)
if errs:
print(f"{errs} problem(s); {checked} file(s) verified")
return 1
print(f"OK ({checked} file(s) verified; no unexpected files)")
return 0
def main(argv: list[str]) -> int:
import argparse
ap = argparse.ArgumentParser(description=__doc__)
ap.add_argument("asset_dir", nargs="?", default=".")
ap.add_argument(
"--strict",
action="store_true",
help="also FAIL (not just warn) on non-pristine provenance "
"(softwareVersion 'unknown' or '-dirty') -- use before publishing",
)
args = ap.parse_args(argv[1:])
return verify(pathlib.Path(args.asset_dir), strict=args.strict)
if __name__ == "__main__":
sys.exit(main(sys.argv))