#!/usr/bin/env sh
# scripts/check_doc_consistency.sh
#
# Verifies that the counts cited in the top-level README / index files
# match the actual filesystem state. Catches the kind of doc drift that
# accumulated between the P411-P415 and P416-P420 batches: the
# `whatsnew.md` "P411-P410 / 10 standalone examples / 5 helper scripts"
# paragraph and the `scripts/README.md` "5 shell scripts" line were
# both stale when the P416-P420 batch started.
#
# Run with:
#   bash scripts/check_doc_consistency.sh
#
# The 5 checks (in order):
#   1. tests/README.md "N test files" matches `ls tests/*.rs | wc -l`.
#   2. examples/README.md "18 standalone examples" matches
#      `ls examples/*.rs | wc -l`.
#   3. scripts/README.md "8 shell scripts" matches
#      `ls scripts/*.sh | wc -l`.
#   4. CLAIMS.md has exactly 8 `*_claim_allowed` lines.
#   5. tests/README.md mentions every tests/*.rs file (P432 per-item
#      coverage check). Exits 1 if any test file is unindexed.
#
# Exits 0 if all 5 checks pass. Every check always runs; each mismatch
# prints a clear error message, and the script exits 1 at the end if at
# least one check failed. Matches the style of scripts/check_*.sh.

# Treat any reference to an unset variable as an error.
set -o nounset

# Running tallies: number of checks executed and number that failed.
checked=0 failed=0

# count_actual GLOB
#
# Prints the number of regular files matched by GLOB (e.g. "tests/*.rs").
# Prints 0 when the directory does not exist or nothing matches.
#
# The glob is expanded by the shell instead of parsing `ls` output: a
# directory whose name happens to match the glob is no longer listed
# recursively (which inflated the count), and a filename containing a
# newline is counted once. Directories themselves are not counted.
#
# The body is a subshell `( ... )`, so the IFS change and the loop
# variables never leak into the caller.
count_actual() (
    # Empty IFS disables word splitting of $1; pathname expansion of the
    # unquoted expansion below still takes place.
    IFS=
    matches=0
    # shellcheck disable=SC2086  # unquoted on purpose so the glob expands
    for candidate in $1; do
        # An unmatched glob stays literal and fails this test, giving 0.
        if [ -f "$candidate" ]; then
            matches=$((matches + 1))
        fi
    done
    echo "$matches"
)

# assert_count LABEL ACTUAL EXPECTED
#
# Records one check in the global `checked` tally and compares ACTUAL with
# EXPECTED as strings. A match prints an [OK] line on stdout; a mismatch
# prints a [FAIL] line on stderr and bumps the global `failed` tally.
assert_count() {
    # $1 = label, $2 = actual value, $3 = expected value
    checked=$((checked + 1))
    if [ "$2" != "$3" ]; then
        echo "  [FAIL]    $1: expected $3, got $2" >&2
        failed=$((failed + 1))
        return
    fi
    echo "  [OK]      $1: $2"
}

echo "checking doc-vs-reality consistency (P416-P420 batch)..."

# Checks 1-3 all have the same shape: count the files matching a glob and
# compare the result with the number the corresponding README cites.

# 1. tests/ count. tests/README.md currently says "135 test files"
# (after the P432 fix).
assert_count "tests/*.rs count" "$(count_actual "tests/*.rs")" "135"

# 2. examples/ count, standalone only (the book chapters live in
# examples/book/). examples/README.md says "18 standalone examples"
# (P440 added proof_replay_witnesses).
assert_count "examples/*.rs count (standalone)" "$(count_actual "examples/*.rs")" "18"

# 3. scripts/ count. scripts/README.md says "8 shell scripts" (the 7
# listed in P417 plus this script, added in P418).
assert_count "scripts/*.sh count" "$(count_actual "scripts/*.sh")" "8"

# 4. CLAIMS.md has 8 *_claim_allowed lines. Note that sci_q2_claim_allowed
# contains a digit, so the character class must allow [a-z0-9_], not just
# [a-z_].
#
# Plain `grep -cE` is used so the check needs no tool beyond the POSIX
# utilities the rest of this script relies on. With a single file operand
# `grep -c` prints only the count (no "filename:" prefix), so no further
# parsing is required. If CLAIMS.md is missing, grep prints nothing on
# stdout (its error is silenced) and the count falls back to 0, which is
# then reported as a mismatch.
claim_count=$(grep -cE '^[a-z0-9_]+_claim_allowed=' CLAIMS.md 2>/dev/null)
claim_count=${claim_count:-0}
assert_count "CLAIMS.md *_claim_allowed lines" "$claim_count" "8"

# 5. P432: per-item coverage check. Every tests/*.rs file should be
# mentioned in tests/README.md, either as a literal `name.rs` reference
# or via a glob entry. The P421 fix introduced the `tensor_*.rs` /
# `sheaf_*.rs` glob entries that subsume the per-tensor / per-sheaf
# micro-test files: every file matching one of these globs is covered if
# and only if the corresponding glob entry appears in the README.

# readme_mentions BASENAME README
#
# Succeeds (status 0) when README contains the entry that covers the test
# file BASENAME: the literal glob text for tensor_*/sheaf_* files, the
# literal filename otherwise. The search uses fixed strings (-F) so the
# `.` and `*` in a name are not interpreted as regex metacharacters, and
# `--` so a name starting with `-` is never taken as an option. The match
# is still a substring match. The subshell body keeps `needle` private.
readme_mentions() (
    case "$1" in
        tensor_*.rs) needle='tensor_*.rs' ;;
        sheaf_*.rs)  needle='sheaf_*.rs' ;;
        *)           needle="$1" ;;
    esac
    grep -qF -- "$needle" "$2"
)

checked=$((checked + 1))
unindexed=""
for f in tests/*.rs; do
    # With no matching files the glob stays literal; skip it instead of
    # reporting a bogus "*.rs" as unindexed.
    [ -e "$f" ] || continue
    base=${f##*/}
    if ! readme_mentions "$base" tests/README.md; then
        unindexed="$unindexed $base"
    fi
done
if [ -z "$unindexed" ]; then
    echo "  [OK]      tests/README.md mentions every tests/*.rs file"
else
    echo "  [FAIL]    tests/README.md does not mention: $unindexed" >&2
    failed=$((failed + 1))
fi

# Final tally. A non-zero failure count prints a FAIL line on stderr and
# ends the script with status 1; otherwise the OK line is printed.
echo
echo "checked: $checked   failed: $failed"
[ "$failed" -ne 0 ] && {
    echo "FAIL: $failed doc-vs-reality check(s) failed" >&2
    exit 1
}
echo "OK: all doc-vs-reality counts match the filesystem"
