#!/usr/bin/env bash
# contrib/validation/doc-python-samples.sh — validate Python samples in Synta docs
#
# Extracts every fenced Python code block from the documentation, wraps each
# one in a checkable translation unit, and validates syntax with
# ``python3 -m py_compile``.  No synta installation is required — the check
# is purely syntactic; import resolution errors are not reported.
#
# Delegates all markdown parsing, classification, and source-file wrapping to
# the companion script doc-python-samples.py, then syntax-checks each
# generated file with ``python3 -m py_compile``.
#
# Usage:
#   ./contrib/validation/doc-python-samples.sh [OPTIONS] [FILE.md ...]
#
# If no FILE.md arguments and no --docs-dir are given, every .md file in
# the workspace is processed (build directories and external test repos
# under tests/vectors/ are excluded automatically).  Only blocks that
# reference ``synta`` are checked; all others are skipped automatically.
#
# Options:
#   --docs-dir DIR     Search for .md files in DIR instead of the whole workspace
#   --verbose, -v      Print a line for every block, not just failures
#   --help, -h         Show this message and exit
#
# Exit status: 0 if all samples pass syntax check, 1 if any failed.

# Refuse to run on anything older than bash 4.  The guard itself sticks to
# POSIX test syntax so that it can still run and report the problem when the
# interpreter is too old (or is not bash, in which case the version reads 0).
if ! [ "${BASH_VERSINFO:-0}" -ge 4 ]; then
    printf '%s\n' "error: bash 4 or later is required (you have ${BASH_VERSION:-unknown})" >&2
    exit 1
fi

# Strict mode: abort on command failure, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# ── Colour helpers (honour NO_COLOR) ─────────────────────────────────────────
# init_colours — set the RED/GREEN/YELLOW/CYAN/BOLD/NC globals.
#
# Following the NO_COLOR convention, colour is disabled whenever NO_COLOR is
# set to ANY non-empty value (NO_COLOR=1, NO_COLOR=true, ...), not only the
# literal "1".  When disabled, every variable is the empty string so the
# printf format strings that embed them stay valid.  The escape sequences are
# stored as literal backslash text and are interpreted by printf when they
# appear inside a format string.
init_colours() {
    if [[ -n "${NO_COLOR:-}" ]]; then
        RED=''; GREEN=''; YELLOW=''; CYAN=''; BOLD=''; NC=''
    else
        RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'
        CYAN='\033[0;36m'; BOLD='\033[1m'; NC='\033[0m'
    fi
}
init_colours

# ── Locate repo root ─────────────────────────────────────────────────────────
# Resolve the absolute directory holding this script, then treat the
# directory two levels above it as the repository root.
self_path="${BASH_SOURCE[0]}"
SCRIPT_DIR="$(cd "$(dirname "$self_path")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# ── Defaults ─────────────────────────────────────────────────────────────────
# Option state, overwritten by the argument parser:
#   DOCS_DIR        directory passed via --docs-dir (empty = not given)
#   VERBOSE         1 once --verbose/-v has been seen, otherwise 0
#   EXTRA_MD_FILES  markdown files named explicitly on the command line
declare -a EXTRA_MD_FILES=()
VERBOSE=0
DOCS_DIR=''

# ── Helpers ──────────────────────────────────────────────────────────────────
# die MESSAGE... — print "error: MESSAGE" (all arguments joined into one
# string) on stderr, with the prefix wrapped in RED/NC, then terminate the
# script with exit status 1.
die() {
    local message="$*"
    printf "${RED}error:${NC} %s\n" "$message" >&2
    exit 1
}
# info MESSAGE... — print the arguments, joined into one string and wrapped
# in CYAN/NC, as a single line on stdout.
info() {
    local message="$*"
    printf "${CYAN}%s${NC}\n" "$message"
}

# usage [SCRIPT] — print the help text on stdout.
#
# The help text is the leading comment block of SCRIPT (default: this file):
# an optional shebang on line 1 is skipped, then every following line that
# starts with '#' is printed with the leading "# " (or bare "#") removed.
# Output stops at the first line that is not a comment, so comments further
# down the file (section banners, implementation notes) are never mistaken
# for help text.
usage() {
    local script="${1:-${BASH_SOURCE[0]}}"
    awk '
        NR == 1 && /^#!/ { next }
        /^#/             { sub(/^# ?/, ""); print; next }
                         { exit }
    ' "$script"
}

# ── Argument parsing ─────────────────────────────────────────────────────────
# parse_args ARG... — interpret the command line.
#
# Writes the globals DOCS_DIR, VERBOSE and EXTRA_MD_FILES.  Recognised words:
#   --docs-dir DIR   store DIR in DOCS_DIR (DIR is mandatory)
#   --verbose | -v   set VERBOSE=1
#   --help | -h      print the usage text and exit 0
#   *.md             append the word to EXTRA_MD_FILES
# Any other word is a fatal error reported through die.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --docs-dir)
                # Without this check a trailing "--docs-dir" would expand an
                # unset $2 and, under `set -u`, abort with a bare
                # "unbound variable" message instead of a usable diagnostic.
                [[ $# -ge 2 ]] || die "--docs-dir requires a directory argument"
                DOCS_DIR="$2"
                shift 2
                ;;
            --verbose|-v)
                VERBOSE=1
                shift
                ;;
            --help|-h)
                usage
                exit 0
                ;;
            *.md)
                EXTRA_MD_FILES+=("$1")
                shift
                ;;
            *)
                die "Unknown option: $1"
                ;;
        esac
    done
}
parse_args "$@"

# ── Preflight checks ─────────────────────────────────────────────────────────
# A python3 interpreter must be on PATH: it runs both the extractor and the
# syntax checker.
if ! command -v python3 >/dev/null 2>&1; then
    die "python3 is required but not found"
fi

# The companion extractor must sit in the same directory as this script.
PYTHON_SCRIPT="$SCRIPT_DIR/doc-python-samples.py"
if [[ ! -f "$PYTHON_SCRIPT" ]]; then
    die "doc-python-samples.py not found next to this script (expected: $PYTHON_SCRIPT)"
fi

# ── Temp workspace ───────────────────────────────────────────────────────────
# Scratch directory for everything this run generates; it is deleted again
# whenever the script exits, whatever the exit path.
WORK_DIR="$(mktemp -d)"
cleanup_work_dir() {
    rm -rf "$WORK_DIR"
}
trap cleanup_work_dir EXIT

# ── Collect markdown files ───────────────────────────────────────────────────
# Build MD_FILES, the list of markdown files to scan.  Precedence:
#   1. files named explicitly on the command line, taken as given;
#   2. otherwise every *.md regular file below --docs-dir;
#   3. otherwise every *.md regular file below the repo root, minus the
#      excluded path patterns listed below.
# Search results are sorted so the processing order is stable.
MD_FILES=()
if (( ${#EXTRA_MD_FILES[@]} > 0 )); then
    MD_FILES=("${EXTRA_MD_FILES[@]}")
else
    if [[ -n "$DOCS_DIR" ]]; then
        find_args=("$DOCS_DIR" -name '*.md' -type f)
    else
        find_args=("$REPO_ROOT" -name '*.md' -type f)
        # Each pattern becomes one "! -path PATTERN" test, in this order.
        for excluded in \
            '*/target/*' \
            '*/.cargo/*' \
            '*/bench-data/*' \
            '*/.pytest_cache/*' \
            '*/.claude/*' \
            '*/tests/vectors/cryptography/*' \
            '*/tests/vectors/dilithium-certificates/*' \
            '*/tests/vectors/kyber-certificates/*' \
            '*/tests/vectors/mozilla-ca/*' \
            '*/tests/vectors/ccadb/*'; do
            find_args+=('!' -path "$excluded")
        done
    fi
    mapfile -t MD_FILES < <(find "${find_args[@]}" | sort)
fi

# Nothing to scan is treated as a usage error rather than a silent success.
if (( ${#MD_FILES[@]} == 0 )); then
    die "No markdown files found"
fi

# ── Banner ───────────────────────────────────────────────────────────────────
# Opening banner: tool title, repository root, and how many markdown files
# are about to be scanned.
md_file_count=${#MD_FILES[@]}

printf '\n'
for banner_line in \
    "Synta documentation Python sample validator" \
    "============================================"; do
    info "$banner_line"
done
printf 'Repo root: %s\n' "$REPO_ROOT"
printf 'Sources  : %d markdown file(s)\n' "$md_file_count"
printf '\n'

# ── Run Python extractor ─────────────────────────────────────────────────────
# Run the companion extractor over all markdown files, handing it the scratch
# directory as its first argument.  Its stdout is treated as a tab-separated
# summary: the text before the first tab is reported as the number of
# extracted blocks and the text after the last tab as the number skipped.
# NOTE(review): the exact summary format is defined by doc-python-samples.py
# and is not validated here — confirm against that script.
MANIFEST="$WORK_DIR/manifest.tsv"
extractor_cmd=(python3 "$PYTHON_SCRIPT" "$WORK_DIR" "${MD_FILES[@]}")
PY_OUTPUT="$("${extractor_cmd[@]}")"

tab=$'\t'
TOTAL_BLOCKS="${PY_OUTPUT%%"$tab"*}"
SKIP="${PY_OUTPUT##*"$tab"}"
printf 'Extracted %s Python code block(s) (%s skipped)\n\n' \
    "$TOTAL_BLOCKS" "$SKIP"

# A missing or empty manifest means there is nothing to syntax-check; that is
# reported and counts as success.
[[ -s "$MANIFEST" ]] || {
    printf "${YELLOW}No checkable Python code blocks found in the specified files.${NC}\n"
    exit 0
}

# ── Read manifest into parallel arrays ───────────────────────────────────────
# Manifest columns (tab-separated, written by doc-python-samples.py):
#   doc_file  start_line  lang  src_file  kind  raw_file
# Load the manifest into parallel arrays: index i of every M_* array describes
# the same snippet.  The sixth column (raw_file) is read but not kept.
# NOTE(review): tab is an IFS whitespace character, so `read` collapses runs
# of tabs — an empty column would shift the later ones left.  This presumes
# the extractor never writes empty fields; verify against the Python script.
declare -a M_DOC_FILE=() M_START_LINE=() M_LANG=() M_SRC_FILE=() M_KIND=()

while IFS=$'\t' read -r col_doc col_line col_lang col_src col_kind _raw_file; do
    row=${#M_DOC_FILE[@]}
    M_DOC_FILE[row]="$col_doc"
    M_START_LINE[row]="$col_line"
    M_LANG[row]="$col_lang"
    M_SRC_FILE[row]="$col_src"
    M_KIND[row]="$col_kind"
done < "$MANIFEST"

# Number of snippets listed in the manifest.
N_SNIPPETS=${#M_DOC_FILE[@]}

# ── Write the batch syntax-checker ───────────────────────────────────────────
# Check all snippets in a single Python invocation instead of one
# py_compile process per snippet.  py_compile.compile(..., doraise=True)
# is identical in behaviour to "python3 -m py_compile" but runs in-process.
# Errors are written to <snippet>.err beside each source file (same path
# convention as the original per-snippet approach) so the shell's error
# formatting code is unchanged.  Output: one "OK\tPATH" or "FAIL\tPATH"
# line per snippet, in the same order as the input arguments.
#
# To keep that one-line-per-snippet contract, ANY exception raised while
# compiling a snippet (not only PyCompileError — e.g. an unreadable source
# file) is recorded as a FAIL for that snippet instead of aborting the whole
# batch.  The .err name is derived the same way the shell derives it
# (strip a trailing ".py" if present, append ".err"), and the file is
# written as UTF-8 so the message does not depend on the locale encoding.
BATCH_CHECKER="$WORK_DIR/batch_check.py"
cat > "$BATCH_CHECKER" << 'PYEOF'
import sys, py_compile

for path in sys.argv[1:]:
    try:
        py_compile.compile(path, doraise=True)
    except Exception as exc:
        if isinstance(exc, py_compile.PyCompileError):
            detail = str(exc)
        else:
            detail = f"{type(exc).__name__}: {exc}"
        stem = path[:-3] if path.endswith(".py") else path
        err_path = stem + ".err"
        try:
            with open(err_path, "w", encoding="utf-8",
                      errors="backslashreplace") as f:
                f.write(detail + "\n")
        except OSError as write_exc:
            print(f"warning: cannot write {err_path}: {write_exc}",
                  file=sys.stderr)
        print(f"FAIL\t{path}")
    else:
        print(f"OK\t{path}")
PYEOF

# ── Batch syntax check ────────────────────────────────────────────────────────
# print_compile_errors ERR_FILE SRC_FILE LABEL
#
# Write the compiler message stored in ERR_FILE to stderr, indenting every
# line by nine spaces and replacing each occurrence of SRC_FILE (the path of
# the generated snippet) with "<LABEL>" so the report points at the
# documentation source instead of a temp file.  The replacement is a literal
# string substitution done in the shell: characters that are special in a
# sed pattern or replacement ('.', '&', '|', '\') in either path are taken
# verbatim.  If ERR_FILE cannot be read, a placeholder line is printed rather
# than failing.  Always returns 0.
print_compile_errors() {
    local err_file="$1" src_file="$2" label="$3"
    local line rest rewritten

    if [[ ! -r "$err_file" ]]; then
        printf '         %s\n' "(no error details were recorded for this sample)" >&2
        return 0
    fi

    while IFS= read -r line || [[ -n "$line" ]]; do
        # An empty SRC_FILE would match everywhere and never be consumed.
        if [[ -n "$src_file" ]]; then
            rewritten=""
            rest="$line"
            while [[ "$rest" == *"$src_file"* ]]; do
                # Keep the text before the first occurrence, add the label,
                # then continue with whatever follows that occurrence.
                rewritten+="${rest%%"$src_file"*}<${label}>"
                rest="${rest#*"$src_file"}"
            done
            line="${rewritten}${rest}"
        fi
        printf '         %s\n' "$line" >&2
    done < "$err_file"
}

PASS=0; FAIL=0
declare -a FAIL_MSGS=()

# Capture the checker's output with a command substitution (not a process
# substitution) so that a crash of the checker is detected here instead of
# surfacing later as a missing array element.
batch_output="$(python3 "$BATCH_CHECKER" "${M_SRC_FILE[@]}")" ||
    die "batch syntax checker exited abnormally"

BATCH_RESULTS=()
if [[ -n "$batch_output" ]]; then
    mapfile -t BATCH_RESULTS <<< "$batch_output"
fi

# Results are matched to snippets by position, so the counts must agree.
(( ${#BATCH_RESULTS[@]} == ${#M_SRC_FILE[@]} )) ||
    die "batch syntax checker reported ${#BATCH_RESULTS[@]} result(s) for ${#M_SRC_FILE[@]} sample(s)"

for i in "${!M_DOC_FILE[@]}"; do
    # Show documentation paths relative to the repo root where possible.
    rel_doc="${M_DOC_FILE[$i]#"$REPO_ROOT"/}"
    label="${rel_doc}:${M_START_LINE[$i]}"
    src_file="${M_SRC_FILE[$i]}"

    if [[ "${BATCH_RESULTS[$i]:-}" == OK$'\t'* ]]; then
        PASS=$((PASS + 1))
        if [[ $VERBOSE -eq 1 ]]; then
            printf "  ${GREEN}OK${NC}    %-60s [%s]\n" "$label" "${M_KIND[$i]}"
        fi
    else
        FAIL=$((FAIL + 1))
        FAIL_MSGS+=("$label")
        printf "\n${RED}[FAIL]${NC} %s\n"    "$label"
        printf   "       Language : %s\n"    "${M_LANG[$i]}"
        printf   "       Kind     : %s\n"    "${M_KIND[$i]}"
        printf   "       Errors:\n"
        # The report header above goes to stdout; the compiler message itself
        # is written to stderr by print_compile_errors.
        print_compile_errors "${src_file%.py}.err" "$src_file" "$label"
    fi
done

# ── Summary ──────────────────────────────────────────────────────────────────
# Final report: a rule, the pass/fail/skip totals, and either a success note
# (exit 0) or the list of failing sample labels (exit 1).
printf '\n'
printf -- '──────────────────────────────────────────────────────────\n'
printf 'Results: '
printf "${GREEN}%d passed${NC}, " "$PASS"
printf "${RED}%d failed${NC}, "   "$FAIL"
printf "${YELLOW}%d skipped${NC}\n" "$SKIP"

# Nothing failed: report success and stop.
if (( FAIL == 0 )); then
    printf "\n${GREEN}${BOLD}All samples compiled successfully.${NC}\n\n"
    exit 0
fi

# At least one failure: printf repeats its format once per array element, so
# every failing label gets its own line.
printf '\nFailed samples:\n'
printf "  ${RED}%s${NC}\n" "${FAIL_MSGS[@]}"
printf '\n'
exit 1
