#!/usr/bin/env bash
# contrib/validation/doc-python-samples.sh — validate Python samples in Synta docs
#
# Extracts every fenced Python code block from the documentation, wraps each
# one in a checkable translation unit, and validates syntax with
# ``python3 -m py_compile``.  No synta installation is required — the check
# is purely syntactic; import resolution errors are not reported.
#
# Delegates all markdown parsing, classification, and source-file wrapping to
# the companion script doc-python-samples.py, then syntax-checks each
# generated file with ``python3 -m py_compile``.
#
# Usage:
#   ./contrib/validation/doc-python-samples.sh [OPTIONS] [FILE.md ...]
#
# If no FILE.md arguments and no --docs-dir are given, every .md file in
# the workspace is processed (build directories and external test repos
# under tests/vectors/ are excluded automatically).  When both are given,
# the explicit FILE.md arguments win and --docs-dir is ignored.  Only blocks
# that reference ``synta`` are checked; all others are skipped automatically.
#
# Options:
#   --docs-dir DIR     Search for .md files in DIR instead of the whole workspace
#   --verbose, -v      Print a line for every block, not just failures
#   --help, -h         Show this message and exit
#
# Exit status: 0 if all samples pass syntax check, 1 if any failed.

# Refuse to run on anything older than bash 4.  This guard comes before
# strict mode is switched on, and falls back to 0 when BASH_VERSINFO is
# not available at all.
if (( ${BASH_VERSINFO[0]:-0} < 4 )); then
    printf '%s\n' "error: bash 4 or later is required (you have ${BASH_VERSION:-unknown})" >&2
    exit 1
fi

# Strict mode: abort on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ── Colour helpers (honour NO_COLOR) ────────────────────────────────────────
# init_colours — (re)initialise the colour globals.
#
# Follows the NO_COLOR convention: when NO_COLOR is set to ANY non-empty
# value (not only "1"), every colour variable becomes the empty string.
# Otherwise they hold backslash-escaped ANSI sequences.  These are literal
# "\033[...m" text, not raw ESC bytes, so they only take effect when they
# are expanded inside a printf *format* string.
#
# Globals written: RED GREEN YELLOW CYAN BOLD NC
init_colours() {
    if [[ -n "${NO_COLOR:-}" ]]; then
        RED=''; GREEN=''; YELLOW=''; CYAN=''; BOLD=''; NC=''
    else
        RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'
        CYAN='\033[0;36m'; BOLD='\033[1m'; NC='\033[0m'
    fi
}
init_colours

# ── Locate repo root ─────────────────────────────────────────────────────────
# SCRIPT_DIR is the absolute directory containing this script; REPO_ROOT is
# the directory two levels above it.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# ── Defaults ─────────────────────────────────────────────────────────────────
# Option state before the command line is parsed: no docs directory, quiet
# output, and no explicitly named markdown files.
declare -a EXTRA_MD_FILES=()
VERBOSE=0
DOCS_DIR=''

# ── Helpers ──────────────────────────────────────────────────────────────────
# The colour variables are expanded inside the printf format strings on
# purpose: they contain backslash escapes that only printf's format
# processing turns into real escape sequences.  Message text always goes
# through %s.

# die MESSAGE... — print "error: MESSAGE" on stderr and exit with status 1.
# shellcheck disable=SC2059
die()  { printf "${RED}error:${NC} %s\n" "$*" >&2; exit 1; }

# info MESSAGE... — print MESSAGE on stdout, wrapped in the CYAN colour.
# shellcheck disable=SC2059
info() { printf "${CYAN}%s${NC}\n" "$*"; }

# usage [FILE] — print the leading comment block of FILE (default: this
# script) as help text.
#
# An initial "#!" line is dropped, one leading "# " (or bare "#") is
# stripped from each line, and output stops at the first line that is not
# a comment.  Stopping there keeps section comments further down the
# script out of the --help output.
usage() {
    local script="${1:-$0}"
    sed -n \
        -e '1{/^#!/d;}' \
        -e '/^#/!q' \
        -e 's/^# \{0,1\}//p' \
        "$script"
}

# ── Argument parsing ─────────────────────────────────────────────────────────
# parse_args ARG... — fold the command-line arguments into the option globals.
#
# Globals written:
#   DOCS_DIR        value following --docs-dir
#   VERBOSE         set to 1 by --verbose / -v
#   EXTRA_MD_FILES  every argument ending in ".md" is appended, in order
#
# --help / -h prints the usage text and exits 0.  Any other argument, and a
# --docs-dir with nothing after it, is reported through die (exit status 1).
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --docs-dir)
                # Check the count explicitly: under `set -u` a bare "$2"
                # would abort with an opaque "unbound variable" error.
                [[ $# -ge 2 ]] || die "--docs-dir requires a directory argument"
                DOCS_DIR="$2"
                shift 2
                ;;
            --verbose|-v)
                VERBOSE=1
                shift
                ;;
            --help|-h)
                usage
                exit 0
                ;;
            *.md)
                EXTRA_MD_FILES+=("$1")
                shift
                ;;
            *)
                die "Unknown option: $1"
                ;;
        esac
    done
}
parse_args "$@"

# ── Preflight checks ─────────────────────────────────────────────────────────
# Both the interpreter and the companion extractor script must be present
# before any work starts.
if ! command -v python3 >/dev/null 2>&1; then
    die "python3 is required but not found"
fi

PYTHON_SCRIPT="${SCRIPT_DIR}/doc-python-samples.py"
if [[ ! -f "$PYTHON_SCRIPT" ]]; then
    die "doc-python-samples.py not found next to this script (expected: $PYTHON_SCRIPT)"
fi

# ── Temp workspace ───────────────────────────────────────────────────────────
# Private scratch directory for this run (it holds the manifest and is the
# directory handed to the extractor).  The EXIT trap removes it on every
# exit path.
cleanup_work_dir() { rm -rf "$WORK_DIR"; }
WORK_DIR="$(mktemp -d)"
trap cleanup_work_dir EXIT

# ── Collect markdown files ───────────────────────────────────────────────────
# Source selection, in priority order:
#   1. markdown files named on the command line, used as given;
#   2. every *.md regular file under --docs-dir, sorted;
#   3. every *.md regular file under the repo root, sorted, minus the
#      excluded path patterns listed below.
MD_FILES=()
if (( ${#EXTRA_MD_FILES[@]} > 0 )); then
    MD_FILES=("${EXTRA_MD_FILES[@]}")
elif [[ -n "$DOCS_DIR" ]]; then
    mapfile -t MD_FILES < <(find "$DOCS_DIR" -name '*.md' -type f | sort)
else
    # Build the find expression from a list so each exclusion is one entry.
    md_find_args=("$REPO_ROOT" -name '*.md' -type f)
    for md_exclude in \
        '*/target/*' \
        '*/.cargo/*' \
        '*/bench-data/*' \
        '*/.pytest_cache/*' \
        '*/tests/vectors/cryptography/*' \
        '*/tests/vectors/dilithium-certificates/*' \
        '*/tests/vectors/kyber-certificates/*' \
        '*/tests/vectors/mozilla-ca/*' \
        '*/tests/vectors/ccadb/*'
    do
        md_find_args+=(! -path "$md_exclude")
    done
    mapfile -t MD_FILES < <(find "${md_find_args[@]}" | sort)
fi

if (( ${#MD_FILES[@]} == 0 )); then
    die "No markdown files found"
fi

# ── Banner ───────────────────────────────────────────────────────────────────
# Title, then the repo root and the number of markdown sources selected.
printf '\n'
info "Synta documentation Python sample validator"
info "============================================"
printf 'Repo root: %s\nSources  : %d markdown file(s)\n\n' \
    "$REPO_ROOT" "${#MD_FILES[@]}"

# ── Run Python extractor ─────────────────────────────────────────────────────
# The companion script receives the work directory followed by every
# markdown file.  Its stdout is parsed as two tab-separated counters: the
# text before the first tab is the total number of blocks, the text after
# the last tab is the number skipped.  The manifest is then read from
# $WORK_DIR/manifest.tsv.
MANIFEST="${WORK_DIR}/manifest.tsv"
PY_OUTPUT="$(python3 "$PYTHON_SCRIPT" "$WORK_DIR" "${MD_FILES[@]}")"
TOTAL_BLOCKS="${PY_OUTPUT%%$'\t'*}"
SKIP="${PY_OUTPUT##*$'\t'}"
printf 'Extracted %s Python code block(s) (%s skipped)\n\n' "$TOTAL_BLOCKS" "$SKIP"

# A missing or empty manifest means nothing is checkable; that is not an error.
[[ -s "$MANIFEST" ]] || {
    printf "${YELLOW}No checkable Python code blocks found in the specified files.${NC}\n"
    exit 0
}

# ── Syntax-check loop ────────────────────────────────────────────────────────
# Manifest columns (tab-separated, written by doc-python-samples.py):
#   doc_file  start_line  lang  src_file  kind  raw_file

# print_compile_errors ERR_FILE SRC_FILE DOC_REF
#
# Write the contents of ERR_FILE to stdout with every literal occurrence of
# SRC_FILE replaced by DOC_REF and each line indented by nine spaces.
#
# The replacement uses bash pattern substitution with both operands quoted,
# so characters such as '|', '&', '\' or '.' in either path are taken
# literally.  Interpolating the paths into a sed program would let a doc
# path containing '|' break the expression and abort the run under `set -e`.
print_compile_errors() {
    local err_file="$1" src_file="$2" doc_ref="$3" err_line
    while IFS= read -r err_line || [[ -n "$err_line" ]]; do
        err_line=${err_line//"$src_file"/"$doc_ref"}
        printf '         %s\n' "$err_line"
    done < "$err_file"
}

PASS=0; FAIL=0
declare -a FAIL_MSGS=()

# raw_file is unused; it only absorbs the last manifest column.  The
# `|| [[ -n ... ]]` clause keeps a final row that lacks a trailing newline.
# shellcheck disable=SC2034
while IFS=$'\t' read -r doc_file start_line lang src_file kind raw_file \
        || [[ -n "$doc_file" ]]; do
    # Report locations relative to the repo root, as "path:line".
    rel_doc="${doc_file#"$REPO_ROOT"/}"
    label="${rel_doc}:${start_line}"

    # Compiler diagnostics are captured next to the generated source file.
    err_file="${src_file%.py}.err"
    if python3 -m py_compile "$src_file" 2>"$err_file"; then
        PASS=$(( PASS + 1 ))
        if [[ $VERBOSE -eq 1 ]]; then
            printf "  ${GREEN}OK${NC}    %-60s [%s]\n" "$label" "$kind"
        fi
    else
        FAIL=$(( FAIL + 1 ))
        FAIL_MSGS+=("$label")

        printf "\n${RED}[FAIL]${NC} %s\n"    "$label"
        printf   "       Language : %s\n"    "$lang"
        printf   "       Kind     : %s\n"    "$kind"
        printf   "       Errors:\n"
        # Replace the generated temp path with the doc reference for clarity.
        # The details go to stderr, as before; the header above is on stdout.
        print_compile_errors "$err_file" "$src_file" "<${label}>" >&2
    fi

done < "$MANIFEST"

# ── Summary ──────────────────────────────────────────────────────────────────
# One-line totals, then either the success message (exit 0) or the list of
# failed sample labels (exit 1).
printf '\n'
printf -- '──────────────────────────────────────────────────────────\n'
printf "Results: ${GREEN}%d passed${NC}, ${RED}%d failed${NC}, ${YELLOW}%d skipped${NC}\n" \
    "$PASS" "$FAIL" "$SKIP"

if (( FAIL == 0 )); then
    printf "\n${GREEN}${BOLD}All samples compiled successfully.${NC}\n\n"
    exit 0
fi

printf '\nFailed samples:\n'
# printf repeats its format once per argument, giving one line per label.
printf "  ${RED}%s${NC}\n" "${FAIL_MSGS[@]}"
printf '\n'
exit 1
