#!/usr/bin/env bash
# fetch.sh — populate tests/vectors/ with certificate test vectors
#
# Sparse-clones three external repositories and downloads two CA certificate
# datasets into subdirectories of tests/vectors/.  Already-present directories
# and files are skipped (idempotent).
#
# Directory names for the git-cloned sources match the dir_name field in
# tests/test_utils/repo.rs (RepoConfig::CRYPTOGRAPHY, LAMPS_ML_DSA,
# LAMPS_ML_KEM) so that the Rust test harness and benchmarks find the
# vectors without network access.
#
# Usage:
#   bash tests/vectors/fetch.sh          # fetch all sources
#   bash tests/vectors/fetch.sh --no-ca-roots   # skip CA root datasets
#
# Options:
#   --no-ca-roots   Skip the Mozilla NSS and CCADB CA certificate datasets
#
# Sources:
#   cryptography/            pyca/cryptography x509 test vectors       (git)
#   dilithium-certificates/  LAMPS WG ML-DSA (Dilithium) example certs (git)
#   kyber-certificates/      LAMPS WG ML-KEM (Kyber) example certs     (git)
#   mozilla-ca/              Mozilla NSS built-in root CA store         (curl)
#   ccadb/                   CCADB V4 all CA certificates               (curl)

# Require bash 4+
# Only the POSIX `[` builtin is used here, so the guard itself parses under
# any sh-compatible interpreter.  When BASH_VERSINFO is unset the `:-0`
# default makes the comparison succeed and the error path runs.
[ "${BASH_VERSINFO:-0}" -lt 4 ] && {
    printf '%s\n' "error: bash 4 or later is required (you have ${BASH_VERSION:-unknown})" >&2
    exit 1
}

# Strict mode: abort on a failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Absolute path of the directory that contains this script; every destination
# below is built from it.
#   * CDPATH is emptied for the cd: when cd resolves a relative path through
#     a non-empty CDPATH entry it prints the directory it chose, and that
#     output would be captured here together with the pwd output.
#   * `--` stops a script path that begins with `-` from being parsed as an
#     option by dirname or cd.
VECTORS_DIR="$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)"

# Scratch directory that clone_sparse clones into; the EXIT trap removes it
# when the script terminates.
WORK_DIR="$(mktemp -d)"
trap 'rm -rf -- "$WORK_DIR"' EXIT

# 1 = fetch the CA root datasets (default), 0 = skip them (--no-ca-roots).
FETCH_CA_ROOTS=1

# ── Argument parsing ──────────────────────────────────────────────────────────
# Only --no-ca-roots is recognised; anything else is reported on stderr and
# the script exits with status 1.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --no-ca-roots) FETCH_CA_ROOTS=0; shift ;;
        *) echo "error: unknown option: $1" >&2; exit 1 ;;
    esac
done

# ── Helpers ───────────────────────────────────────────────────────────────────

# clone_sparse URL NAME SPARSE_PATH DEST
#   Shallow (--depth=1), blob-filtered sparse clone of URL into
#   $WORK_DIR/NAME, restricted to SPARSE_PATH, then moved to DEST.
#   Skips if DEST already exists.  Creates parent directories as needed.
#   The clone is first moved to DEST.partial and only then renamed to DEST:
#   WORK_DIR may live on a different filesystem, where mv is a recursive
#   copy, and an interrupted copy must not leave a half-populated DEST that
#   a later run would skip as "already present".
clone_sparse() {
    local url="$1"
    local name="$2"
    local sparse_path="$3"
    local dest="$4"
    # Staging directory next to DEST, so the final step is a same-directory
    # rename; a trailing slash on DEST is dropped so the suffix attaches to
    # the directory name itself.
    local staging="${dest%/}.partial"

    if [ -d "$dest" ]; then
        echo "  already present, skipping: $dest"
        return 0
    fi

    echo "  cloning $url (sparse: $sparse_path) ..."
    git -C "$WORK_DIR" clone --quiet --depth=1 --filter=blob:none --sparse "$url" "$name"
    git -C "$WORK_DIR/$name" sparse-checkout set "$sparse_path"

    # Move the cloned repo to the destination so callers find the full
    # directory structure (e.g. dest/vectors/cryptography_vectors/...).
    # This matches what tests/test_utils/repo.rs::setup_repository() produces
    # when it auto-clones on first use.
    mkdir -p "$(dirname "$dest")"
    # Discard whatever an earlier, interrupted run may have left in staging.
    rm -rf -- "$staging"
    mv -- "$WORK_DIR/$name" "$staging"
    mv -- "$staging" "$dest"
    echo "  done → $dest"
}

# fetch_file URL DEST
#   Downloads URL to DEST using curl.  Skips if DEST already exists.
#   Creates parent directories as needed.
#   The body is written to DEST.part and renamed to DEST only after curl
#   succeeds, so a failed transfer never leaves a truncated DEST that a later
#   run would skip as "already present".  On failure the partial file is
#   removed and curl's exit status is returned.
fetch_file() {
    local url="$1"
    local dest="$2"
    local partial="${dest}.part"
    local rc=0

    if [ -f "$dest" ]; then
        echo "  already present, skipping: $(basename "$dest")"
        return 0
    fi

    mkdir -p "$(dirname "$dest")"
    echo "  downloading $(basename "$dest") ..."
    # `|| rc=$?` captures curl's status instead of letting `set -e` abort
    # before the partial file can be cleaned up.
    curl --silent --show-error --location --fail \
        --retry 3 --retry-delay 2 \
        --output "$partial" "$url" || rc=$?
    if [ "$rc" -ne 0 ]; then
        rm -f -- "$partial"
        return "$rc"
    fi

    mv -f -- "$partial" "$dest"
    echo "  done → $dest"
}

# ── Git-cloned test vector repositories ──────────────────────────────────────
# Each step prints a numbered heading and then hands off to clone_sparse,
# which skips destinations that already exist.

printf '%s\n' "=== Fetching certificate test vectors ===" ""

printf '%s\n' "1. pyca/cryptography x509/pkcs7/pkcs12 vectors"
clone_sparse "https://github.com/pyca/cryptography.git" \
    "cryptography" "vectors" "$VECTORS_DIR/cryptography"

printf '%s\n' "" "2. LAMPS WG ML-DSA (dilithium-certificates)"
clone_sparse "https://github.com/lamps-wg/dilithium-certificates.git" \
    "dilithium-certificates" "examples" "$VECTORS_DIR/dilithium-certificates"

printf '%s\n' "" "3. LAMPS WG ML-KEM (kyber-certificates)"
clone_sparse "https://github.com/lamps-wg/kyber-certificates.git" \
    "kyber-certificates" "example" "$VECTORS_DIR/kyber-certificates"

# ── CA root certificate datasets ──────────────────────────────────────────────
# Runs only while FETCH_CA_ROOTS is 1, i.e. --no-ca-roots was not given.

if (( FETCH_CA_ROOTS == 1 )); then
    printf '%s\n' "" "4. Mozilla NSS built-in root CA store (certdata.txt)"
    fetch_file "https://hg-edge.mozilla.org/releases/mozilla-release/raw-file/default/security/nss/lib/ckfw/builtins/certdata.txt" \
        "$VECTORS_DIR/mozilla-ca/certdata.txt"

    printf '%s\n' "" "5. CCADB V4 all certificate records"
    fetch_file "https://ccadb.my.salesforce-sites.com/ccadb/AllCertificateRecordsCSVFormatv4" \
        "$VECTORS_DIR/ccadb/AllCertificateRecordsCSVFormatv4.csv"

    # The PEM export is requested once per NotBeforeDecade value and stored
    # in one CSV file per decade.
    printf '%s\n' "" "6. CCADB V4 PEM data (per decade: 1990 2000 2010 2020)"
    for decade in 1990 2000 2010 2020; do
        fetch_file "https://ccadb.my.salesforce-sites.com/ccadb/AllCertificatePEMsCSVFormat?NotBeforeDecade=${decade}" \
            "$VECTORS_DIR/ccadb/AllCertificatePEMsCSVFormat_decade_${decade}.csv"
    done
fi

# ── Summary ───────────────────────────────────────────────────────────────────
# One line per source: a file count for each cloned repository, then (unless
# the CA root datasets were skipped) the size of certdata.txt and the number
# of CCADB CSV files.

printf '%s\n' "" "=== Summary ==="

for repo in cryptography dilithium-certificates kyber-certificates; do
    repo_path="$VECTORS_DIR/$repo"
    if [[ ! -d "$repo_path" ]]; then
        printf '  %-34s (not fetched)\n' "$repo/"
        continue
    fi
    # Count files with the .pem, .der, .crt and .pub extensions.
    n_certs=$(find "$repo_path" -type f \( -name '*.pem' -o -name '*.der' -o -name '*.crt' -o -name '*.pub' \) | wc -l)
    printf '  %-34s %d certificate file(s)\n' "$repo/" "$n_certs"
done

if (( FETCH_CA_ROOTS == 1 )); then
    # Mozilla: report the byte size of the downloaded certdata.txt.
    certdata="$VECTORS_DIR/mozilla-ca/certdata.txt"
    if [[ -f "$certdata" ]]; then
        n_bytes=$(wc -c < "$certdata")
        printf '  %-34s %d bytes\n' "mozilla-ca/certdata.txt" "$n_bytes"
    else
        printf '  %-34s (not fetched)\n' "mozilla-ca/certdata.txt"
    fi

    # CCADB: report how many *.csv entries exist under ccadb/.
    csv_root="$VECTORS_DIR/ccadb"
    if [[ -d "$csv_root" ]]; then
        n_csv=$(find "$csv_root" -name '*.csv' | wc -l)
        printf '  %-34s %d CSV file(s)\n' "ccadb/" "$n_csv"
    else
        printf '  %-34s (not fetched)\n' "ccadb/"
    fi
fi
