# Weekly smoke test; it can also be started by hand via workflow_dispatch.
name: Smoke Test

on:
  schedule:
    # Minute 0, hour 4, every Monday.
    - cron: '0 4 * * 1'
  workflow_dispatch:

# A newer run for the same workflow and ref cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  CARGO_TERM_COLOR: always

jobs:
# Builds panache and runs `panache debug format` over the .md/.qmd/.Rmd files
# of every repository listed in TARGET_REPOS.
debug-format-repo-scan:
  name: Scan Repositories
  permissions:
    contents: read
  runs-on: ubuntu-latest
  timeout-minutes: 90
  env:
    # One "owner/name" GitHub repository per line; the scan step clones each
    # of them from https://github.com/<owner>/<name>.git.
    TARGET_REPOS: |
      aml4td/website
      andrewheiss/ath-quarto
      clauswilke/dataviz
      csgillespie/efficientR
      davidfoxcroft/lsj-book
      ddotta/cookbook-rpolars
      elong0527/r4csr
      geocompx/geocompr
      hadley/adv-r
      hadley/ggplot2-book
      hadley/mastering-shiny
      hadley/r-in-production
      hadley/r-pkgs
      hadley/r4ds
      jeroenjanssens/data-science-at-the-command-line
      jgm/pandoc
      jolars/jolars.co
      juliasilge/tidy-text-mining
      kevinheavey/modern-polars
      math-ku/compstat
      mattiasvillani/BayesianLearningBook
      mlr-org/mlr3book
      moderndive/ModernDive_book
      nielsrhansen/CSwR
      quarto-dev/quarto-web
      r-causal/causal-inference-in-R
      rdpeng/rprogdatascience
      RohanAlexander/tswd
      rstudio-education/hopr
      rstudio/blogdown
      rstudio/bookdown
      rstudio/rmarkdown
      rstudio/rmarkdown-book
      rstudio/rmarkdown-cookbook
      ThinkR-open/engineering-shiny-book
      wch/rgcookbook
  outputs:
    # Number of failure records, as published by the "summarize" step.
    failure_count: ${{ steps.summarize.outputs.failure_count }}
  steps:
# Fetch the panache sources. The token is not persisted into the local git
# configuration: the following steps only build the binary and clone public
# repositories over anonymous HTTPS, so no authenticated git access is needed
# while third-party content is being processed.
- uses: actions/checkout@v6
  with:
    persist-credentials: false
# Pinned Rust toolchain; quoted so the version is always read as a string.
- name: Install Rust
  uses: dtolnay/rust-toolchain@master
  with:
    toolchain: "1.94.1"

# Cache Rust build artifacts (Swatinem/rust-cache).
- name: Cache Rust artifacts
  uses: Swatinem/rust-cache@v2

# Release build; the scan step runs target/release/panache.
- name: Build panache binary
  run: cargo build --release --verbose
# Clones every repository in TARGET_REPOS (shallow) and runs
# `panache debug format --checks all` on each tracked .md/.qmd/.Rmd file.
# Each failing file appends one row per detected failure type to
# failures.tsv ("idempotency", "losslessness", or "unknown" when neither
# keyword shows up in the log or report). Logs, reports and pass dumps are
# written below $RUNNER_TEMP/panache-debug-format-scan/logs.
- name: Scan repositories for debug-format regressions
  id: scan
  shell: bash
  run: |
    set -euo pipefail

    PANACHE_BIN="$GITHUB_WORKSPACE/target/release/panache"
    PANACHE_SHA="$(git rev-parse HEAD)"
    PANACHE_VERSION="$("$PANACHE_BIN" --version | head -n 1 || true)"
    RESULTS_DIR="$RUNNER_TEMP/panache-debug-format-scan"
    REPOS_DIR="$RESULTS_DIR/repos"
    LOGS_DIR="$RESULTS_DIR/logs"
    FAILURES_TSV="$RESULTS_DIR/failures.tsv"
    mkdir -p "$REPOS_DIR" "$LOGS_DIR"

    # Header row; every later row is one failure record.
    printf 'repo\tfailure_type\tfile\tlog_path\treport_path\trepo_sha\tpanache_sha\tpanache_version\tidempotency_input_path\tidempotency_once_path\tidempotency_twice_path\n' > "$FAILURES_TSV"

    # idempotency_dump STAGE
    # Prints the artifact-relative path of the idempotency dump for STAGE
    # (input, once or twice) of the current file, or nothing when panache
    # did not write that dump. Reads pass_dir, safe_rel_file and file_key.
    idempotency_dump() {
      local dump_name="$safe_rel_file.idempotency.$1.txt"
      if [ -f "$pass_dir/$dump_name" ]; then
        printf '%s' "logs/$file_key.passes/$dump_name"
      fi
    }

    # record_failure TYPE
    # Appends one row of type TYPE for the current file to failures.tsv.
    # Reads the per-repository and per-file variables set by the loops below.
    record_failure() {
      printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
        "$repo" \
        "$1" \
        "$rel_file" \
        "logs/$file_key.log" \
        "logs/$file_key.report.md" \
        "$repo_sha" \
        "$PANACHE_SHA" \
        "$PANACHE_VERSION" \
        "$idempotency_input_rel" \
        "$idempotency_once_rel" \
        "$idempotency_twice_rel" \
        >> "$FAILURES_TSV"
    }

    while IFS= read -r repo; do
      if [ -z "$repo" ]; then
        continue
      fi
      # "owner/name" becomes "owner__name" so each clone is a single directory.
      repo_dir="$REPOS_DIR/${repo//\//__}"
      git clone --depth 1 "https://github.com/${repo}.git" "$repo_dir"
      repo_sha="$(git -C "$repo_dir" rev-parse HEAD)"

      # NUL-delimited so that unusual file names survive unchanged.
      while IFS= read -r -d '' rel_file; do
        # Hash of "repo:path" gives a flat, collision-free name for the logs.
        file_key="$(printf '%s' "${repo}:${rel_file}" | sha256sum | awk '{print $1}')"
        log_path="$LOGS_DIR/$file_key.log"
        report_path="$LOGS_DIR/$file_key.report.md"
        pass_dir="$LOGS_DIR/$file_key.passes"

        if (cd "$repo_dir" && "$PANACHE_BIN" debug format --checks all --dump-dir "$pass_dir" --dump-passes "$rel_file") >"$log_path" 2>&1; then
          continue
        fi

        # Generate human-friendly markdown report for issue snippets.
        (cd "$repo_dir" && "$PANACHE_BIN" debug format --checks all --report "$rel_file") >"$report_path" 2>&1 || true

        # NOTE(review): assumes panache names its dump files after the path
        # with every character outside [[:alnum:]._-] replaced by "_".
        safe_rel_file="$(printf '%s' "$rel_file" | sed 's/[^[:alnum:]._-]/_/g')"
        idempotency_input_rel="$(idempotency_dump input)"
        idempotency_once_rel="$(idempotency_dump once)"
        idempotency_twice_rel="$(idempotency_dump twice)"

        # Classify by keyword in the log or the report; a file can fail both
        # checks and then gets one row per type.
        recorded=0
        if grep -Eiq 'idempot' "$log_path" "$report_path" 2>/dev/null; then
          record_failure "idempotency"
          recorded=1
        fi
        if grep -Eiq 'lossless' "$log_path" "$report_path" 2>/dev/null; then
          record_failure "losslessness"
          recorded=1
        fi
        if [ "$recorded" -eq 0 ]; then
          record_failure "unknown"
        fi
      done < <(git -C "$repo_dir" ls-files -z -- '*.md' '*.qmd' '*.Rmd')
    done <<< "$TARGET_REPOS"
# Publishes the number of failure records as the `failure_count` step output
# and writes a short job summary.
- name: Summarize scan results
  id: summarize
  shell: bash
  run: |
    set -euo pipefail

    tsv_path="$RUNNER_TEMP/panache-debug-format-scan/failures.tsv"

    # Every line except the header row is one failure record; an empty file
    # counts as zero records.
    line_total="$(wc -l < "$tsv_path")"
    if [ "$line_total" -gt 0 ]; then
      failure_count="$((line_total - 1))"
    else
      failure_count=0
    fi

    echo "failure_count=$failure_count" >> "$GITHUB_OUTPUT"

    {
      echo "### Panache debug format repo scan"
      echo ""
      echo "- Failure records: $failure_count"
      echo "- Scanned extensions: \`.md\`, \`.qmd\`, \`.Rmd\`"
    } >> "$GITHUB_STEP_SUMMARY"
# Publish failures.tsv and the logs/ tree for the issue-reporting job.
# The repos/ directory holds the working trees of every cloned repository;
# nothing downstream reads it, so it is excluded to keep the artifact small.
# Exclude patterns do not change the artifact's root directory, so
# failures.tsv stays at the top level of the artifact.
- name: Upload scan results artifact
  uses: actions/upload-artifact@v7
  with:
    name: debug-format-repo-scan-results
    path: |
      ${{ runner.temp }}/panache-debug-format-scan
      !${{ runner.temp }}/panache-debug-format-scan/repos/**
    if-no-files-found: error
# Turns the scan results into GitHub issues: one issue per
# (repository, failure type) pair, with follow-up comments on later runs.
debug-format-repo-scan-issues:
  name: Create Regression Issues
  runs-on: ubuntu-latest
  needs: debug-format-repo-scan
  permissions:
    contents: read
    # Needed to create issues, labels and comments with the workflow token.
    issues: write
  steps:
# Restore the scan output under the same path the scan job wrote it to.
- name: Download scan results artifact
  uses: actions/download-artifact@v8
  with:
    path: ${{ runner.temp }}/panache-debug-format-scan
    name: debug-format-repo-scan-results
# For every distinct (repository, failure type) pair in failures.tsv:
#   - if an open issue with the expected title exists, add a
#     "still reproduces" comment to it;
#   - otherwise create a new issue that starts with a hidden marker comment
#     and is labelled "ci", "debug-format" and the failure type (labels are
#     created on demand; an issue is still created if a label cannot be).
# Sample details in the text come from the first TSV row of each pair.
- name: Create or update regression issues
  if: needs.debug-format-repo-scan.outputs.failure_count != '0'
  env:
    GH_TOKEN: ${{ github.token }}
  shell: bash
  run: |
    set -euo pipefail

    RESULTS_DIR="$RUNNER_TEMP/panache-debug-format-scan"
    FAILURES_TSV="$RESULTS_DIR/failures.tsv"
    RUN_URL="${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}"
    # Byte cap for the embedded report excerpt. GitHub limits issue and
    # comment bodies to 65536 characters, and 120 lines of unwrapped prose
    # can exceed that on their own.
    MAX_EXCERPT_BYTES=40000

    existing_labels="$(gh label list --repo "$GITHUB_REPOSITORY" --limit 500 --json name --jq '.[].name')"

    # has_label NAME: succeeds when NAME is a known repository label.
    has_label() {
      local target="$1"
      grep -Fxq "$target" <<< "$existing_labels"
    }

    # ensure_label NAME COLOR DESCRIPTION: create the label unless it is
    # already known. A failed creation only produces a warning.
    ensure_label() {
      local name="$1"
      local color="$2"
      local description="$3"
      if has_label "$name"; then
        return 0
      fi
      if gh label create "$name" --repo "$GITHUB_REPOSITORY" --color "$color" --description "$description" >/dev/null 2>&1; then
        existing_labels+=$'\n'"$name"
      else
        echo "warning: could not create label '$name'; issues will be created without that label"
      fi
    }

    # sample_field COLUMN: print column COLUMN of the first failures.tsv row
    # that belongs to the current $repo / $failure_type pair.
    sample_field() {
      awk -F '\t' -v r="$repo" -v t="$failure_type" -v col="$1" \
        '$1 == r && $2 == t {print $col; exit}' "$FAILURES_TSV"
    }

    # The print_* helpers below write Markdown to stdout and read the
    # sample_* variables of the pair currently being processed. They end in
    # `if` statements on purpose so that they return 0 under `set -e`.

    # Bullets identifying the run, the command and the exact commits used.
    print_scan_provenance() {
      echo "- Workflow run: $RUN_URL"
      echo "- Panache command: \`panache debug format --checks all --report <FILE>\`"
      if [ -n "$sample_repo_sha" ]; then
        echo "- Target repository commit: \`$sample_repo_sha\`"
      fi
      if [ -n "$sample_panache_version" ]; then
        echo "- Panache version used in scan: \`$sample_panache_version\`"
      fi
      if [ -n "$sample_panache_sha" ]; then
        echo "- Panache commit used in scan: \`$sample_panache_sha\`"
      fi
    }

    # Bullets pointing at the sample's files inside the uploaded artifact.
    print_sample_artifacts() {
      if [ -n "$sample_log_rel" ]; then
        echo "- Sample log path: \`$sample_log_rel\`"
      fi
      if [ -n "$sample_report_rel" ]; then
        echo "- Sample report path: \`$sample_report_rel\`"
      fi
      if [ -n "$sample_idempotency_input_rel" ]; then
        echo "- Sample idempotency input artifact: \`$sample_idempotency_input_rel\`"
      fi
      if [ -n "$sample_idempotency_once_rel" ]; then
        echo "- Sample idempotency pass1 artifact: \`$sample_idempotency_once_rel\`"
      fi
      if [ -n "$sample_idempotency_twice_rel" ]; then
        echo "- Sample idempotency pass2 artifact: \`$sample_idempotency_twice_rel\`"
      fi
      if [ -n "$sample_diff_line" ]; then
        echo "- Approx. diff start line: $sample_diff_line"
      fi
    }

    # List of failing files, local reproduction commands and the collapsed
    # report excerpt.
    print_samples_and_repro() {
      echo ""
      echo "Sample files:"
      echo "$samples"
      if [ -n "$sample_file" ] && [ -n "$sample_repo_sha" ]; then
        echo ""
        echo "Reproduce locally:"
        echo ""
        echo '```bash'
        echo "git clone https://github.com/$repo.git"
        echo "cd $(basename "$repo")"
        echo "git checkout $sample_repo_sha"
        echo "panache debug format --checks all --report \"$sample_file\""
        echo '```'
      fi
      if [ -n "$report_excerpt" ]; then
        echo ""
        echo "<details>"
        echo "<summary>Sample report excerpt</summary>"
        echo ""
        # Four backticks so that ordinary fenced blocks inside the report
        # do not end the excerpt early.
        echo '````text'
        printf '%s\n' "$report_excerpt"
        echo '````'
        echo "</details>"
      fi
    }

    tail -n +2 "$FAILURES_TSV" | awk -F '\t' '{print $1 "\t" $2}' | sort -u | while IFS=$'\t' read -r repo failure_type; do
      if [ -z "$repo" ] || [ -z "$failure_type" ]; then
        continue
      fi

      marker="panache-debug-format-key:repo=${repo};type=${failure_type}"
      title="CI: debug-format regression in ${repo} (${failure_type})"
      count="$(awk -F '\t' -v r="$repo" -v t="$failure_type" '$1 == r && $2 == t {c++} END {print c+0}' "$FAILURES_TSV")"
      # At most ten failing files are listed.
      samples="$(awk -F '\t' -v r="$repo" -v t="$failure_type" '$1 == r && $2 == t {printf "- `%s`\n", $3; c++; if (c == 10) exit}' "$FAILURES_TSV")"

      # Column numbers follow the header row written by the scan job.
      sample_file="$(sample_field 3)"
      sample_log_rel="$(sample_field 4)"
      sample_report_rel="$(sample_field 5)"
      sample_repo_sha="$(sample_field 6)"
      sample_panache_sha="$(sample_field 7)"
      sample_panache_version="$(sample_field 8)"
      sample_idempotency_input_rel="$(sample_field 9)"
      sample_idempotency_once_rel="$(sample_field 10)"
      sample_idempotency_twice_rel="$(sample_field 11)"

      report_excerpt=""
      sample_diff_line=""
      if [ -n "$sample_report_rel" ] && [ -f "$RESULTS_DIR/$sample_report_rel" ]; then
        sample_report_path="$RESULTS_DIR/$sample_report_rel"
        # `head -c` runs first and sed consumes all of its output, so no
        # stage closes the pipe early and trips `pipefail`.
        report_excerpt="$(head -c "$MAX_EXCERPT_BYTES" "$sample_report_path" | sed -n '1,120p')"
        sample_diff_line="$(grep -Eo 'Approx\. diff start line: [0-9]+' "$sample_report_path" | head -n 1 | awk '{print $NF}' || true)"
      fi

      # The title reaches jq through the environment, so it needs no escaping.
      export ISSUE_TITLE="$title"
      issue_number="$(gh issue list --repo "$GITHUB_REPOSITORY" --state open --limit 200 --json number,title --jq '.[] | select(.title == env.ISSUE_TITLE) | .number' | head -n 1)"

      if [ -n "$issue_number" ]; then
        comment_file="$RUNNER_TEMP/panache-issue-comment.md"
        {
          echo "Regression still reproduces for \`$repo\` (\`$failure_type\`)."
          echo ""
          echo "- New failure records: $count"
          print_scan_provenance
          print_sample_artifacts
          print_samples_and_repro
        } > "$comment_file"
        gh issue comment "$issue_number" --repo "$GITHUB_REPOSITORY" --body-file "$comment_file"
      else
        body_file="$RUNNER_TEMP/panache-issue-body.md"
        {
          # Hidden marker that identifies the issue by repository and type.
          echo "<!-- $marker -->"
          echo "# Debug-format regression detected"
          echo ""
          echo "- Target repository: \`$repo\`"
          echo "- Failure type: \`$failure_type\`"
          echo "- Failure records in this run: $count"
          print_scan_provenance
          if [ -n "$sample_file" ]; then
            echo "- Sample file: \`$sample_file\`"
          fi
          print_sample_artifacts
          print_samples_and_repro
          echo ""
          echo "See artifact \`debug-format-repo-scan-results\` for full logs."
        } > "$body_file"

        ensure_label "ci" "0366D6" "Continuous integration"
        ensure_label "debug-format" "5319E7" "Panache debug format scan regressions"
        case "$failure_type" in
          idempotency) failure_color="B60205" ;;
          losslessness) failure_color="D93F0B" ;;
          # "unknown" and anything unexpected share the neutral grey.
          *) failure_color="6E7781" ;;
        esac
        ensure_label "$failure_type" "$failure_color" "Regression category from debug format scan"

        # Only attach labels that actually exist.
        label_args=()
        for label in "ci" "debug-format" "$failure_type"; do
          if has_label "$label"; then
            label_args+=(--label "$label")
          fi
        done

        gh issue create \
          --repo "$GITHUB_REPOSITORY" \
          --title "$title" \
          "${label_args[@]}" \
          --body-file "$body_file"
      fi
    done
# Looks at every open issue whose body carries a
# "panache-debug-format-key:" marker. When the marked (repo, failure type)
# pair did not fail in this run, a single "No longer reproducing" comment is
# added, unless the issue's latest comment already says so.
- name: Comment on issues that no longer reproduce
  env:
    GH_TOKEN: ${{ github.token }}
  shell: bash
  run: |
    set -euo pipefail

    RESULTS_DIR="$RUNNER_TEMP/panache-debug-format-scan"
    FAILURES_TSV="$RESULTS_DIR/failures.tsv"
    RUN_URL="${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}"

    # One marker line per (repo, failure_type) pair that failed in this run;
    # the file stays empty when there is no TSV.
    failing_markers="$RUNNER_TEMP/panache-current-keys.txt"
    : > "$failing_markers"
    if [ -f "$FAILURES_TSV" ]; then
      tail -n +2 "$FAILURES_TSV" \
        | awk -F '\t' 'NF >= 2 && $1 != "" && $2 != "" {printf "panache-debug-format-key:repo=%s;type=%s\n", $1, $2}' \
        | sort -u > "$failing_markers"
    fi

    # Open issues whose body mentions the marker prefix.
    tracked_issues="$(gh issue list \
      --repo "$GITHUB_REPOSITORY" \
      --state open \
      --search 'in:body "panache-debug-format-key:"' \
      --limit 500 \
      --json number,body)"

    jq -c '.[]' <<< "$tracked_issues" | while IFS= read -r issue_json; do
      issue_id="$(jq -r '.number' <<< "$issue_json")"
      issue_body="$(jq -r '.body' <<< "$issue_json")"
      issue_marker="$(grep -Eo 'panache-debug-format-key:repo=[^;]+;type=[^[:space:]<>-]+' <<< "$issue_body" | head -n 1 || true)"

      # The search hit did not contain a complete marker.
      [ -n "$issue_marker" ] || continue

      # Still reproducing: the create/update step already handled it.
      if grep -Fxq "$issue_marker" "$failing_markers"; then
        continue
      fi

      # Avoid spamming: skip if the latest comment already reports a green run.
      last_comment="$(gh issue view "$issue_id" --repo "$GITHUB_REPOSITORY" --json comments \
        --jq '.comments | last | .body // ""')"
      if grep -Fq 'No longer reproducing' <<< "$last_comment"; then
        continue
      fi

      note_file="$RUNNER_TEMP/panache-issue-green-comment.md"
      printf '%s\n' \
        "No longer reproducing in the latest scan." \
        "" \
        "- Marker: \`$issue_marker\`" \
        "- Workflow run: $RUN_URL" \
        "" \
        "If this stays green over the next few weekly scans, this issue can be closed." \
        > "$note_file"
      gh issue comment "$issue_id" --repo "$GITHUB_REPOSITORY" --body-file "$note_file"
    done
# Turn the run red whenever the scan recorded at least one failure. The
# count is handed to the script through an environment variable instead of
# being expanded into the script text, so the value is never parsed as
# shell code.
- name: Fail workflow when regressions are detected
  if: needs.debug-format-repo-scan.outputs.failure_count != '0'
  env:
    FAILURE_COUNT: ${{ needs.debug-format-repo-scan.outputs.failure_count }}
  shell: bash
  run: |
    echo "Debug format regressions detected: $FAILURE_COUNT"
    exit 1