# lihaaf 0.1.2 - Docs.rs

name: Secrets scan (public text)

# Public-text guard: scans the body of newly opened / edited issues,
# pull requests, and comments for sensitive-info patterns and posts a
# single warning comment on a match — naming only the pattern
# CATEGORY, never the matched value.
#
# Sibling of the local pre-commit guard at scripts/scan-secrets.sh.
# See SECURITY.md for the pattern set, allow-list convention, and
# threat model.
#
# Design notes (addressing Gemini-3-Pro round-1 findings against the
# original Spec F draft):
#
#  - `pull_request_target` (not `pull_request`) is used so the workflow
#    runs in the context of the upstream repo and gets a token with
#    write scope. Fork-PR `pull_request` runs get a read-only token
#    and cannot post comments. (Round-1 finding #2.)
#
#  - We DO NOT check out the fork PR's head code. The default checkout
#    for `pull_request_target` is the base ref, which is what we want:
#    only the event payload's text fields are scanned, and the
#    workflow never executes untrusted code. The Python scanner lives
#    in `.github/scripts/`, which is part of the base-ref tree.
#
#  - The scanner step writes findings to $GITHUB_OUTPUT in heredoc
#    form. `print()` does not set step outputs by itself. (Round-1
#    finding #1.)
#
#  - The github-script step makes a complete
#    `github.rest.issues.createComment` call and takes the issue/PR
#    number from whichever of `issue` / `pull_request` the event
#    payload carries. (Round-1 finding #3.)
#
#  - The Python scanner's allow-list checks the WHOLE LINE for
#    placeholder syntax rather than the regex match span, fixing the
#    `m.group(0)` mistake. (Round-1 finding #4.)

# Triggers: every event whose payload carries user-authored public text.
#
# `synchronize` is deliberately NOT listed under `pull_request_target`:
# pushing commits changes neither the PR title nor the PR body, which are
# the only pull-request fields this workflow scans, so re-running on each
# push would only re-post an identical warning comment.
on:
  issues:
    types: [opened, edited]
  issue_comment:
    types: [created, edited]
  pull_request_target:
    types: [opened, edited, reopened]
  pull_request_review_comment:
    types: [created, edited]

# Token scopes for the whole workflow:
#  - contents: read        -> the checkout step reads the repository.
#  - issues: write         -> warning comments on issues.
#  - pull-requests: write  -> warning comments on PR conversations.
permissions:
  contents: read
  issues: write
  pull-requests: write

# Concurrency: one scan per scanned text at a time, so a fast edit storm
# on the same issue, PR, or comment collapses into the final state rather
# than racing comments.
#
# The comment id is part of the group key. Without it, two DIFFERENT
# comments posted back-to-back on the same thread would share a group and
# `cancel-in-progress` would cancel the first comment's scan, leaving it
# unscanned. Issue / PR events carry no comment, so they fall back to the
# literal `body`.
concurrency:
  group: secrets-scan-${{ github.event_name }}-${{ github.event.issue.number || github.event.pull_request.number }}-${{ github.event.comment.id || 'body' }}
  cancel-in-progress: true

jobs:
  scan:
    runs-on: ubuntu-latest
    name: Scan public text for sensitive-info patterns
    steps:
      # Trusted-tree checkout. Under `pull_request_target` the default
      # ref is the BASE branch, never the fork PR head; for issue and
      # comment events the default-branch checkout is equally fine. The
      # scanner script therefore always comes from code we control.
      - name: Checkout base ref
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      # Provides the `python3` interpreter that the scan step invokes.
      # The version is quoted so YAML keeps it a string.
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.x'

      - name: Scan event text for sensitive-info patterns
        id: scan
        env:
          # The scanner reads TEXT_SCAN_TITLE + TEXT_SCAN_BODY. Untrusted
          # text is passed through the environment (never interpolated
          # into the `run` script) so it cannot be executed by the shell.
          #
          # Each event type maps to exactly ONE title/body pair:
          #   issues                       -> issue title + issue body
          #   pull_request_target          -> PR title    + PR body
          #   issue_comment                -> (no title)  + comment body
          #   pull_request_review_comment  -> (no title)  + comment body
          #
          # Comment payloads also carry the parent issue / PR object, so
          # the fields must be selected explicitly: blindly concatenating
          # them would re-scan the parent's title and body on every
          # comment and attribute any match to the comment itself.
          TEXT_SCAN_TITLE: |
            ${{ (github.event_name == 'issues' && github.event.issue.title) || (github.event_name == 'pull_request_target' && github.event.pull_request.title) || '' }}
          TEXT_SCAN_BODY: |
            ${{ github.event.comment.body || github.event.issue.body || github.event.pull_request.body || '' }}
        run: |
          # Capture the scanner's exit status without letting a non-zero
          # status abort the step.
          set +e
          python3 .github/scripts/scan-text-for-secrets.py
          rc=$?
          set -e
          # Exit 0 from THIS step regardless: the github-script step
          # branches on steps.scan.outputs.findings to decide whether
          # to post a comment. Failing this step would prevent the
          # follow-up comment from running.
          echo "rc=$rc"
          exit 0

      # Posts one warning comment listing the matched pattern CATEGORIES.
      # Runs only when the scan step produced a non-empty `findings`
      # output; the matched values themselves are never echoed.
      - name: Post warning comment on findings
        if: steps.scan.outputs.findings != ''
        uses: actions/github-script@v8
        env:
          FINDINGS: ${{ steps.scan.outputs.findings }}
        with:
          script: |
            const event = context.eventName;
            const findings = process.env.FINDINGS || '';

            // Parse "CATEGORY: <name>" lines from the scanner output.
            // Anything that does not start with "CATEGORY: " is
            // dropped defensively; we never echo arbitrary text.
            // The Set removes repeats (first-seen order is kept) so a
            // category reported for several lines yields one bullet.
            const categories = [...new Set(
              findings
                .split('\n')
                .map(s => s.trim())
                .filter(s => s.startsWith('CATEGORY: '))
                .map(s => s.slice('CATEGORY: '.length))
                .filter(s => /^[a-z_]+$/.test(s))
            )];

            if (categories.length === 0) {
              core.info('No valid categories parsed; skipping comment.');
              return;
            }

            // Human-readable name of the surface that triggered the scan.
            const kindByEvent = {
              issues: 'issue',
              issue_comment: 'comment',
              pull_request_target: 'pull request',
              pull_request_review_comment: 'review comment',
            };
            const kind = Object.hasOwn(kindByEvent, event)
              ? kindByEvent[event]
              : 'submission';

            // Issue events and issue comments carry `issue`; PR events
            // and review comments carry `pull_request`.
            const issueNumber =
              context.payload.issue?.number ||
              context.payload.pull_request?.number;

            if (!issueNumber) {
              core.warning(`No issue/PR number on event ${event}; cannot post comment.`);
              return;
            }

            const bullets = categories.map(c => `- \`${c}\``).join('\n');
            const body = [
              ':rotating_light: **Sensitive-info patterns detected**',
              '',
              `The text of this ${kind} appears to match patterns from the lihaaf sensitive-info guard:`,
              '',
              bullets,
              '',
              'Please redact and edit. The local pre-commit hook',
              '(`scripts/install-pre-commit-hook.sh`) catches these before push.',
              'See `SECURITY.md` for the pattern set, the `<word>` placeholder',
              'allow-list, and the bypass mechanism.',
            ].join('\n');

            // PRs are issues in the GitHub REST API for the
            // "conversation" comment thread. We post on the PR
            // conversation rather than a per-line review comment so
            // the warning is visible regardless of which surface
            // triggered the scan.
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: issueNumber,
              body,
            });
            core.info(`Posted sensitive-info warning on #${issueNumber} (${kind}).`);