# End-to-end workflow: builds the AgentShield binary from source and runs it
# against the fixture projects under tests/fixtures on Linux and macOS.
name: Action E2E Test

# Run for pushes to main and for pull requests that target main.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

env:
  CARGO_TERM_COLOR: always

jobs:
  action-e2e:
    name: Action E2E (${{ matrix.os }})
    runs-on: ${{ matrix.os }}
    strategy:
      # Let the remaining matrix leg finish even when one OS fails.
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest]
    # Declaring any permission sets every unlisted scope to "none", so the
    # read access that actions/checkout relies on is listed explicitly.
    permissions:
      contents: read
      # Required by the SARIF upload step at the end of the job.
      security-events: write
    steps:
# Fetch the repository so the fixtures and the local action (./) are available.
- uses: actions/checkout@v6
# Install the stable Rust toolchain needed by the cargo build below.
- uses: dtolnay/rust-toolchain@stable
# Rust build cache; presumably reuses cargo artifacts between runs
# (NOTE(review): confirm the cached paths against the action's documentation).
- uses: Swatinem/rust-cache@v2
# Compile the CLI in release mode; the install step copies it from
# target/release.
- name: Build AgentShield
  run: cargo build --release
# Copy the freshly built binary into a directory under the runner temp dir and
# append that directory to PATH for the following steps.
- name: Install binary
  run: |
    # Quote every expansion so a temp path containing spaces is not split.
    INSTALL_DIR="${{ runner.temp }}/agentshield"
    mkdir -p "$INSTALL_DIR"
    cp target/release/agentshield "$INSTALL_DIR/"
    echo "$INSTALL_DIR" >> "$GITHUB_PATH"
# Test 1 (scan): the safe_calculator fixture must exit 0 under --fail-on high.
# The observed exit code is also published as the step output "exit_code".
- name: "Test 1: Scan safe_calculator (expect pass)"
  id: scan-safe
  shell: bash
  run: |
    # "cmd || VAR=$?" records the status without tripping the shell's errexit.
    EXIT_CODE=0
    agentshield scan tests/fixtures/mcp_servers/safe_calculator \
      --format sarif \
      --output ${{ runner.temp }}/safe.sarif \
      --fail-on high || EXIT_CODE=$?
    echo "exit_code=$EXIT_CODE" >> $GITHUB_OUTPUT
    if [ "$EXIT_CODE" -eq 0 ]; then
      echo "PASS: safe_calculator scanned clean (exit 0)"
    else
      echo "::error::safe_calculator should produce exit code 0, got $EXIT_CODE"
      exit 1
    fi
# Test 1 (verify): the SARIF report written by the scan step must exist, parse
# as JSON, contain at least one run, and carry no error-level results.
- name: "Test 1: Verify SARIF output"
  shell: bash
  run: |
    SARIF="${{ runner.temp }}/safe.sarif"
    if [ ! -f "$SARIF" ]; then
      echo "::error::SARIF file not created"
      exit 1
    fi
    # Validate JSON structure and no high/critical findings.
    # The shell expands $SARIF inside the double-quoted Python source below.
    python3 -c "
    import json
    with open('$SARIF') as f:
        data = json.load(f)
    assert 'runs' in data, 'Missing runs key'
    # Fail with a clear message instead of an IndexError on an empty run list.
    assert data['runs'], 'SARIF contains no runs'
    results = data['runs'][0].get('results', [])
    # May have low/medium supply chain findings, but no high/critical
    high_crit = [r for r in results if r.get('level') == 'error']
    assert len(high_crit) == 0, f'Expected 0 high/critical, got {len(high_crit)}'
    print(f'PASS: SARIF valid, {len(results)} results (0 high/critical)')
    "
# Test 2 (scan): the vuln_cmd_inject fixture must exit 1 under --fail-on high.
# The observed exit code is also published as the step output "exit_code".
- name: "Test 2: Scan vuln_cmd_inject (expect fail)"
  id: scan-vuln
  shell: bash
  run: |
    # Record the scanner status without letting errexit abort the step.
    EXIT_CODE=0
    agentshield scan tests/fixtures/mcp_servers/vuln_cmd_inject \
      --format sarif \
      --output ${{ runner.temp }}/vuln.sarif \
      --fail-on high || EXIT_CODE=$?
    echo "exit_code=$EXIT_CODE" >> $GITHUB_OUTPUT
    if [ "$EXIT_CODE" -eq 1 ]; then
      echo "PASS: vuln_cmd_inject correctly detected (exit 1)"
    else
      echo "::error::vuln_cmd_inject should produce exit code 1 (findings), got $EXIT_CODE"
      exit 1
    fi
# Test 2 (verify): the SARIF report for vuln_cmd_inject must contain at least
# one result and one of them must carry rule id SHIELD-001.
- name: "Test 2: Verify findings in SARIF"
  shell: bash
  run: |
    python3 -c "
    import json
    with open('${{ runner.temp }}/vuln.sarif') as f:
        data = json.load(f)
    # Fail with a clear message instead of a KeyError/IndexError on missing runs.
    assert data.get('runs'), 'SARIF contains no runs'
    results = data['runs'][0].get('results', [])
    assert len(results) > 0, 'Expected findings but got 0'
    # Check for SHIELD-001 (command injection).
    # ruleId is optional on a SARIF result, so read it with .get() and let the
    # assertion below report the ids that were actually found.
    rule_ids = [r.get('ruleId') for r in results]
    assert 'SHIELD-001' in rule_ids, f'Expected SHIELD-001, got {rule_ids}'
    print(f'PASS: {len(results)} findings, includes SHIELD-001')
    "
# Test 3: the vuln_ssrf fixture must exit 1 and its JSON report must list
# rule id SHIELD-003 among the findings.
- name: "Test 3: Scan vuln_ssrf (expect fail)"
  shell: bash
  run: |
    # Record the scanner status without letting errexit abort the step.
    EXIT_CODE=0
    agentshield scan tests/fixtures/mcp_servers/vuln_ssrf \
      --format json \
      --output ${{ runner.temp }}/ssrf.json \
      --fail-on high || EXIT_CODE=$?
    if [ "$EXIT_CODE" -ne 1 ]; then
      echo "::error::vuln_ssrf should produce exit code 1, got $EXIT_CODE"
      exit 1
    fi
    # Inspect the JSON report for the expected rule id.
    python3 -c "
    import json
    with open('${{ runner.temp }}/ssrf.json') as report:
        findings = json.load(report).get('findings', [])
    rule_ids = [finding['rule_id'] for finding in findings]
    assert 'SHIELD-003' in rule_ids, f'Expected SHIELD-003, got {rule_ids}'
    print(f'PASS: {len(findings)} findings, includes SHIELD-003')
    "
# Test 4: the vuln_cred_exfil fixture must exit 1 and its JSON report must list
# rule id SHIELD-002 among the findings.
- name: "Test 4: Scan vuln_cred_exfil (expect fail)"
  shell: bash
  run: |
    # Record the scanner status without letting errexit abort the step.
    EXIT_CODE=0
    agentshield scan tests/fixtures/mcp_servers/vuln_cred_exfil \
      --format json \
      --output ${{ runner.temp }}/cred.json \
      --fail-on high || EXIT_CODE=$?
    if [ "$EXIT_CODE" -ne 1 ]; then
      echo "::error::vuln_cred_exfil should produce exit code 1, got $EXIT_CODE"
      exit 1
    fi
    # Inspect the JSON report for the expected rule id.
    python3 -c "
    import json
    with open('${{ runner.temp }}/cred.json') as handle:
        payload = json.load(handle)
    findings = payload.get('findings', [])
    rule_ids = [entry['rule_id'] for entry in findings]
    assert 'SHIELD-002' in rule_ids, f'Expected SHIELD-002, got {rule_ids}'
    print(f'PASS: {len(findings)} findings, includes SHIELD-002')
    "
# Test 5: the console formatter should print either "findings" or "No issues".
# A miss is reported as a warning only; it does not fail the step.
- name: "Test 5: Console output format"
  shell: bash
  run: |
    OUTPUT=$(agentshield scan tests/fixtures/mcp_servers/safe_calculator --format console 2>&1)
    # -E makes the alternation portable: "\|" in a basic regex is a GNU
    # extension and this job also runs on macOS. The here-string replaces
    # "echo | grep -q", which can fail with SIGPIPE under pipefail when grep
    # exits before echo has finished writing.
    if grep -Eq "findings|No issues" <<< "$OUTPUT"; then
      echo "PASS: Console output contains expected text"
    else
      echo "::warning::Console output may not contain expected text"
    fi
# Test 6: the HTML formatter must write a report file larger than 100 bytes.
# The scanner's own exit status is ignored; only the file is checked.
- name: "Test 6: HTML output format"
  shell: bash
  run: |
    REPORT="${{ runner.temp }}/report.html"
    agentshield scan tests/fixtures/mcp_servers/vuln_cmd_inject --format html --output ${{ runner.temp }}/report.html || true
    # Guard clauses: bail out on the first failed expectation.
    if [ ! -f "$REPORT" ]; then
      echo "::error::HTML report not created"
      exit 1
    fi
    SIZE=$(wc -c < "$REPORT" | tr -d ' ')
    if ! [ "$SIZE" -gt 100 ]; then
      echo "::error::HTML report too small ($SIZE bytes)"
      exit 1
    fi
    echo "PASS: HTML report generated ($SIZE bytes)"
# Test 7: list-rules must print at least 18 lines containing "SHIELD-0".
- name: "Test 7: list-rules shows all 18 detectors"
  shell: bash
  run: |
    # grep -c prints 0 but exits 1 when nothing matches. Under the
    # "-e -o pipefail" flags of "shell: bash" that would abort the step on the
    # assignment, before the explanatory error below is printed; "|| true"
    # keeps the count and lets the comparison decide.
    COUNT=$(agentshield list-rules 2>&1 | grep -c "SHIELD-0" || true)
    if [ "$COUNT" -ge 18 ]; then
      echo "PASS: list-rules shows $COUNT detectors (>= 18)"
    else
      echo "::error::Expected >= 18 detectors, got $COUNT"
      exit 1
    fi
# Test 8a: scanning the src/mcp subdirectory without a config must exit 1 under
# --fail-on low, and the JSON report must mention both package.json and
# event-stream.
- name: "Test 8a: Unfiltered subdirectory scan sees ancestor metadata"
  shell: bash
  run: |
    # Record the scanner status without letting errexit abort the step.
    UNFILTERED_EXIT=0
    agentshield scan tests/fixtures/action_subdir_filters/src/mcp \
      --format json \
      --output ${{ runner.temp }}/subdir-unfiltered.json \
      --fail-on low || UNFILTERED_EXIT=$?
    if [ "$UNFILTERED_EXIT" -ne 1 ]; then
      echo "::error::unfiltered subdirectory scan should fail on ancestor package.json findings, got $UNFILTERED_EXIT"
      exit 1
    fi
    # Plain substring checks against the raw report text.
    python3 -c "
    from pathlib import Path
    report = Path('${{ runner.temp }}/subdir-unfiltered.json').read_text()
    expectations = (
        ('package.json', 'unfiltered scan should read ancestor package metadata'),
        ('event-stream', 'unfiltered scan should report dependency metadata'),
    )
    for needle, message in expectations:
        assert needle in report, message
    print('PASS: unfiltered subdirectory scan read ancestor dependency metadata')
    "
# Test 8b: run the repository's own action (uses: ./) against the same
# subdirectory, this time passing the fixture's .agentshield.toml as config.
# The step id is referenced by the verification step that follows.
- name: "Test 8b: Composite action honors root config path filters"
  id: subdir-filtered-action
  uses: ./
  with:
    # Binary copied into the runner temp dir by the "Install binary" step.
    binary-path: ${{ runner.temp }}/agentshield/agentshield
    config: tests/fixtures/action_subdir_filters/.agentshield.toml
    path: tests/fixtures/action_subdir_filters/src/mcp
    fail-on: low
    format: sarif
    upload-sarif: false
# Test 8c: the composite action must report exit-code 0 and finding-count 0,
# and its SARIF file must contain no results and no mention of package.json or
# event-stream.
- name: "Test 8c: Verify composite action filtered SARIF"
  shell: bash
  run: |
    ACTION_EXIT="${{ steps.subdir-filtered-action.outputs.exit-code }}"
    FINDING_COUNT="${{ steps.subdir-filtered-action.outputs.finding-count }}"
    if [ "$ACTION_EXIT" != "0" ]; then
      echo "::error::filtered composite action scan should pass, got $ACTION_EXIT"
      exit 1
    fi
    if [ "$FINDING_COUNT" != "0" ]; then
      echo "::error::filtered composite action scan should have zero findings, got $FINDING_COUNT"
      exit 1
    fi
    # Substring checks run on the raw text first, then the parsed structure.
    python3 -c "
    import json
    from pathlib import Path
    sarif_text = Path('${{ steps.subdir-filtered-action.outputs.sarif-file }}').read_text()
    forbidden = (
        ('package.json', 'filtered SARIF must not mention excluded package.json'),
        ('event-stream', 'filtered SARIF must not mention excluded dependency'),
    )
    for needle, message in forbidden:
        assert needle not in sarif_text, message
    first_run = json.loads(sarif_text).get('runs', [{}])[0]
    results = first_run.get('results', [])
    assert results == [], f'expected no filtered SARIF results, got {results}'
    print('PASS: composite action used root config and suppressed excluded metadata findings')
    "
# Upload the Test 2 SARIF report to code scanning. Runs on the Ubuntu leg only,
# even after earlier failures, and a failed upload does not fail the job.
- name: Upload SARIF to Code Scanning
  if: always() && matrix.os == 'ubuntu-latest'
  continue-on-error: true
  uses: github/codeql-action/upload-sarif@v4
  with:
    category: agentshield-e2e
    sarif_file: ${{ runner.temp }}/vuln.sarif
# Write a results table to the job summary. The step runs even after a failure
# (if: always()), so the status column is derived from the job status instead
# of unconditionally claiming that every test passed.
- name: Summary
  if: always()
  shell: bash
  env:
    # success, failure or cancelled at the time this step starts.
    JOB_STATUS: ${{ job.status }}
  run: |
    if [ "$JOB_STATUS" = "success" ]; then
      STATUS="Passed"
    else
      # Individual outcomes are not tracked here; point the reader at the log.
      STATUS="Not confirmed (job status: $JOB_STATUS, see step logs)"
    fi
    # Group the writes so the summary file is opened once.
    {
      echo "## AgentShield Action E2E Results"
      echo ""
      echo "| Test | Description | Status |"
      echo "|------|-------------|--------|"
      echo "| 1 | Safe server → exit 0 | $STATUS |"
      echo "| 2 | Vuln server → exit 1 + SHIELD-001 | $STATUS |"
      echo "| 3 | SSRF detection → SHIELD-003 | $STATUS |"
      echo "| 4 | Cred exfil → SHIELD-002 | $STATUS |"
      echo "| 5 | Console output format | $STATUS |"
      echo "| 6 | HTML output format | $STATUS |"
      echo "| 7 | list-rules (18 detectors) | $STATUS |"
      echo "| 8 | Subdirectory path filters | $STATUS |"
      echo "| 9 | SARIF upload to Code Scanning | Attempted |"
    } >> "$GITHUB_STEP_SUMMARY"