# thoughtjack 0.6.0
#
# Adversarial agent security testing tool
# Documentation
# Workflow-level settings: display name, triggers, token permissions and
# environment variables shared by every job.
name: CI

# Run for pushes to main and for pull requests that target main.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

# The workflow token gets read-only access to every permission scope.
permissions: read-all

env:
  # Keep Cargo's coloured output even though the logs are not a TTY.
  CARGO_TERM_COLOR: always

jobs:
  # Formatting gate: fails when rustfmt would change any file.
  fmt:
    runs-on: ubuntu-latest
    name: Format
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      # Toolchain action pinned by commit; only the rustfmt component is requested.
      - uses: dtolnay/rust-toolchain@4be9e76fd7c4901c61fb841f559994984270fce7 # stable
        with:
          components: rustfmt

      # --check reports formatting differences instead of rewriting files.
      - run: cargo fmt -- --check

  # Lint gate: runs Clippy, test targets included, with warnings as errors.
  clippy:
    runs-on: ubuntu-latest
    name: Clippy
    steps:
      # Check out the repository together with its git submodules.
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          submodules: true

      # Toolchain action pinned by commit, with the clippy component added.
      - uses: dtolnay/rust-toolchain@e814c742d4444ce2f3f6abddea7faf00161ed941 # 1.88
        with:
          components: clippy

      - uses: Swatinem/rust-cache@23869a5bd66c73db3c0ac40331f3206eb23791dc # v2.9.1

      # --tests lints test targets too; -D warnings denies every warning.
      - run: cargo clippy --tests -- -D warnings

  # Runs the test suite; the conformance, e2e and coverage jobs declare
  # `needs: [test]` and therefore wait for this job to succeed.
  test:
    runs-on: ubuntu-latest
    name: Test
    steps:
      # Check out the repository together with its git submodules.
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          submodules: true

      - uses: dtolnay/rust-toolchain@e814c742d4444ce2f3f6abddea7faf00161ed941 # 1.88

      - uses: Swatinem/rust-cache@23869a5bd66c73db3c0ac40331f3206eb23791dc # v2.9.1

      - run: cargo test

  # Runs the cargo-deny action against the project's Cargo.toml.
  deny:
    runs-on: ubuntu-latest
    name: Cargo Deny
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      # The manifest path is given explicitly rather than left to the action.
      - uses: EmbarkStudios/cargo-deny-action@3fd3802e88374d3fe9159b834c7714ec57d6c979 # v2
        with:
          manifest-path: Cargo.toml

  # MCP conformance job. Builds the release binary, starts it in the
  # background serving tests/fixtures/conformance.yaml on 127.0.0.1:3001,
  # waits for the HTTP endpoint to answer, runs the conformance action against
  # it, and finally stops the server. Runs only after the `test` job succeeds.
  mcp-conformance:
    name: MCP Conformance
    runs-on: ubuntu-latest
    needs: [test]
    steps:
      # Check out the repository together with its git submodules.
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          submodules: true
      - uses: dtolnay/rust-toolchain@e814c742d4444ce2f3f6abddea7faf00161ed941 # 1.88
      - uses: Swatinem/rust-cache@23869a5bd66c73db3c0ac40331f3206eb23791dc # v2.9.1
      - name: Build release binary
        run: cargo build --release

      # The server is backgrounded with `&`; its PID is written to GITHUB_ENV
      # so that the later steps can probe and stop it.
      - name: Start server
        run: |
          ./target/release/thoughtjack run \
            tests/fixtures/conformance.yaml \
            --mcp-server 127.0.0.1:3001 \
            --max-session 5m &
          echo "SERVER_PID=$!" >> "$GITHUB_ENV"

      # Polls up to 20 times with a 0.5s pause between attempts. Any HTTP
      # status other than curl's "000" (no response) counts as ready.
      - name: Wait for server
        run: |
          for i in $(seq 1 20); do
            # Fail fast if the server process is gone (e.g. it crashed on
            # startup) instead of polling until the timeout expires.
            if ! kill -0 "$SERVER_PID" 2>/dev/null; then
              echo "Server process (PID ${SERVER_PID:-unknown}) exited before becoming ready"
              exit 1
            fi
            # POST with empty body — server responds with 400, proving it's up
            status=$(curl -s -o /dev/null -w '%{http_code}' \
              -X POST http://127.0.0.1:3001/message \
              --max-time 2 2>/dev/null) || true
            if [ -n "$status" ] && [ "$status" != "000" ]; then
              echo "Server ready after ${i}×500ms (HTTP $status)"
              exit 0
            fi
            sleep 0.5
          done
          echo "Server failed to start within 10s"
          exit 1

      - name: Run conformance tests
        uses: modelcontextprotocol/conformance@5284fd022e14f040902db75803496edf10daaa30 # v0.1.14
        with:
          mode: server
          url: http://localhost:3001/message
          expected-failures: conformance-baseline.yml
          # Action defaults to Node 20, but dist/index.js uses fs.globSync (Node 22+)
          node-version: "22"

      # always() runs this even when an earlier step failed; `|| true` keeps
      # the step green if the process has already exited.
      - name: Stop server
        if: always()
        run: kill "$SERVER_PID" 2>/dev/null || true

  # End-to-end self-test. One job per scenario directory under
  # tests/e2e/fixtures; each builds the release binary and drives it through
  # tests/e2e/run_conformance.py with --self-test.
  e2e-self-test:
    name: 'E2E Self-Test (${{ matrix.scenario }})'
    needs: [test]
    runs-on: ubuntu-latest
    strategy:
      # Let the remaining scenarios finish even when one of them fails.
      fail-fast: false
      matrix:
        scenario:
          - mcp-client-basic
          - a2a-client-basic
    steps:
      # Check out the repository together with its git submodules.
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          submodules: true

      - uses: dtolnay/rust-toolchain@e814c742d4444ce2f3f6abddea7faf00161ed941 # 1.88

      - uses: Swatinem/rust-cache@23869a5bd66c73db3c0ac40331f3206eb23791dc # v2.9.1

      - name: Build release binary
        run: cargo build --release

      # The version is quoted so YAML reads it as a string, not a float.
      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: '3.12'

      - name: Install orchestrator dependencies
        run: pip install "pyyaml==6.0.3"

      # Results are written to tests/e2e/results, which the next step uploads.
      - name: Run self-test
        run: |
          python tests/e2e/run_conformance.py \
            --scenario "tests/e2e/fixtures/${{ matrix.scenario }}" \
            --self-test \
            --tj-binary ./target/release/thoughtjack \
            --base-port 19000 \
            --timeout 15 \
            --output-dir tests/e2e/results

      # always() uploads the results directory even when the self-test failed.
      - name: Upload results
        if: always()
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
        with:
          name: 'e2e-selftest-${{ matrix.scenario }}'
          path: tests/e2e/results/
          retention-days: 7

  # Coverage job. Produces an LCOV report with cargo-llvm-cov and uploads it
  # to Codecov. Runs only after the `test` job succeeds.
  coverage:
    name: Coverage
    needs: [test]
    runs-on: ubuntu-latest
    steps:
      # Check out the repository together with its git submodules.
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          submodules: true

      # Toolchain action pinned by commit, with llvm-tools-preview added.
      - uses: dtolnay/rust-toolchain@e814c742d4444ce2f3f6abddea7faf00161ed941 # 1.88
        with:
          components: llvm-tools-preview

      - uses: Swatinem/rust-cache@23869a5bd66c73db3c0ac40331f3206eb23791dc # v2.9.1

      - uses: taiki-e/install-action@06203676c62f0d3c765be3f2fcfbebbcb02d09f5 # v2.69.6
        with:
          tool: cargo-llvm-cov

      # Writes the report for the thoughtjack package to lcov.info.
      - name: Generate coverage
        run: cargo llvm-cov -p thoughtjack --lcov --output-path lcov.info

      # fail_ci_if_error is false, so an upload error is not configured to
      # fail the job.
      - name: Upload coverage reports to Codecov
        uses: codecov/codecov-action@4481f553995cc5011b158ce191746ac1a1d0f815 # v5.5.3
        with:
          files: lcov.info
          slug: thoughtgate/thoughtjack
          token: ${{ secrets.CODECOV_TOKEN }}
          fail_ci_if_error: false