# Runs the live test suite (real claude CLI, real API calls) on a daily
# schedule and on demand.
name: Live tests
# Note: generic YAML 1.1 parsers read the bare key `on` as a boolean; GitHub's
# workflow loader treats it as the trigger key.
on:
  schedule:
    # Every day at 09:00 (GitHub evaluates cron schedules in UTC).
    - cron: "0 9 * * *"
  # Manual trigger. `scope` selects between the full suite and a smoke subset;
  # scheduled runs carry no inputs.
  workflow_dispatch:
    inputs:
      scope:
        description: "Which live tests to run"
        type: choice
        options: [full, smoke]
        default: full
# Restrict the workflow token to read-only access to repository contents.
permissions:
  contents: read
# Every run shares a single concurrency group, so live runs do not overlap.
# A run that is already in progress is left to finish instead of being
# cancelled when a newer run is queued.
concurrency:
  group: live-tests
  cancel-in-progress: false
jobs:
  # Single job: installs the claude CLI, verifies it can authenticate, then
  # runs the `live` integration test target against the real API.
  live:
    name: Live tests (real claude)
    runs-on: ubuntu-latest
    # Run only in the joshrotenberg/roba repository.
    if: ${{ github.repository == 'joshrotenberg/roba' }}
    timeout-minutes: 20
    env:
      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
    steps:
      - uses: actions/checkout@v4

      # Fail early with an actionable annotation when the secret is missing,
      # before spending time on toolchain setup.
      - name: Guard -- API key present
        run: |
          if [ -z "${ANTHROPIC_API_KEY}" ]; then
            echo "::error::ANTHROPIC_API_KEY secret is not set. Add it under" \
              "Settings -> Secrets and variables -> Actions before this workflow can run."
            exit 1
          fi

      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2

      - name: Install claude CLI
        run: |
          # The default `run` shell is `bash -e` without pipefail: if curl
          # failed, `bash` would read empty input, exit 0, and hide the error.
          set -o pipefail
          curl -fsSL https://claude.ai/install.sh | bash
          # GitHub Actions runs each step in a fresh non-login shell, so the
          # installer's PATH edit to the shell profile doesn't carry over.
          # Add the known install locations explicitly. Use `if` (not
          # `[ -d ] && echo`): under `bash -e`, a false `[ -d ]` on a
          # non-existent dir returns non-zero and fails the whole step.
          for d in "$HOME/.local/bin" "$HOME/.claude/bin"; do
            if [ -d "$d" ]; then echo "$d" >> "$GITHUB_PATH"; fi
          done

      - name: Preflight -- claude reachable + auth works
        run: |
          claude --version
          # Minimal headless call: fails fast and clearly if the API key or
          # a first-run onboarding step blocks claude, instead of a confusing
          # hang deep in the test suite.
          # Capture the reply before truncating it. Piping claude straight
          # into `head` would make the pipeline's exit status head's, so a
          # failing or timed-out claude would not fail this step.
          reply="$(timeout 90 claude -p "reply with the single word: ok" \
            --model haiku --output-format json)"
          # Show at most the first 600 characters of the JSON reply.
          printf '%s\n' "${reply:0:600}"
          echo "preflight ok"

      - name: Run live tests
        env:
          # Hand the dispatch input to the script through the environment
          # rather than expanding it inside the script text. The value is
          # empty on scheduled runs, which selects the full suite below.
          LIVE_TEST_SCOPE: ${{ github.event.inputs.scope }}
        run: |
          # --test-threads=2: real API calls; keep concurrency low to avoid
          # rate-limit flakes (a 4-way run showed intermittent failures).
          if [ "${LIVE_TEST_SCOPE}" = "smoke" ]; then
            echo "Running smoke subset"
            cargo test --test live --all-features -- --ignored --test-threads=2 \
              live_smoke live_session_continue live_output_json
          else
            echo "Running full live suite"
            cargo test --test live --all-features -- --ignored --test-threads=2
          fi