# ralph-workflow 0.7.18

# Ralph Agents Configuration (example)
# Copy to `.agent/agents.toml` to override built-in defaults.
#
# Fields you may set per agent:
# - cmd, output_flag, yolo_flag, verbose_flag, can_commit
# - json_parser: claude | codex | gemini | opencode | generic
# - model_flag, display_name, session_flag, print_flag, streaming_flag
#
# Optional chain configuration:
# - [agent_chains] reusable ordered agent lists
# - [agent_drains] built-in runtime drain bindings
# Legacy [agent_chain] input is still accepted for compatibility, but the
# named chain + drain schema is the canonical model.
# Claude Code: Anthropic's AI coding assistant.
# https://claude.ai/code
[agents.claude]
display_name = "Claude Code"
cmd = "claude -p"
output_flag = "--output-format=stream-json"
verbose_flag = "--verbose"
yolo_flag = "--dangerously-skip-permissions"
# Session continuation uses --resume <session_id> (see `claude --help`), so an
# XSD retry continues the same conversation and the AI retains its memory.
session_flag = "--resume {}"
json_parser = "claude"
can_commit = true
# CCS (Claude Code Switch) - Universal AI profile manager
# https://github.com/kaitranntt/ccs
# CCS wraps Claude Code and provides multi-profile/multi-provider support.
# Use CCS aliases like "ccs/work", "ccs/personal", "ccs/gemini" for different profiles.
# Configure aliases in ~/.config/ralph-workflow.toml under [ccs_aliases].
[agents.ccs]
cmd = "ccs"
output_flag = "--output-format=stream-json"
yolo_flag = "--dangerously-skip-permissions"
verbose_flag = "--verbose"
can_commit = true
# CCS wraps Claude Code and therefore emits Claude's stream-json format, so its
# output is handled by the "claude" parser. "ccs" is not a supported json_parser
# value (supported: claude | codex | gemini | opencode | generic).
json_parser = "claude"
display_name = "CCS (Claude Code Switch)"
# Use Claude's --print; CCS reserves -p/--prompt for delegation.
print_flag = "--print"
# Session continuation: --resume <session_id> (passed through to Claude CLI)
session_flag = "--resume {}"

# GLM-Direct: GLM served through the Claude CLI, using CCS environment variables.
# Instead of going through the CCS wrapper, this agent invokes the Claude CLI
# directly with the GLM environment variables loaded from CCS settings. That
# sidesteps CCS's headless delegation formatting, which interferes with
# stream-json, and yields proper stream-json output.
#
# Optional: with a GLM profile configured in CCS (run: `ccs api create --preset glm`),
# CCS env var loading can be enabled by setting `ccs_profile = "glm"`.
[agents.glm-direct]
display_name = "GLM (Direct via Claude)"
cmd = "claude"
# Print mode for non-interactive runs.
print_flag = "--print"
output_flag = "--output-format=stream-json"
verbose_flag = "--verbose"
yolo_flag = "--dangerously-skip-permissions"
# Session continuation uses --resume <session_id> (see `claude --help`).
session_flag = "--resume {}"
json_parser = "claude"
can_commit = true

# OpenAI Codex CLI
# https://github.com/openai/codex
# yolo_flag is --full-auto, which gives automatic sandboxed execution.
[agents.codex]
display_name = "OpenAI Codex CLI"
cmd = "codex exec"
output_flag = "--json"
verbose_flag = ""
yolo_flag = "--full-auto"
json_parser = "codex"
can_commit = true

# Aider: AI pair programming in your terminal.
# https://github.com/paul-gauthier/aider
# --message selects single-message mode; --yes-always auto-confirms prompts.
[agents.aider]
display_name = "Aider"
cmd = "aider --message"
output_flag = ""
verbose_flag = "--verbose"
yolo_flag = "--yes-always"
json_parser = "generic"
can_commit = true

# Goose: Block's open-source AI agent.
# https://github.com/block/goose
# Invoked via the run command with -t for text input; --output-format selects
# JSON streaming.
[agents.goose]
display_name = "Goose (Block)"
cmd = "goose run -t"
output_flag = "--output-format=stream-json"
verbose_flag = "--debug"
# Intentionally empty: goose is designed as an interactive, session-based agent
# and doesn't support an unattended auto-approval mode. The --no-session flag
# only skips session file storage; it does not skip tool approval prompts.
# For unattended automation, consider other agents with explicit yolo support.
yolo_flag = ""
json_parser = "generic"
can_commit = true

# Cline: autonomous coding agent.
# https://github.com/cline/cline
# -F json requests JSON output; -y enables autonomous mode.
[agents.cline]
display_name = "Cline"
cmd = "cline -o"
output_flag = "-F json"
verbose_flag = "-v"
yolo_flag = "-y"
json_parser = "generic"
can_commit = true

# Continue.dev CLI (cn)
# https://docs.continue.dev/guides/cli
# -p runs headless; --format json requests JSON output.
[agents.continue]
display_name = "Continue.dev"
cmd = "cn -p"
output_flag = "--format json"
verbose_flag = "--verbose"
yolo_flag = "--auto"
json_parser = "generic"
can_commit = true

# Amazon Q Developer CLI
# https://docs.aws.amazon.com/amazonq/latest/qdeveloper-ug/command-line.html
# --no-interactive selects non-interactive mode.
[agents.amazon-q]
display_name = "Amazon Q Developer"
cmd = "q chat --no-interactive"
output_flag = ""
verbose_flag = ""
yolo_flag = "--trust-all-tools"
json_parser = "generic"
can_commit = true

# Gemini CLI
# https://github.com/google-gemini/gemini-cli
# The prompt is passed as a positional argument. -o/--output-format chooses the
# output format and -y/--yolo auto-approves all actions.
[agents.gemini]
display_name = "Google Gemini CLI"
cmd = "gemini"
output_flag = "-o stream-json"
verbose_flag = "-d"
yolo_flag = "-y"
json_parser = "gemini"
can_commit = true

# =============================================================================
# LOWER-COST / OPEN-SOURCE AGENTS
# =============================================================================

# Qwen Code: Alibaba's open-source CLI coding agent (Qwen3-Coder).
# https://github.com/QwenLM/qwen-code
# -p is the non-interactive prompt mode, --output-format stream-json enables
# JSON streaming, and --yolo auto-approves all tool calls.
[agents.qwen]
display_name = "Qwen Code"
cmd = "qwen -p"
output_flag = "--output-format stream-json"
verbose_flag = "--debug"
yolo_flag = "--yolo"
# Forked from Gemini CLI, but the output format is compatible with Claude's
# stream-json, hence the "claude" parser.
json_parser = "claude"
can_commit = true

# Mistral Vibe: Mistral AI's CLI coding assistant (Devstral).
# https://github.com/mistralai/mistral-vibe
# https://docs.mistral.ai/mistral-vibe/introduction
# --prompt <text> runs non-interactively; --auto-approve enables autonomous
# execution.
# Note: If you meant Mistral's API/SDK tooling (not mistral-vibe), add a separate agent.
[agents.vibe]
display_name = "Mistral Vibe"
cmd = "vibe --prompt"
# No JSON streaming output format is available, so output_flag stays empty and
# generic line-based parsing is used.
output_flag = ""
verbose_flag = ""
yolo_flag = "--auto-approve"
json_parser = "generic"
can_commit = true

# llama.cpp CLI: local LLM inference with llama-cli.
# https://github.com/ggml-org/llama.cpp
# -m gives the local model path and -cnv selects conversation mode.
# Requires local model setup. Not included in the default agent chains because
# local models can be heavy to install/run.
[agents.llama-cli]
display_name = "llama.cpp CLI"
cmd = "llama-cli -m /path/to/model.gguf -cnv -p"
# No native JSON streaming, so generic parsing is used.
output_flag = ""
verbose_flag = "-v"
# CHAT-ONLY AGENT: intentionally empty. This is a simple chat tool without
# agentic tool use capabilities; it has no autonomous mode or file operations.
yolo_flag = ""
json_parser = "generic"
can_commit = false

# AIChat: multi-provider LLM CLI tool (OpenAI, Claude, Gemini, Ollama, etc.).
# https://github.com/sigoden/aichat
# Supports 20+ LLM providers through a unified interface.
# Uses -S to disable streaming for scripting, -e for shell command execution.
[agents.aichat]
display_name = "AIChat"
cmd = "aichat"
output_flag = ""
verbose_flag = ""
# CHAT-ONLY AGENT: intentionally empty. This is primarily a chat interface with
# limited tool use. It supports shell execution via -e, but lacks comprehensive
# agentic capabilities like autonomous file operations.
yolo_flag = ""
json_parser = "generic"
can_commit = false

# Cursor CLI: Cursor's terminal-based AI coding agent.
# https://cursor.com/docs/cli/overview
# Supports GPT-5, Claude, and other frontier models.
[agents.cursor]
display_name = "Cursor CLI"
# Runs the 'agent' command. -p enables print/non-interactive mode (for
# scripts/automation); -f forces command execution unless explicitly denied,
# which enables autonomous operation.
cmd = "cursor agent -p -f"
output_flag = "--output-format text"
verbose_flag = ""
yolo_flag = ""
json_parser = "generic"
can_commit = true

# Plandex: terminal-based AI for large codebases.
# https://github.com/plandex-ai/plandex
# Handles up to 2M tokens of context, with tree-sitter project maps.
[agents.plandex]
display_name = "Plandex"
cmd = "plandex tell"
output_flag = ""
verbose_flag = ""
# --apply auto-applies changes; --commit auto-commits them.
yolo_flag = "--apply --commit"
json_parser = "generic"
can_commit = true

# Ollama: local LLM server with a CLI.
# https://github.com/ollama/ollama
# Runs local models like Llama, Mistral, CodeLlama.
[agents.ollama]
display_name = "Ollama"
cmd = "ollama run codellama"
output_flag = ""
verbose_flag = ""
# CHAT-ONLY AGENT: intentionally empty. This is a general-purpose chat
# interface for running local models. It lacks agentic tool use capabilities
# like autonomous file operations, so it suits simple Q&A and chat tasks.
yolo_flag = ""
json_parser = "generic"
can_commit = false

# =============================================================================
# AGENT CHAIN CONFIGURATION (required)
# =============================================================================
# Define reusable ordered chains, then bind Ralph's built-in drains to them.
# The first agent in each chain is preferred; the rest are fallbacks.
#
# Ralph automatically switches to fallback agents on certain failures
# (rate limits, auth issues, token exhaustion, command not found).

[agent_chains]
# Each chain is an ordered list: the first entry is the preferred agent and the
# remaining entries are fallbacks, tried in order.

# Chain led by Claude.
developer = [
    "claude",
    "codex",
    "cursor",
    "qwen",
    "vibe",
    "plandex",
    "opencode",
    "aider",
    "goose",
    "cline",
    "continue",
    "amazon-q",
    "gemini",
]

# Chain led by Codex, with Claude as the first fallback.
reviewer = [
    "codex",
    "claude",
    "cursor",
    "qwen",
    "vibe",
    "plandex",
    "opencode",
    "aider",
    "goose",
    "cline",
    "continue",
    "amazon-q",
    "gemini",
]

# Chain used for commits; same ordering as the developer chain.
commit = [
    "claude",
    "codex",
    "cursor",
    "qwen",
    "vibe",
    "plandex",
    "opencode",
    "aider",
    "goose",
    "cline",
    "continue",
    "amazon-q",
    "gemini",
]

[agent_drains]
# Each built-in drain is bound to one named chain from [agent_chains].

# Drains bound to the "developer" chain.
planning = "developer"
development = "developer"
analysis = "developer"

# Drains bound to the "reviewer" chain.
review = "reviewer"
fix = "reviewer"

# The commit drain has its own chain.
commit = "commit"

# Note: shared retry/backoff metadata still comes from the legacy compatibility
# input path today. Keep the named schema focused on reusable chains + drain
# bindings; runtime code normalizes everything into resolved built-in drains.

# =============================================================================
# OPENCODE PROVIDER TYPES EXPLAINED
# =============================================================================
# OpenCode supports multiple provider backends via the -m flag format:
#
# PROVIDER TYPE 1: OpenCode Zen Routing (opencode/*)
#   - Uses OpenCode's intelligent model routing gateway
#   - Requires OpenCode Zen account (opencode auth login -> select "OpenCode Zen")
#   - Examples: opencode/glm-4.7-free, opencode/claude-sonnet-4
#   - Benefits: Curated models, unified billing through OpenCode, free tier available
#
# PROVIDER TYPE 2: Z.AI Direct Access (zai/* or zhipuai/*)
#   - Connects directly to Z.AI's API (api.z.ai)
#   - Requires Z.AI API key (opencode auth login -> select "Z.AI")
#   - Examples: zai/glm-4.7, zai/glm-4.5, zhipuai/glm-4.7
#   - Note: Both "zai" and "zhipuai" prefixes are supported for Z.AI
#   - Benefits: Direct access to Z.AI's GLM models
#
# PROVIDER TYPE 2b: Z.AI Coding Plan (auth tier)
#   - Same Z.AI API endpoint, but with the Coding Plan subscription tier
#   - Requires Z.AI Coding Plan (opencode auth login -> select "Z.AI Coding Plan")
#   - Model prefix remains: zai/* (OpenCode expects provider/model)
#   - Examples: zai/glm-4.7, zai/glm-4.5
#   - Benefits: 3x usage at 1/7 the cost compared to standard Z.AI
#
# PROVIDER TYPE 3: Direct API Access ({provider}/*)
#   - Connects directly to provider's API (Anthropic, OpenAI, etc.)
#   - Requires your own API key configured via opencode auth
#   - Examples: anthropic/claude-sonnet-4, openai/gpt-5
#   - Benefits: Use existing API credits, no OpenCode/Z.AI dependency
#
# IMPORTANT:
#   - opencode/* routes through OpenCode's gateway (Zen)
#   - zai/* or zhipuai/* connects directly to Z.AI's API (standard tier)
#   - Z.AI Coding Plan is a separate subscription tier selected during auth (model prefix stays zai/*)
#   - These require different authentication selections in `opencode auth login`
#
# CHOOSING A PROVIDER - Decision Flowchart:
#   Q: Do you want free tier access via OpenCode?
#      YES -> Use opencode/glm-4.7-free (OpenCode Zen)
#      NO  -> Continue...
#   Q: Do you have a Z.AI Coding Plan subscription? (3x usage, 1/7 cost)
#      YES -> Use zai/* (and select "Z.AI Coding Plan" during auth)
#      NO  -> Continue...
#   Q: Do you have a standard Z.AI API key?
#      YES -> Use zai/* (Z.AI Direct)
#      NO  -> Continue...
#   Q: Do you already have Anthropic/OpenAI API keys?
#      YES -> Use anthropic/* or openai/* (Direct API)
#      NO  -> Use opencode/* (OpenCode Zen)
#   Q: Do you want unified billing through OpenCode?
#      YES -> Use opencode/* (OpenCode Zen)
#      NO  -> Use zai/*, anthropic/* or openai/* (Direct access)
#
# TL;DR:
#   - Use opencode/* for OpenCode Zen routing and unified billing
#   - Use zai/* for direct Z.AI API access (standard tier)
#   - Z.AI Coding Plan is selected during auth; model prefix stays zai/*
#   - Use anthropic/* or openai/* if you have your own provider API keys
#   - Provider fallback can mix all: try free Zen first, then Z.AI Coding Plan, then direct API

# =============================================================================
# OPENCODE DYNAMIC PROVIDER/MODEL SUPPORT
# =============================================================================
# OpenCode agents are now DYNAMIC based on the official OpenCode API catalog.
#
# The provider/model list is fetched from https://models.dev/api.json and cached
# for 24 hours. Set RALPH_OPENCODE_CACHE_TTL_SECONDS to customize the cache duration.
#
# USAGE
# -----
# Use the "opencode/provider/model" syntax in your agent chain configuration:
#
#   [agent_chains]
#   developer = ["opencode/opencode/glm-4.7-free", "opencode/anthropic/claude-sonnet-4-5", "claude"]
#   reviewer = ["opencode/anthropic/claude-sonnet-4-5", "claude"]
#
#   [agent_drains]
#   planning = "developer"
#   development = "developer"
#   review = "reviewer"
#   fix = "reviewer"
#   commit = "reviewer"
#   analysis = "developer"
#
# DYNAMIC VALIDATION
# ------------------
# At startup, Ralph validates all "opencode/*" agent references against the
# cached API catalog. If a provider or model doesn't exist, you'll see a helpful
# error message with suggestions for typos.
#
# COMMON PROVIDERS
# ----------------
# - opencode/*       - OpenCode Zen gateway (requires opencode auth login)
# - anthropic/*      - Direct Anthropic API (requires ANTHROPIC_API_KEY)
# - openai/*         - Direct OpenAI API (requires OPENAI_API_KEY)
# - google/*         - Google AI Studio (requires GOOGLE_GENERATIVE_AI_API_KEY)
# - zai/*            - Z.AI Direct API (requires Z.AI credentials)
# - deepseek/*       - DeepSeek API (requires DEEPSEEK_API_KEY)
# - groq/*           - Groq ultra-fast inference (requires GROQ_API_KEY)
# - ollama/*         - Local Ollama models (requires ollama serve)
# - And 60+ more providers - see https://models.dev for the full list
#
# EXAMPLES
# --------
# # Try free GLM first, then Claude via Anthropic direct API
# developer = ["opencode/opencode/glm-4.7-free", "opencode/anthropic/claude-sonnet-4-5", "claude"]
#
# # Z.AI Coding Plan (3x usage, 1/7 cost) with fallback to Zen
# developer = ["opencode/zai/glm-4.7", "opencode/opencode/claude-sonnet-4", "claude"]
#
# # Local Ollama with cloud backup
# developer = ["opencode/ollama/llama3", "opencode/openai/gpt-4o", "claude"]
#
# # Multiple OpenCode agents in the same chain (each represents a SEPARATE agent)
# developer = ["opencode/zai/glm-4.7", "opencode/anthropic/claude-sonnet-4-5"]
# reviewer = ["opencode/opencode/glm-4.7-free"]
#
# PROVIDER-LEVEL FALLBACK (Legacy Compatibility Input)
# ----------------------------------------------------
# For backward compatibility, a metadata-only legacy [agent_chain] section can
# still configure provider_fallback and shared retry metadata alongside the
# canonical named [agent_chains] / [agent_drains] schema. Named drain bindings
# remain the authoritative runtime model after normalization.
#
# FINDING AVAILABLE MODELS
# -------------------------
# To see all available providers and models:
# 1. Check the API catalog: https://models.dev
# 2. Or inspect the cache file: ~/.cache/ralph-workflow/opencode-api-cache.json
# 3. Run: ralph --developer-agent opencode/anthropic/claude-sonnet-4-5 --trace
#    (validation errors will show available options)
#
# AUTHENTICATION
# --------------
# Most providers require API keys or authentication:
# - Direct API providers (anthropic, openai, etc.): Run: opencode auth login (select the provider)
# - OpenCode Zen: Run: opencode auth login (select "OpenCode Zen")
# - Z.AI: Run: opencode auth login (select "Z.AI" or "Z.AI Coding Plan")
# - Local providers (ollama, lmstudio): Just start the service first
#
# UNATTENDED OPERATION
# --------------------
# All OpenCode invocations use --auto-approve flag for unattended operation.
# Ralph is designed to run without user interaction (yolo mode).
# -------------------------------------------------------------------------
# EXAMPLE AGENT CHAINS
# -------------------------------------------------------------------------
# Example 1: OpenCode Zen only (all requests go through OpenCode billing)
# [agent_chains]
# developer = ["opencode/opencode/glm-4.7-free", "opencode/opencode/claude-sonnet-4", "claude"]
# reviewer = ["opencode/opencode/claude-sonnet-4", "claude"]
# [agent_drains]
# planning = "developer"
# development = "developer"
# review = "reviewer"
# fix = "reviewer"
# commit = "reviewer"
# analysis = "developer"
#
# Example 2: Direct API only (use your own Anthropic/OpenAI keys)
# [agent_chains]
# developer = ["opencode/anthropic/claude-sonnet-4", "opencode/openai/gpt-4o", "claude"]
# reviewer = ["opencode/anthropic/claude-sonnet-4", "claude"]
# [agent_drains]
# planning = "developer"
# development = "developer"
# review = "reviewer"
# fix = "reviewer"
# commit = "reviewer"
# analysis = "developer"
#
# Example 3: Mixed fallback (try free Zen first, then direct API)
# [agent_chains]
# developer = ["opencode/opencode/glm-4.7-free", "opencode/anthropic/claude-sonnet-4", "claude"]
# reviewer = ["opencode/opencode/claude-sonnet-4", "opencode/anthropic/claude-sonnet-4"]
# [agent_drains]
# planning = "developer"
# development = "developer"
# review = "reviewer"
# fix = "reviewer"
# commit = "reviewer"
# analysis = "developer"
#
# Example 4: Z.AI Direct first (developer falls back to the free Zen model, then Claude)
# [agent_chains]
# developer = ["opencode/zai/glm-4.7", "opencode/opencode/glm-4.7-free", "claude"]
# reviewer = ["opencode/zai/glm-4.7", "claude"]
# [agent_drains]
# planning = "developer"
# development = "developer"
# review = "reviewer"
# fix = "reviewer"
# commit = "reviewer"
# analysis = "developer"
#
# Example 5: Full fallback chain (Z.AI Direct -> Zen -> Direct API)
# [agent_chains]
# developer = ["opencode/zai/glm-4.7", "opencode/opencode/glm-4.7-free", "opencode/anthropic/claude-sonnet-4"]
# reviewer = ["opencode/zai/glm-4.7", "opencode/opencode/claude-sonnet-4", "opencode/anthropic/claude-sonnet-4"]
# [agent_drains]
# planning = "developer"
# development = "developer"
# review = "reviewer"
# fix = "reviewer"
# commit = "reviewer"
# analysis = "developer"

# =============================================================================
# CUSTOM AGENT EXAMPLE
# =============================================================================
# Add your own agents by defining new sections:

# [agents.myagent]
# cmd = "my-ai-tool run"
# output_flag = "--json-stream"
# yolo_flag = "--auto-fix"
# verbose_flag = "--verbose"
# can_commit = true
# json_parser = "generic"