# opencrabs 0.3.38

# OpenCrabs Configuration File
# Copy this file to one of these locations:
#   - Linux/macOS: ~/.opencrabs/config.toml
#   - Windows: %APPDATA%\opencrabs\config.toml or opencrabs\config.toml
#
# IMPORTANT: API keys should NOT be stored here!
# Instead, store API keys in keys.toml (chmod 600) for security:
#   - ~/.opencrabs/keys.toml
# Keys in keys.toml take priority over this file.

[database]
# Location of the database file that stores conversation history.
# The value below is the default; uncomment only if you need to override it.
# path = "~/.opencrabs/opencrabs.db"

[providers]
# Parent table: each [providers.<name>] section below configures one provider.
# ========================================
# Xiaomi MiMo  ⭐ DEFAULT (opencrabs x Xiaomi collab)
# ========================================
# FREE + KEYLESS during the launch window (until 2026-06-25): leave api_key
# unset. Requests go through our proxy, which supplies the key server-side, so
# no key ever lives on your machine. This is the default provider for fresh
# installs — new users land almost straight in chat.
#
# After the cutoff, set your own key in keys.toml ([providers.xiaomi] api_key).
#
# Models are fetched live from the proxy's /v1/models (chat models only), so the
# /models picker always shows what's actually available. The list below is just
# an offline fallback. Thinking is ON by default (the model streams its
# reasoning); set enable_thinking = false to turn it off. Prompt caching is
# automatic server-side — nothing to configure.
[providers.xiaomi]
enabled = true
base_url = "https://xiaomi-collab.opencrabs.com/v1/chat/completions"
default_model = "mimo-v2.5-pro"
# Offline fallback only; the live list is fetched from the proxy's /v1/models.
models = [
    "mimo-v2.5-pro",
    "mimo-v2-pro",
    "mimo-v2.5",
    "mimo-v2-omni",
    "mimo-v2-flash",
]
context_window = 1_000_000
# Thinking is on by default; uncomment to disable the model's reasoning output.
# enable_thinking = false

# ========================================
# Ollama Provider (Local + Cloud)
# ========================================
# Local: runs on localhost:11434 — no API key needed
# Cloud: set base_url to your Ollama Cloud endpoint + optional API key
# Models are fetched automatically from /api/tags
# Requires: ollama serve (local) or a cloud endpoint
[providers.ollama]
enabled = false
base_url = "http://localhost:11434/v1/chat/completions"
# Optional: override the default model.
# default_model = "llama3.1"
# Optional: only needed for Ollama Cloud. Prefer storing it in keys.toml
# (see the note at the top of this file) rather than here.
# api_key = ""

# ========================================
# Custom: OpenAI-Compatible Provider (Local LLMs, and any OpenAI Compatible model)
# ========================================
# Use this for LM Studio, Ollama, LocalAI, etc.
# Every custom provider needs a name — the label after "custom." (e.g. lm_studio, nvidia, groq).
# You can define as many as you need and switch between them via /models.
[providers.custom.lm_studio]
# NOTE(review): this example ships enabled while [providers.xiaomi] is also
# enabled and described as the default — confirm that is intended.
enabled = true
base_url = "http://localhost:1234/v1/chat/completions"  # LM Studio default
models = [
    "kimi-k2.5",
    "glm-5",
    "MiniMax-M2.7",
    "qwen3-coder",
    "gpt-oss-120b",
    "llama-4-70B",
    "mistral-Large-3",
    "qwen3-coder-next",
]

# ⭐ IMPORTANT: default_model must match the model name loaded in LM Studio!
# Common examples:
#   - qwen2.5-coder-7b-instruct
#   - codellama-7b-instruct
#   - deepseek-coder-6.7b-instruct
#   - llama-3.2-1b-instruct
default_model = "qwen3-coder-next"

# Other local LLM servers — just add another named section:
#
# [providers.custom.ollama]
# enabled = false
# base_url = "http://localhost:11434/v1/chat/completions"
# default_model = "mistral"
# models = ["mistral", "llama3", "codellama"]

# ========================================
# GitHub Copilot Provider
# ========================================
# Uses your GitHub Copilot subscription via OAuth device flow.
# Run /onboard:provider and select GitHub Copilot to sign in.
[providers.github]
enabled = false
# Kept below the table header so that, if uncommented, it applies to this table.
# base_url = "https://api.githubcopilot.com/chat/completions"
default_model = "gpt-4o"
# Models are fetched live from the Copilot API.

# ========================================
# Official OpenAI Provider
# ========================================
[providers.openai]
enabled = false
# Optional: override the default model.
default_model = "gpt-5-nano"
# Optional: describes images for the chat model when it lacks vision.
# vision_model = "gpt-5-nano"
# Optional: routes generate_image to OpenAI /v1/images/generations.
# generation_model = "gpt-image-1"

# ========================================
# Anthropic Provider (Claude)
# ========================================
[providers.anthropic]
enabled = false
# Optional: override the default model.
default_model = "claude-sonnet-4-6"

# ========================================
# OpenRouter Provider (100+ models via OpenAI-compatible API)
# ========================================
[providers.openrouter]
enabled = false
base_url = "https://openrouter.ai/api/v1/chat/completions"
# Many other model options are listed at openrouter.ai/models.
default_model = "qwen/qwen3-coder-next"

# ========================================
# OpenCode Provider (Go and Zen plans)
# ========================================
# Native API provider — no CLI subprocess needed.
# Models are fetched live from opencode.ai/zen/go/v1/models
# API key goes in keys.toml: [providers.opencode] api_key = "..."
# Get key from: opencode.ai/settings
[providers.opencode]
enabled = false
base_url = "https://opencode.ai/zen/go/v1/chat/completions"
# Optional: override the default model.
# default_model = "qwen3.6-plus"

# ========================================
# Google Gemini Provider
# ========================================
# Models fetched live from the Gemini API during onboarding and /models
# API key goes in keys.toml under [providers.gemini]
# Get key from: aistudio.google.com
[providers.gemini]
enabled = false
default_model = "gemini-2.5-flash"
# Optional: per-provider override for generate_image. The Gemini host is
# detected automatically, so the call stays on the Gemini wire rather than
# going to /v1/images/generations.
# generation_model = "imagen-4.0-generate-001"

# ========================================
# Claude CLI (Max Subscription — no API key needed)
# ========================================
# Spawns the local 'claude' CLI directly — uses your Claude Max subscription.
# No proxy, no API key. Just install Claude Code CLI and authenticate it.
# Install: npm install -g @anthropic-ai/claude-code
[providers.claude_cli]
enabled = false
# One of "sonnet", "opus", or "haiku".
default_model = "sonnet"

# ========================================
# Codex CLI (ChatGPT/Codex Subscription — no API key needed)
# ========================================
# Spawns the local 'codex' CLI directly — piggybacks on the auth stored in
# ~/.codex/auth.json by `codex login`. No proxy, no API key.
# Install: npm install -g @openai/codex   (then run `codex` once to sign in)
# Models per developers.openai.com/codex/models. gpt-5.5 is ChatGPT-auth only;
# fall back to gpt-5.4 if your account doesn't have 5.5 yet.
[providers.codex_cli]
enabled = false
# gpt-5.5 is ChatGPT-auth only; switch to "gpt-5.4" if your account lacks it.
default_model = "gpt-5.5"
models = [
    "gpt-5.5",
    "gpt-5.4",
    "gpt-5.4-mini",
    "gpt-5.3-codex",
    "gpt-5.3-codex-spark",
    "gpt-5.2",
]

# ========================================
# Codex (OAuth — no CLI needed)
# ========================================
# Authenticates directly with OpenAI Codex subscription via device-code
# OAuth flow. Tokens stored in ~/.opencrabs/auth/codex.json with automatic
# refresh. No `codex` CLI binary required — authenticate through the
# onboarding wizard (/onboard:provider).
# Uses the same public client ID as Codex CLI: app_EMoamEEZ73f0CkXaXp7hrann
[providers.codex]
# Authenticate through the onboarding wizard (/onboard:provider) before enabling.
enabled = false
default_model = "gpt-5.5"
models = [
    "gpt-5.5",
    "gpt-5.4",
    "gpt-5.4-mini",
    "gpt-5.3-codex",
    "gpt-5.3-codex-spark",
    "gpt-5.2",
    "gpt-4o",
    "o3",
    "o4-mini",
]

# ========================================
# Minimax Provider (Chinese AI, OpenAI-compatible)
# ========================================
# Note: Minimax does NOT have a /models endpoint, so add models manually
[providers.minimax]
enabled = false
base_url = "https://api.minimax.io/v1"
default_model = "MiniMax-M2.7"
# Listed by hand because Minimax has no /models endpoint.
models = [
    "MiniMax-M2.7",
    "MiniMax-M2.5",
    "MiniMax-M2.1",
    "MiniMax-Text-01",
]
# Describes images for the chat model when it lacks vision.
vision_model = "MiniMax-Text-01"

# ========================================
# Zhipu Provider (GLM models)
# ========================================
# API key goes in keys.toml: [providers.zhipu] api_key = "..."
# Get key from: open.bigmodel.cn or z.ai
[providers.zhipu]
enabled = false
# "api" (default) or "coding" for the code-optimized endpoint.
# endpoint_type = "coding"
default_model = "glm-5.1"
models = [
    "glm-5.1",
    "glm-5-turbo",
    "glm-5",
    "glm-4.7",
    "glm-4.6",
    "glm-4.5",
    "glm-4.5-air",
]

# ========================================
# Qwen / DashScope Provider (Alibaba Cloud Model Studio, OpenAI-compatible)
# ========================================
# DashScope OpenAI-compatible endpoints (use the one matching your API key's region):
#   - China (Beijing):  https://dashscope.aliyuncs.com/compatible-mode/v1
#   - Singapore (intl): https://dashscope-intl.aliyuncs.com/compatible-mode/v1
#   - US (Virginia):    https://dashscope-us.aliyuncs.com/compatible-mode/v1
#   - Alibaba Coding Plan (intl): https://coding-intl.dashscope.aliyuncs.com/v1
#   - Alibaba Coding Plan (CN):   https://coding.dashscope.aliyuncs.com/v1
# Regions are NOT interchangeable — match base_url to the region that issued your key.
# API key goes in keys.toml: [providers.qwen] api_key = "..."
# Get key from: bailian.console.aliyun.com (DashScope) or qwen.ai/apiplatform (Coding Plan)
[providers.qwen]
enabled = false
# Must match the region that issued your API key (see the endpoint list above).
base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
default_model = "qwen3.6-plus"
models = [
    "qwen3.6-plus",
    "qwen3-max",
    "qwen3-coder-plus",
    "qwen3.5-plus",
    "qwen-max",
    "qwen-plus",
    "qwen-flash",
    "qwen-vl-max",
]
# Opt in to Qwen3 hybrid thinking mode.
# enable_thinking = true

# ========================================
# Fallback Providers (automatic failover)
# ========================================
# When the primary provider fails, try these in order.
# Each must already have API keys configured in keys.toml.
# Supports single or multiple fallbacks.
[providers.fallback]
enabled = false
# Tried in order when the primary provider fails.
providers = ["openrouter", "anthropic"]
# Legacy single-fallback form; use the `providers` array instead.
# provider = "openrouter"

# ========================================
# STT (Speech-to-Text) Providers
# ========================================
# Groq Whisper for transcription
[providers.stt.groq]
enabled = false
# Whisper model used for transcription.
default_model = "whisper-large-v3-turbo"

# OpenAI-compatible STT (self-hosted Whisper, Together AI, etc.)
[providers.stt.openai_compatible]
enabled = false
# Ollama, LM Studio, or any OpenAI-compatible STT server.
base_url = "http://localhost:11434"
# NOTE(review): this table uses `model` while [providers.stt.groq] uses
# `default_model` — confirm the key name the loader expects.
model = "whisper-large-v3-turbo"

# Voicebox STT (local voice-to-text service)
[providers.stt.voicebox]
enabled = false
# Address of the local Voicebox service.
base_url = "http://localhost:8000"

# ========================================
# TTS (Text-to-Speech) Providers
# ========================================
# OpenAI TTS for voice output
[providers.tts.openai]
enabled = false
# NOTE(review): `default_model` and `model` carry the same value — confirm
# which key the TTS loader reads and whether both are required.
default_model = "gpt-4o-mini-tts"
voice = "ash"             # TTS voice name
model = "gpt-4o-mini-tts" # TTS model

# OpenAI-compatible TTS (self-hosted, Together AI, etc.)
[providers.tts.openai_compatible]
enabled = false
# Base address of the OpenAI-compatible TTS server.
base_url = "http://localhost:11434"
model = "tts-1"  # TTS model
voice = "alloy"  # TTS voice name

# Voicebox TTS (local text-to-voice service)
[providers.tts.voicebox]
enabled = false
base_url = "http://localhost:8000"
# Presumably the Voicebox voice profile to speak with — TODO confirm.
profile_id = ""

# ========================================
# Image Generation & Vision (Google Gemini)
# ========================================
# API key goes in keys.toml under [image]
# Get key from: aistudio.google.com
# Use /onboard:image to configure via wizard

# ========================================
# Cron Job Defaults
# ========================================
# Default provider/model for cron jobs that don't specify their own.
# Priority: per-job --provider > [cron] default > session's active provider.
# Useful for routing cron jobs to a cheaper provider.
[cron]
# Uncomment to set the provider/model for cron jobs that don't specify their own.
# default_provider = "minimax"
# default_model = "MiniMax-M2.7"

# ========================================
# Agent / Sub-Agent Defaults
# ========================================
# Default provider/model for spawned sub-agents (e.g., research tasks, parallel tool calls).
# Priority: subagent_provider > session's active provider.
# Useful for routing sub-agents to a cheaper/faster model while keeping the main session on a premium one.
[agent]
# Context window in tokens (default: 200000).
# context_limit = 200000
# subagent_model is paired with subagent_provider; the example below uses a
# model ID from the [providers.zhipu] `models` list.
# subagent_provider = "zhipu"
# subagent_model = "glm-5"

# Lazy tool-schema loading (default: true — RECOMMENDED). Each request ships
# only the CORE tool schemas (~4k tokens) plus `tool_search`. The agent calls
# `tool_search("what I need")` to discover and activate the extended tools
# (browser, channels, sub-agents, media, system) on demand — they then ride on
# subsequent requests for the session. We recommend leaving this on: load only
# the core set and let the agent pull the rest as needed.
#
# Setting `lazy_tools = false` reverts to sending the FULL tool-schema set —
# ALL ~95 tools (~20k tokens) — in EVERY single request, whether the turn uses
# them or not. That overhead is counted by the provider on every call (real
# cost, not just display). Only set false if a model in your setup struggles
# to reach for `tool_search`.
# lazy_tools = false

# Provider/model for Recursive Self-Improvement (RSI) cycles. The model is
# paired with the provider, same as subagent_provider/subagent_model.
# Unset self_improvement_provider → inherit the active provider; unset
# self_improvement_model → use that provider's default model.
# self_improvement_provider = "zhipu"
# self_improvement_model = "glm-5.1"

# Auto-install new releases on startup (default: true).
# When true, OpenCrabs checks GitHub on startup and silently installs any
# new release, then hot-restarts. Set to false if you prefer to be prompted
# before each update (or to /evolve manually).
# auto_update = true

# Redact sensitive data from tool outputs and display (default: true).
# When true, API keys, tokens, passwords, and IP addresses are redacted
# from tool call displays and outputs. Set to false during sysadmin/devops
# work where seeing IPs, tokens, and passwords is necessary.
# redact_sensitive_data = false

[image.generation]
enabled = false
# Gemini image-gen model ("Nano Banana").
model = "gemini-3.1-flash-image-preview"
# Override the model per-provider by setting `generation_model` under
# `[providers.<name>]` — e.g. `imagen-4.0-generate-001` on gemini, or
# `gpt-image-1` / `black-forest-labs/flux-1.1-pro` on any OpenAI-
# compatible provider (openai, openrouter, custom, …). When the active
# provider has the field set, generate_image picks the right wire
# backend automatically: Gemini host → Gemini API, anything else →
# `/v1/images/generations` with Bearer auth.

# ========================================
# Vision setup — two paths, pick one
# ========================================
#
# Path A (preferred, simpler): set `vision_model = "<model>"` on your
# active [providers.<name>] block above. Works for every built-in and
# custom provider. The agent calls the vision model on the SAME
# provider endpoint via the `analyze_image` tool, so no second API key
# is needed. Pick a vision-capable model on that provider — DeepSeek
# chat models like `deepseek-v4-flash` reject image_url content, so
# point `vision_model` at a vision-capable variant of the same family
# (every provider has at least one).
#
# Path B (fallback): enable Gemini globally below. Use this only when
# your active provider has no vision-capable model.
#
# IMPORTANT: the api_key for vision must be set in `keys.toml`
# `[image]` section, NOT here. The api_key field on `[image.vision]`
# carries `#[serde(skip)]` and is silently dropped on load — put it in
# keys.toml or it will look enabled but produce zero vision calls.
#
# When vision is unavailable for any reason, `is_vision_available`
# now logs the exact reason at INFO level in
# `~/.opencrabs/logs/opencrabs.YYYY-MM-DD` — search for `target=vision`.
#
# Easiest setup: run `/onboard:image` and it walks you through Gemini.

[image.vision]
enabled = false
# Gemini vision model. The API key belongs in keys.toml under [image], not here.
model = "gemini-3.1-flash-image-preview"

# ========================================
# Browser automation (feature: browser)
# ========================================
# By default each profile launches its own headless Chromium (~250-300MB each).
# To share ONE Chromium across multiple profiles (big memory win on small VDS),
# start a standalone instance with remote debugging:
#
#   chromium --remote-debugging-port=9222 --headless --no-sandbox
#
# then point every profile's config at it. Prefer the http:// form — OpenCrabs
# queries /json/version to find the real devtools websocket URL. (A bare
# ws://host:port is also accepted and normalized to http:// automatically.)
# [browser]
# cdp_endpoint = "http://localhost:9222"

# ========================================
# Tips for Using Local LLMs
# ========================================
# 1. Make sure LM Studio is running before starting OpenCrabs
# 2. Load a model in LM Studio first
# 3. Set default_model to EXACTLY match the model name shown in LM Studio
# 4. Increase context length in LM Studio if you get overflow errors:
#    - Recommended: 8192 or higher
#    - Location: LM Studio > Model Settings > Context Length

# ==================================================
# Channels (Telegram / WhatsApp / Slack / Discord / Trello)
# ==================================================

# respond_to controls which messages the bot replies to (applies to Telegram, Discord, Slack):
#   "all"      — reply to every message in allowed channels (default)
#   "dm_only"  — reply only to direct/private messages
#   "mention"  — reply only when the bot is @mentioned or replied-to

[channels.whatsapp]
enabled = false
# Phone numbers allowed to message the bot (E.164 format, leading + optional).
# Access control is purely phone-based: add numbers here to restrict who can
# message the bot.
allowed_phones = ["+15551234567"]
# Archive inactive non-owner sessions after N hours (default: never).
# session_idle_hours = 24.0

[channels.discord]
enabled = false
# Where the bot operates (empty = all channels).
allowed_channels = ["channel_id"]
# Who the bot replies to: numeric user IDs (empty = everyone).
allowed_users = [123456789012345]
# One of "all" | "dm_only" | "mention".
# respond_to = "all"
# Archive inactive non-owner sessions after N hours (default: never).
# session_idle_hours = 24.0

[channels.telegram]
enabled = false
# Who the bot replies to: numeric user IDs (empty = everyone).
allowed_users = [123456789]
# Chat/group IDs to restrict to (empty = all chats).
# allowed_channels = ["-100123456789"]
# One of "all" | "dm_only" | "mention".
# respond_to = "all"
# Archive inactive non-owner sessions after N hours (default: never).
# session_idle_hours = 24.0

[channels.slack]
enabled = false
# Where the bot operates: Slack channel IDs (empty = all).
allowed_channels = ["C12345678"]
# Who the bot replies to: Slack user IDs (empty = everyone).
allowed_users = ["U12345678"]
# One of "all" | "dm_only" | "mention".
# respond_to = "all"
# Archive inactive non-owner sessions after N hours (default: never).
# session_idle_hours = 24.0

# ========================================
# Trello — board card management
# ========================================
# API keys/tokens go in keys.toml under [channels.trello]
# Default mode: tool-only — no automatic polling. The AI acts on Trello only
# when you explicitly ask it to via trello_send / trello_connect.
# Opt-in polling: set poll_interval_secs > 0 to have the agent watch boards
# for @mentions of the bot username and respond to them.
[channels.trello]
enabled = false
# Boards to monitor, as a TOML array. 24-char hex IDs and human-readable board
# names can be mixed; the agent resolves names at startup. (The wizard takes
# the same list comma-separated.)
board_ids = ["your-board-name", "abc123def456abc123def456"]
# Trello member IDs allowed to @mention the bot (empty = all).
allowed_users = []
# Opt-in: poll boards every N seconds for @mentions. Default = disabled.
# poll_interval_secs = 30
# Archive inactive non-owner sessions after N hours (default: never).
# session_idle_hours = 24.0

# ========================================
# Daemon Mode (systemd / launchd service)
# ========================================
# Settings for `opencrabs daemon` headless mode.
[daemon]
# Port for the HTTP health check. When set, GET /health returns 200 OK + JSON.
# Useful for systemd watchdog, uptime monitors, and external health probes.
# health_port = 8080

# ========================================
# Agent-to-Agent (A2A) Protocol
# ========================================
# Enables HTTP gateway for peer-to-peer agent communication.
# Other A2A-compatible agents can send tasks, collaborate, and debate.
[a2a]
enabled = false
bind = "127.0.0.1"     # Loopback only by default for security
port = 18790            # A2A gateway port
# CORS allowed origins (empty = no cross-origin requests allowed).
# allowed_origins = ["http://localhost:3000"]
# API key for Bearer token auth on /a2a/v1 (optional, recommended when `bind`
# is not a loopback address). It can also be set in keys.toml under
# [a2a] api_key = "...".
# api_key = "your-secret-key"

# ========================================
# Web Search Providers (DuckDuckGo is free and enabled by default; no additional web search provider is required)
# ========================================

[providers.web_search.exa]
enabled = true
# MCP access is free, so it is enabled by default. The API is also free for
# up to 1000 requests; its key goes in keys.toml:
# [providers.web_search.exa] api_key = "..."

[providers.web_search.duckduckgo]
enabled = true
# Completely free, enabled by default.

[providers.web_search.brave]
enabled = false
# It's free for up to 1000 requests. API key goes in keys.toml:
# [providers.web_search.brave] api_key = "..."

# ========================================
# Memory / Embeddings
# ========================================
# Controls vector embeddings for semantic memory search.
# When disabled, only FTS5 (keyword) search is used — no model download,
# no llama.cpp init, zero GPU/CPU overhead.
#
# Automatically set to false on VPS/cloud (detected at startup).
# You can also set it manually here.
#
# [memory]
# vector_enabled = false

# --------------------------------------------
# OpenAI-compatible Embedding API
# --------------------------------------------
# When configured, embeddings are generated via API instead of the local
# GGUF model (embeddinggemma-300M). This eliminates the ~300MB model
# download and ~2.9GB RAM overhead of llama.cpp.
#
# Supports any OpenAI-compatible /v1/embeddings endpoint:
# OpenAI, Ollama, LM Studio, localai, Jina, etc.
#
# Example: OpenAI
# [memory.embedding]
# url = "https://api.openai.com/v1"
# model = "text-embedding-3-small"
# # api_key goes in keys.toml: [providers.memory_embedding] api_key = "sk-..."
# # dimensions = 1536   # auto-detected from first API response if unset
#
# Example: Ollama (local, free)
# [memory.embedding]
# url = "http://localhost:11434/v1"
# model = "nomic-embed-text"
#
# Example: Jina
# [memory.embedding]
# url = "https://api.jina.ai/v1"
# model = "jina-embeddings-v3"
# # api_key goes in keys.toml