# brainos-core 0.5.0

# ─── Brain OS — Configuration ─────────────────────────────────────────────────
# Generated by `brain init`. Edit to customise your setup.
# Env-var override for any key: BRAIN_<SECTION>__<KEY>  (e.g. BRAIN_LLM__API_KEY)

# ── LLM Providers ──────────────────────────────────────────────────────────────
# Brain probes each entry at startup, picks the first reachable one, and
# automatically fails over to the next on rate-limit or error.
#
# kind: ollama | groq | openai | openrouter | deepseek | together | gemini-compat
llm:
  # Generation settings, declared once at the `llm` level rather than per
  # provider entry.
  temperature: 0.7
  max_tokens: 4096
  # The active model's input context window, in tokens. Drives how much
  # file/attachment + memory content the prompt assembler packs in. Raise this
  # to your model's real size (e.g. 32768, 128000) so large-window models read
  # in detail instead of clipping to the conservative 8k default.
  context_window: 8192
  # Ordered list: entries are probed in the order written and the first
  # reachable one is used, so put the provider you want tried first at the top.
  providers:
    - name: ollama
      kind: ollama
      base_url: "http://localhost:11434"
      model: "qwen2.5-coder:7b"
      # NOTE(review): relationship between `model` and `preferred_models` is not
      # visible in this file — presumably a ranked fallback list; confirm.
      preferred_models: ["qwen2.5-coder:7b", "llama3.1:8b"]
    # Hosted alternatives — uncomment an entry and fill in its api_key.
    # - name: groq
    #   kind: groq
    #   api_key: "gsk_..."
    #   model: "llama-3.3-70b-versatile"
    #   preferred_models: ["llama-3.3-70b-versatile", "llama-3.1-8b-instant"]
    # - name: openrouter
    #   kind: openrouter
    #   api_key: "sk-or-..."
    #   model: "meta-llama/llama-3.1-8b-instruct:free"
    #   preferred_models: ["meta-llama/llama-3.1-8b-instruct:free"]
  # Legacy single-provider fallback — only used when providers list is empty.
  provider: "ollama"
  model: "qwen2.5-coder:7b"
  base_url: "http://localhost:11434"
  # Prefer the BRAIN_LLM__API_KEY env-var override to committing a key here.
  api_key: ""

# ── Embedding ──────────────────────────────────────────────────────────────────
# Run `ollama pull nomic-embed-text` before starting Brain.
# dimensions must match the model's actual output size exactly.
embedding:
  # Embedding model name; it is the one pulled via `ollama pull`.
  model: "nomic-embed-text"
  # nomic-embed-text emits 768-dimensional vectors. If you switch models,
  # change `model` and `dimensions` together.
  dimensions: 768

# ── Memory ─────────────────────────────────────────────────────────────────────
memory:
  semantic:
    # NOTE(review): presumably the minimum similarity score for a semantic hit
    # and the cap on hits returned — confirm against the recall code.
    similarity_threshold: 0.65
    max_results: 20
  # Hybrid search: candidates from each source are fused, then reranked.
  search:
    rrf_k: 60                   # Reciprocal Rank Fusion constant
    pre_fusion_limit: 50        # candidates fetched from each source (BM25, ANN) before fusion
    importance_weight: 0.3      # weight for importance in final reranking
    recency_weight: 0.2         # weight for recency in final reranking
    decay_rate: 0.01            # forgetting-curve decay rate (higher = faster forgetting)
  consolidation:
    enabled: true
    interval_hours: 24
    # NOTE(review): presumably memories whose decayed score falls below this
    # value are dropped during consolidation — confirm.
    forgetting_threshold: 0.05

# ── Encryption ─────────────────────────────────────────────────────────────────
# Run `brain init --encrypt` to generate a salt and enable at-rest encryption.
encryption:
  # At-rest encryption switch. Off by default; `brain init --encrypt` generates
  # the salt and enables it.
  enabled: false

# ── Security ───────────────────────────────────────────────────────────────────
security:
  # Allowlist of binaries the sandbox may execute. Kept deliberately small:
  # read-only inspection utilities plus the toolchain. Anything else
  # (docker, brew, ssh, custom scripts) must be added here explicitly.
  exec_allowlist:
    # files
    - "ls"
    - "cat"
    - "head"
    - "tail"
    - "wc"
    - "file"
    - "stat"
    # search and text filters
    - "grep"
    - "find"
    - "sort"
    - "uniq"
    - "cut"
    - "awk"
    - "sed"
    # lookup and path helpers
    - "which"
    - "command"
    - "type"
    - "test"
    - "basename"
    - "dirname"
    - "realpath"
    # output / status helpers (quoted so "true"/"false" stay strings)
    - "echo"
    - "printf"
    - "true"
    - "false"
    # toolchain
    - "git"
    - "cargo"
    - "rustc"
    - "rustup"
    # `sh` enables the shell-wrapped execution tier for commands the
    # LLM produces with pipes, redirects, or PATH-dependent binaries.
    # When invoked via the shell tier the per-binary allowlist is
    # bypassed for the wrapped command; rlimits, Seatbelt network deny,
    # timeout, and the explicit forbidden_commands list still apply.
    - "sh"
  exec_timeout_seconds: 30
  # Roots that read-only filesystem inspection (project_inspect) may touch.
  # An empty list defaults to $HOME; give explicit entries such as
  # ["~/code", "~/work"] to restrict further. Any path that, after
  # canonicalization, lies outside every allowed root is rejected.
  allowed_paths: []

# ── Actions ────────────────────────────────────────────────────────────────────
actions:
  web_search:
    # On by default. The "duckduckgo" provider is a zero-config built-in
    # that works without Docker or an API key — basic quality, but always
    # available. Switch to "searxng" (run `brain deps up` first) for the
    # best results, or "tavily" with an API key for a hosted option.
    enabled: true
    provider: "duckduckgo"      # duckduckgo | searxng | tavily | custom
    endpoint: "http://localhost:8888"  # used by searxng/custom only
    api_key: ""                 # required for tavily
    timeout_ms: 3000
    default_top_k: 5
  scheduling:
    enabled: false                      # WRITE axis: lets Brain create/persist
                                        # scheduled intents. Firing them is the
                                        # FIRE axis — see reflex.cron below.
    # NOTE(review): "persist_only" is the only mode visible in this file;
    # confirm the other accepted values before changing it.
    mode: "persist_only"
  messaging:
    enabled: false
    timeout_ms: 3000
    # Empty by default. To add a channel, replace `channels: {}` with
    # `channels:` and uncomment one of the entries beneath it.
    channels: {}
    # Webhook channel example — works for Discord, Telegram, Slack, or any HTTP endpoint.
    # Template vars: {{channel}} {{recipient}} {{content}} {{namespace}} {{timestamp}}
    #
    #   discord:
    #     url: "https://discord.com/api/webhooks/<ID>/<TOKEN>"
    #     body: '{"content": "{{content}}"}'
    #     headers: {}
    #   telegram:
    #     url: "https://api.telegram.org/bot<TOKEN>/sendMessage"
    #     body: '{"chat_id": "<CHAT_ID>", "text": "{{content}}", "parse_mode": "Markdown"}'
    #     headers: {}
  # Retry and circuit-breaker settings.
  # NOTE(review): presumably shared by every action in this section — confirm scope.
  resilience:
    max_retries: 2
    retry_base_ms: 500
    circuit_breaker_threshold: 5
    circuit_breaker_cooldown_secs: 60

# ── Proactivity ────────────────────────────────────────────────────────────────
proactivity:
  enabled: true
  # NOTE(review): presumably a daily cap on proactive messages and the minimum
  # gap between two of them — confirm.
  max_per_day: 2
  min_interval_minutes: 60
  quiet_hours:
    # `start` is later than `end`, so this window crosses midnight
    # (20:00 → 10:00). NOTE(review): confirm it is treated as wrapping overnight.
    # Keep the quotes: unquoted HH:MM values can be read as base-60 numbers by
    # YAML 1.1 parsers.
    start: "20:00"
    end: "10:00"
    timezone: "UTC"             # IANA timezone, e.g. "America/New_York"
  delivery:
    outbox: true
    broadcast: true
    webhook_channels: []        # channel keys from actions.messaging.channels
    max_outbox_age_days: 7
  # NOTE(review): semantics of the open-loop windows below are not visible in
  # this file; all three are durations (hours / minutes) per their key names.
  open_loop:
    enabled: true
    scan_window_hours: 72
    resolution_window_hours: 24
    check_interval_minutes: 120

# ── Adapters ───────────────────────────────────────────────────────────────────
adapters:
  # One entry per adapter, all enabled, on consecutive ports 19789–19793.
  # NOTE(review): only `http` declares `host`; confirm which interface the
  # other adapters bind to before exposing this machine to a network.
  http:
    enabled: true
    host: "127.0.0.1"
    port: 19789
    cors: true
  ws:
    enabled: true
    port: 19790
  mcp:
    enabled: true
    port: 19791
  grpc:
    enabled: true
    port: 19792
  terminal:
    enabled: true
    port: 19793

# ── Reactive signal sources ────────────────────────────────────────────────────
# Default is empty — no reflex tasks spawn unless you configure them here.
# Each firing becomes a Signal with Provenance::Reflex { trigger, ts } and
# flows through the normal pipeline (identity, confirmation, dispatch).
reflex:
  fs: []                              # watchers; one entry per path set
  # Example (replace `fs: []` with the block below):
  # fs:
  #   - name: project-watch
  #     paths: ["~/Developer/workspace/brain"]
  #     recursive: true
  #     debounce_ms: 200
  cron:
    enabled: false                    # FIRE axis: fires due scheduled_intents
                                      # through the pipeline. Required for
                                      # actions.scheduling intents to ever run.
    poll_interval_seconds: 60
  sys:
    enabled: false                    # edge-triggered system state
    poll_interval_seconds: 30
    rules: []
    # Example rules (replace `rules: []` with the block below):
    # rules:
    #   - kind: battery_below
    #     threshold: 20
    #   - kind: network_changed

# ── Logging ────────────────────────────────────────────────────────────────────
# Drives the tracing subscriber. `RUST_LOG` still overrides the computed filter
# at runtime. Long-running services (`serve`, `mcp`) log to a rotating file at
# ~/.brain/logs/brain.log; one-shot commands log to stderr.
logging:
  level: "info"                     # base level for the `brain` target
  format: "pretty"                  # "pretty" (human) or "json" (structured)
  rotation: "daily"                 # "daily" | "hourly" | "never" (file rotation)
  targets: {}                       # per-subsystem overrides, e.g.:
  # (replace `targets: {}` with the block below)
  # targets:
  #   hippocampus: "debug"
  #   signal: "info"

# ── Learned self-model ─────────────────────────────────────────────────────────
# Capability fitness: Brain records whether each tool succeeds or fails, decays
# those observations under the forgetting curve, and uses them as a tie-breaker
# when ranking the tools it offers the chat model (plus a "proven tools" line in
# its capability digest). Awareness only — execution stays consent-gated.
learning:
  capability_fitness:
    # Single switch covering all three behaviours named in the inline comment.
    enabled: true                   # record outcomes + boost ranking + surface
    half_life_days: 30              # how long an observation keeps half its weight

# ── Observability ──────────────────────────────────────────────────────────────
# Runtime resource gauges. A single background task samples process RSS, CPU,
# open SQLite connections, and `~/.brain` disk usage; crossing a ceiling emits an
# edge-triggered `ResourcePressure` event onto the bus (visible in `brain tail`,
# `brain doctor --deep`, and `/status`). Ceilings are generous and fail-safe —
# set any threshold to 0 to disable it.
observability:
  resource_sample_secs: 30          # seconds between resource samples
  # Per-resource ceilings; 0 disables an individual ceiling.
  # NOTE(review): the section header mentions open SQLite connections, but no
  # ceiling for them appears here — `open_fds` is the closest key; confirm.
  thresholds:
    rss_mb: 2048                    # resident-set-size ceiling (MiB)
    cpu_pct: 90.0                   # process CPU ceiling (percent, single-core basis)
    disk_mb: 10240                  # ~/.brain disk-usage ceiling (MiB)
    open_fds: 1024                  # open file-descriptor ceiling (count; fd-leak warning)
  log_sampling:
    high_volume_1_in_n: 1           # emit 1 in N high-volume log lines (heartbeat); 1 = log all

# ── Service health monitoring ──────────────────────────────────────────────────
# External endpoints to health-check. Each entry spawns one bounded background
# probe loop (HTTP GET or raw TCP connect). Probes are edge-triggered: a
# proactive notification fires only when a service crosses between reachable and
# unreachable — never once per interval while it stays in one state — and is
# delivered through the same router as resource-pressure alerts. Empty by default.
monitoring:
  # No probes by default. To add some, replace `services: []` with `services:`
  # and uncomment (or write) entries like the examples beneath it.
  services: []
  # - name: ollama                  # label used in the alert + triggered_by
  #   kind: http                    # http | tcp
  #   target: "http://localhost:11434/api/tags"   # URL (http) or host:port (tcp)
  #   interval_secs: 60             # seconds between probes
  #   timeout_secs: 10              # a probe over this window counts as down
  #   expect_status: 200            # http only; omit to accept any 2xx
  # - name: postgres
  #   kind: tcp
  #   target: "127.0.0.1:5432"
  #   interval_secs: 30

# ── Channel Relays ─────────────────────────────────────────────────────────────
# Bidirectional WebSocket gateways. Unlike webhooks these are long-lived
# connections — approval responses from any relay are correlated automatically.
channel:
  # No relays by default. To add one, replace `relays: []` with `relays:` and
  # uncomment the example entry beneath it.
  relays: []
  # - id: telegram
  #   label: "Telegram"
  #   url: "ws://127.0.0.1:7000/brain"
  #   namespace: "personal"
  #   api_key: ""
  #   initial_backoff_ms: 1000
  #   max_backoff_ms: 60000

# ── Agents ─────────────────────────────────────────────────────────────────────
# Specialist agents the orchestrator delegates multi-step tasks to.
agents:
  # Auto-discovery finds well-known CLI agents on $PATH without needing
  # manual entries. Use `delegates[]` for bespoke binaries or non-standard
  # invocation flags.
  # NOTE(review): no auto-discovery toggle appears in this file; it is
  # presumably on by built-in default — confirm.
  delegates: []
  # Example delegate (replace `delegates: []` with `delegates:` and uncomment):
  # - name: script
  #   kind: subprocess
  #   binary: "/usr/local/bin/my-agent"
  #   args: ["--task", "{task_id}"]
  #   prompt_via_stdin: true
  #   tags: ["custom"]
  fallbacks: []
  retry_on_timeout: true
  #
  # Per-agent overrides for the auto-discovered registry. Keyed by canonical
  # agent id (`claude_code`, `aider`, `cursor`, …). Every field is optional —
  # unset ones fall back to the fingerprint default.
  # discovery_overrides:
  #   claude_code:
  #     binary: "/opt/homebrew/bin/claude"          # pin path
  #     args: ["--print", "--task", "{task_id}"]    # override invocation
  #     prompt_via_stdin: true
  #     disabled: false
  #     capabilities:
  #       tags: ["code-edit", "plan", "rust"]
  #       languages: ["rust", "typescript"]
  #       max_concurrency: 2
  #       needs_network: true

# ── Access ─────────────────────────────────────────────────────────────────────
# A random key is generated on `brain init` and printed once to stdout.
access:
  # Shipped empty in this template.
  # NOTE(review): presumably `brain init` writes the generated key into this
  # list — confirm.
  api_keys: []
  # Per-client rate limiting (Issue 51). Keyed by API key for authenticated
  # routes; anonymous requests bypass and hit the auth wall instead.
  rate_limit:
    enabled: true
    # NOTE(review): burst_capacity (20) is lower than tokens_per_refill (60).
    # If the bucket is capped at burst_capacity, each 60 s refill can restore
    # at most 20 tokens — confirm that is the intended effective rate.
    tokens_per_refill: 60
    refill_interval_ms: 60000
    burst_capacity: 20

# ── Internal defaults (safe to leave unchanged) ────────────────────────────────
brain:
  # NOTE(review): the first line of this file names brainos-core 0.5.0 while
  # this value is 0.4.0 — confirm whether this is a config-schema version or a
  # stale package version.
  version: "0.4.0"
  # Root data directory. The `storage` paths in this file repeat this
  # prefix literally.
  data_dir: "~/.brain"

storage:
  # Both paths spell out `~/.brain` literally.
  # NOTE(review): presumably they are not derived from `brain.data_dir`, so
  # update them together if you move the data directory — confirm.
  ruvector_path: "~/.brain/ruvector/"
  sqlite_path: "~/.brain/db/brain.db"
  hnsw:
    ef_construction: 200
    m: 16
    ef_search: 50
    # HNSW pre-allocates the index graph for max_elements up-front, so
    # this is a real memory cost. 100k covers personal-scale installs;
    # raise to 1000000 or more if you're storing facts for a team or large
    # corpus. Write the value as plain digits: digit-group underscores are
    # not integers under the YAML 1.2 core schema. (Wave F, Issue 71.)
    max_elements: 100000