---
# ─── Brain OS — Configuration ─────────────────────────────────────────────────
# Generated by `brain init`. Edit to customise your setup.
# Env-var override for any key: BRAIN_<SECTION>__<KEY> (e.g. BRAIN_LLM__API_KEY)
# ── LLM Providers ──────────────────────────────────────────────────────────────
# Brain probes each entry at startup, picks the first reachable one, and
# automatically fails over to the next on rate-limit or error.
#
# kind: ollama | groq | openai | openrouter | deepseek | together | gemini-compat
llm:
  temperature: 0.7
  max_tokens: 4096
  # The active model's input context window, in tokens. Drives how much
  # file/attachment + memory content the prompt assembler packs in. Raise this
  # to your model's real size (e.g. 32768, 128000) so large-window models read
  # in detail instead of clipping to the conservative 8k default.
  context_window: 8192
  # One list entry per provider; entries are probed in the order written.
  providers:
    - name: ollama
      kind: ollama
      base_url: "http://localhost:11434"
      model: "qwen2.5-coder:7b"
      # NOTE(review): this key had no visible value; written as an explicit
      # empty list (the commented examples below show a list of model
      # names) — confirm no entries are missing.
      preferred_models: []
    # - name: groq
    #   kind: groq
    #   api_key: "gsk_..."
    #   model: "llama-3.3-70b-versatile"
    #   preferred_models: ["llama-3.3-70b-versatile", "llama-3.1-8b-instant"]
    # - name: openrouter
    #   kind: openrouter
    #   api_key: "sk-or-..."
    #   model: "meta-llama/llama-3.1-8b-instruct:free"
    #   preferred_models: ["meta-llama/llama-3.1-8b-instruct:free"]
  # Legacy single-provider fallback — only used when providers list is empty.
  provider: "ollama"
  model: "qwen2.5-coder:7b"
  base_url: "http://localhost:11434"
  api_key: ""
# ── Embedding ──────────────────────────────────────────────────────────────────
# Run `ollama pull nomic-embed-text` before starting Brain.
# dimensions must match the model's actual output size exactly.
embedding:
  # Embedding model name; the section header says to pull it with Ollama first.
  model: "nomic-embed-text"
  # Must equal the model's actual output vector size.
  dimensions: 768
# ── Memory ─────────────────────────────────────────────────────────────────────
# NOTE(review): the three sub-sections below are nested by key grouping;
# confirm the grouping against the config schema.
memory:
  semantic:
    similarity_threshold: 0.65
    max_results: 20
  search:
    rrf_k: 60               # Reciprocal Rank Fusion constant
    pre_fusion_limit: 50    # candidates fetched from each source (BM25, ANN) before fusion
    importance_weight: 0.3  # weight for importance in final reranking
    recency_weight: 0.2     # weight for recency in final reranking
    decay_rate: 0.01        # forgetting-curve decay rate (higher = faster forgetting)
  consolidation:
    enabled: true
    interval_hours: 24
    forgetting_threshold: 0.05
# ── Encryption ─────────────────────────────────────────────────────────────────
# Run `brain init --encrypt` to generate a salt and enable at-rest encryption.
encryption:
  # At-rest encryption is off here; `brain init --encrypt` is the documented
  # way to generate a salt and turn it on.
  enabled: false
# ── Security ───────────────────────────────────────────────────────────────────
security:
  # Binaries the sandbox is allowed to execute. The list is intentionally
  # narrow — read-only inspection plus the toolchain. To run anything else
  # (docker, brew, ssh, custom scripts), add it here explicitly.
  # NOTE(review): no entries are visible for this key even though the text
  # above describes a non-empty list. It is written as an explicit empty
  # list as a placeholder — restore the intended binaries before use.
  exec_allowlist: []
  exec_timeout_seconds: 30
  # Roots that read-only filesystem inspection (project_inspect) is
  # allowed to touch. Empty defaults to $HOME; set explicit entries
  # like ["~/code", "~/work"] to restrict further. Paths outside any
  # allowed root — after canonicalization — are rejected.
  allowed_paths: []
# ── Actions ────────────────────────────────────────────────────────────────────
actions:
  web_search:
    # On by default. The "duckduckgo" provider is a zero-config built-in
    # that works without Docker or an API key — basic quality, but always
    # available. Switch to "searxng" (run `brain deps up` first) for the
    # best results, or "tavily" with an API key for a hosted option.
    enabled: true
    provider: "duckduckgo"             # duckduckgo | searxng | tavily | custom
    endpoint: "http://localhost:8888"  # used by searxng/custom only
    api_key: ""                        # required for tavily
    timeout_ms: 3000
    default_top_k: 5
  scheduling:
    # WRITE axis: lets Brain create/persist scheduled intents. Firing them is
    # the FIRE axis — see reflex.cron below.
    enabled: false
    mode: "persist_only"
  messaging:
    enabled: false
    timeout_ms: 3000
    # Mapping of channel key -> webhook definition; empty means no channels.
    channels: {}
    # Webhook channel example — works for Discord, Telegram, Slack, or any HTTP endpoint.
    # Template vars: {{channel}} {{recipient}} {{content}} {{namespace}} {{timestamp}}
    #
    # To add channels, replace `{}` above with nested entries such as:
    #   discord:
    #     url: "https://discord.com/api/webhooks/<ID>/<TOKEN>"
    #     body: '{"content": "{{content}}"}'
    #     headers: {}
    #   telegram:
    #     url: "https://api.telegram.org/bot<TOKEN>/sendMessage"
    #     body: '{"chat_id": "<CHAT_ID>", "text": "{{content}}", "parse_mode": "Markdown"}'
    #     headers: {}
  # NOTE(review): `resilience` is placed directly under `actions`; confirm it
  # is not meant to sit under `messaging`.
  resilience:
    max_retries: 2
    retry_base_ms: 500
    circuit_breaker_threshold: 5
    circuit_breaker_cooldown_secs: 60
# ── Proactivity ────────────────────────────────────────────────────────────────
proactivity:
  enabled: true
  max_per_day: 2
  min_interval_minutes: 60
  # Times are quoted so they stay strings rather than being read as numbers.
  quiet_hours:
    start: "20:00"
    end: "10:00"
    timezone: "UTC"  # IANA timezone, e.g. "America/New_York"
  delivery:
    outbox: true
    broadcast: true
    webhook_channels: []  # channel keys from actions.messaging.channels
    max_outbox_age_days: 7
  open_loop:
    enabled: true
    scan_window_hours: 72
    resolution_window_hours: 24
    check_interval_minutes: 120
# ── Adapters ───────────────────────────────────────────────────────────────────
# One sub-mapping per adapter; each carries its own `enabled` flag and port.
adapters:
  http:
    enabled: true
    host: "127.0.0.1"
    port: 19789
    cors: true
  ws:
    enabled: true
    port: 19790
  mcp:
    enabled: true
    port: 19791
  grpc:
    enabled: true
    port: 19792
  terminal:
    enabled: true
    port: 19793
# ── Reactive signal sources ────────────────────────────────────────────────────
# Default is empty — no reflex tasks spawn unless you configure them here.
# Each firing becomes a Signal with Provenance::Reflex { trigger, ts } and
# flows through the normal pipeline (identity, confirmation, dispatch).
reflex:
  # Watchers; one entry per path set. Empty means no watcher is configured.
  fs: []
  # Example (replace the empty list above):
  # fs:
  #   - name: project-watch
  #     paths: ["~/Developer/workspace/brain"]
  #     recursive: true
  #     debounce_ms: 200
  cron:
    # FIRE axis: fires due scheduled_intents through the pipeline. Required
    # for actions.scheduling intents to ever run.
    enabled: false
    poll_interval_seconds: 60
  sys:
    enabled: false  # edge-triggered system state
    poll_interval_seconds: 30
    rules: []
    # Example rules (replace the empty list above to enable):
    # rules:
    #   - kind: battery_below
    #     threshold: 20
    #   - kind: network_changed
# ── Logging ────────────────────────────────────────────────────────────────────
# Drives the tracing subscriber. `RUST_LOG` still overrides the computed filter
# at runtime. Long-running services (`serve`, `mcp`) log to a rotating file at
# ~/.brain/logs/brain.log; one-shot commands log to stderr.
logging:
  level: "info"      # base level for the `brain` target
  format: "pretty"   # "pretty" (human) or "json" (structured)
  rotation: "daily"  # "daily" | "hourly" | "never" (file rotation)
  # Per-subsystem overrides; empty means every subsystem uses `level`.
  targets: {}
  # Example (replace the empty mapping above):
  # targets:
  #   hippocampus: "debug"
  #   signal: "info"
# ── Learned self-model ─────────────────────────────────────────────────────────
# Capability fitness: Brain records whether each tool succeeds or fails, decays
# those observations under the forgetting curve, and uses them as a tie-breaker
# when ranking the tools it offers the chat model (plus a "proven tools" line in
# its capability digest). Awareness only — execution stays consent-gated.
learning:
  capability_fitness:
    enabled: true       # record outcomes + boost ranking + surface
    half_life_days: 30  # how long an observation keeps half its weight
# ── Observability ──────────────────────────────────────────────────────────────
# Runtime resource gauges. A single background task samples process RSS, CPU,
# open SQLite connections, and `~/.brain` disk usage; crossing a ceiling emits an
# edge-triggered `ResourcePressure` event onto the bus (visible in `brain tail`,
# `brain doctor --deep`, and `/status`). Ceilings are generous and fail-safe —
# set any threshold to 0 to disable it.
observability:
  resource_sample_secs: 30  # seconds between resource samples
  # Per-resource ceilings; per the section header, 0 disables a threshold.
  thresholds:
    rss_mb: 2048     # resident-set-size ceiling (MiB)
    cpu_pct: 90.0    # process CPU ceiling (percent, single-core basis)
    disk_mb: 10240   # ~/.brain disk-usage ceiling (MiB)
    open_fds: 1024   # open file-descriptor ceiling (count; fd-leak warning)
  log_sampling:
    high_volume_1_in_n: 1  # emit 1 in N high-volume log lines (heartbeat); 1 = log all
# ── Service health monitoring ──────────────────────────────────────────────────
# External endpoints to health-check. Each entry spawns one bounded background
# probe loop (HTTP GET or raw TCP connect). Probes are edge-triggered: a
# proactive notification fires only when a service crosses between reachable and
# unreachable — never once per interval while it stays in one state — and is
# delivered through the same router as resource-pressure alerts. Empty by default.
monitoring:
  # Empty by default: no probe loops are configured.
  services: []
  # To add probes, replace `[]` above with entries such as:
  #   - name: ollama                                # label used in the alert + triggered_by
  #     kind: http                                  # http | tcp
  #     target: "http://localhost:11434/api/tags"   # URL (http) or host:port (tcp)
  #     interval_secs: 60                           # seconds between probes
  #     timeout_secs: 10                            # a probe over this window counts as down
  #     expect_status: 200                          # http only; omit to accept any 2xx
  #   - name: postgres
  #     kind: tcp
  #     target: "127.0.0.1:5432"
  #     interval_secs: 30
# ── Channel Relays ─────────────────────────────────────────────────────────────
# Bidirectional WebSocket gateways. Unlike webhooks these are long-lived
# connections — approval responses from any relay are correlated automatically.
channel:
  # NOTE(review): this key had no visible value; written as an explicit empty
  # list (the example below shows list entries) — confirm.
  relays: []
  # To add a relay, replace `[]` above with entries such as:
  #   - id: telegram
  #     label: "Telegram"
  #     url: "ws://127.0.0.1:7000/brain"
  #     namespace: "personal"
  #     api_key: ""
  #     initial_backoff_ms: 1000
  #     max_backoff_ms: 60000
# ── Agents ─────────────────────────────────────────────────────────────────────
# Specialist agents the orchestrator delegates multi-step tasks to.
agents:
  # NOTE(review): `delegates` had no visible value; written as an explicit
  # empty list (the example further down shows list entries) — confirm.
  delegates: []
  # NOTE(review): `retry_on_timeout` is nested under `fallbacks` because
  # `fallbacks` carried no value of its own. It may instead be a sibling of
  # `fallbacks` — confirm against the config schema.
  fallbacks:
    retry_on_timeout: true
  # Auto-discovery (default ON below) finds well-known CLI agents on $PATH
  # without needing manual entries. Use `delegates[]` for bespoke binaries
  # or non-standard invocation flags, e.g. as an entry of `delegates`:
  #   - name: script
  #     kind: subprocess
  #     binary: "/usr/local/bin/my-agent"
  #     args: ["--task", "{task_id}"]
  #     prompt_via_stdin: true
  #     tags: ["custom"]
  #
  # Per-agent overrides for the auto-discovered registry. Keyed by canonical
  # agent id (`claude_code`, `aider`, `cursor`, …). Every field is optional —
  # unset ones fall back to the fingerprint default.
  # (Nesting inside this example is reconstructed — verify before copying.)
  # discovery_overrides:
  #   claude_code:
  #     binary: "/opt/homebrew/bin/claude"            # pin path
  #     args: ["--print", "--task", "{task_id}"]      # override invocation
  #     prompt_via_stdin: true
  #     disabled: false
  #     capabilities:
  #       tags: ["code-edit", "plan", "rust"]
  #       languages: ["rust", "typescript"]
  #       max_concurrency: 2
  #       needs_network: true
# ── Access ─────────────────────────────────────────────────────────────────────
# A random key is generated on `brain init` and printed once to stdout.
access:
  # NOTE(review): this key had no visible value; written as an explicit empty
  # list — confirm the expected shape of the key entries.
  api_keys: []
  # Per-client rate limiting (Issue 51). Keyed by API key for authenticated
  # routes; anonymous requests bypass and hit the auth wall instead.
  rate_limit:
    enabled: true
    tokens_per_refill: 60
    refill_interval_ms: 60000
    burst_capacity: 20
# ── Internal defaults (safe to leave unchanged) ────────────────────────────────
brain:
  # Quoted so the version stays a string rather than being read as a number.
  version: "0.4.0"
  data_dir: "~/.brain"
# NOTE(review): `storage` is kept as a top-level section; confirm it is not
# meant to be nested under `brain`.
storage:
  ruvector_path: "~/.brain/ruvector/"
  sqlite_path: "~/.brain/db/brain.db"
  hnsw:
    ef_construction: 200
    m: 16
    ef_search: 50
    # HNSW pre-allocates the index graph for max_elements up-front, so
    # this is a real memory cost. 100k covers personal-scale installs;
    # raise to 1_000_000+ if you're storing facts for a team or large
    # corpus. (Wave F, Issue 71.)
    max_elements: 100000