netsky-core 0.1.7

netsky core: agent model, prompt loader, spawner, config
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
//! Canonical string constants. If a string literal appears twice in the
//! code base and carries semantic meaning, it belongs here.

// ---- identity ---------------------------------------------------------------

/// Name of the tmux session and agent id for the root orchestrator.
/// Has the same `agent<N>` shape as clone names (see [`CLONE_PREFIX`]),
/// with N = 0.
pub const AGENT0_NAME: &str = "agent0";
/// Name of the tmux session and agent id for the watchdog.
pub const AGENTINFINITY_NAME: &str = "agentinfinity";
/// Prefix applied to clone sessions: `agent<N>` where N > 0. The clone
/// number is appended directly, with no separator.
pub const CLONE_PREFIX: &str = "agent";

// ---- env vars ---------------------------------------------------------------

/// Env var carrying the agent number. Read by skills to route notes + sessions.
pub const ENV_AGENT_N: &str = "AGENT_N";
/// Env var carrying the filesystem path (not `$PATH`) to a file holding
/// the fully-rendered system prompt. The shell tmux runs expands
/// `$(cat "$NETSKY_PROMPT_FILE")` at exec time. We pass a path
/// (~100 bytes) instead of the 20KB+ prompt content through
/// `tmux new-session -e` because tmux's internal command parser rejects
/// oversized argv elements with "command too long" — the bug that took
/// the constellation down in session-11.
pub const ENV_NETSKY_PROMPT_FILE: &str = "NETSKY_PROMPT_FILE";
/// Env var named `CODEX_CHANNEL_DIR`.
/// NOTE(review): no reader is visible in this file — presumably the
/// channel directory handed to a codex-backed agent; confirm against
/// the spawner before relying on this description.
pub const ENV_CODEX_CHANNEL_DIR: &str = "CODEX_CHANNEL_DIR";

// ---- MCP servers (names used in per-agent mcp-config.json) ------------------

/// MCP server name for the agent bus. Matches the server half of
/// [`DEV_CHANNEL_AGENT`] (`server:agent`) and the `mcp__agent__*` tool
/// prefix used in the allowlists.
pub const MCP_SERVER_AGENT: &str = "agent";
/// MCP server name for iMessage. Matches the server half of
/// [`DEV_CHANNEL_IMESSAGE`] (`server:imessage`) and the
/// `mcp__imessage__*` tool prefix used in the allowlists.
pub const MCP_SERVER_IMESSAGE: &str = "imessage";

// ---- paths (relative to $HOME unless absolute) ------------------------------
//
// Note: this section also carries a tmux session name and three
// watchdog timing thresholds (seconds); those are not paths.

/// State directory outside the macOS /tmp reaper window.
pub const STATE_DIR: &str = ".netsky/state";
/// Durable logs directory. Backend-agnostic JSONL event streams land
/// here (watchdog events, future meta-db error spool) so forensics
/// survive meta.db outages and macOS /tmp reaping.
pub const LOGS_SUBDIR: &str = ".netsky/logs";
/// Subdirectory under state-dir holding per-agent system-prompt files.
/// One file per agent, atomically overwritten on each spawn. The value
/// is the full `$HOME`-relative path, i.e. it already includes
/// [`STATE_DIR`].
pub const PROMPTS_SUBDIR: &str = ".netsky/state/prompts";
/// Subdirectory under state-dir holding crash-handoff drafts written by
/// the watchdog on crash-recovery. Kept under the durable state dir so
/// the macOS /tmp reaper does not eat the forensic trail after ~3 days
/// of a handoff never being consumed.
pub const CRASH_HANDOFFS_SUBDIR: &str = ".netsky/state/crash-handoffs";
/// Filename prefix for crash-handoff drafts. Full name is
/// `<prefix><pid><suffix>` under [`CRASH_HANDOFFS_SUBDIR`]. The `$TMPDIR`
/// version of the same prefix is swept by the one-time migration at
/// watchdog startup.
pub const CRASH_HANDOFF_FILENAME_PREFIX: &str = "netsky-crash-handoff.";
/// Filename suffix for crash-handoff drafts; the `<suffix>` part of
/// `<prefix><pid><suffix>` (see [`CRASH_HANDOFF_FILENAME_PREFIX`]).
pub const CRASH_HANDOFF_FILENAME_SUFFIX: &str = ".txt";
/// Readiness marker written by agentinfinity as its final startup step.
pub const AGENTINFINITY_READY_MARKER: &str = ".netsky/state/agentinfinity-ready";
/// Marker file written when agentinit fails repeatedly.
pub const AGENTINIT_ESCALATION_MARKER: &str = ".netsky/state/agentinit-escalation";
/// Per-session resume file refreshed by agent0 before a planned restart.
pub const LOOP_RESUME_FILE: &str = ".netsky/state/netsky-loop-resume.txt";
/// Watchdog-driven tmux ticker session name (a session name, not a path).
pub const TICKER_SESSION: &str = "netsky-ticker";
/// Watchdog gap threshold, in seconds. If the watchdog log has not
/// advanced in this long, the next tick records a durable
/// ticker-stopped event.
pub const WATCHDOG_TICK_GAP_WARN_S: u64 = 300;
/// Watchdog escalation threshold for a stalled tick driver, in seconds.
/// Twice [`WATCHDOG_TICK_GAP_WARN_S`].
pub const WATCHDOG_TICK_GAP_ESCALATE_S: u64 = 600;
/// How long (seconds) a detached restart may remain unverified before
/// the watchdog marks it failed and pages the owner.
pub const WATCHDOG_RESTART_VERIFY_WINDOW_S: u64 = 180;
/// Handoff archive directory written by `netsky restart` alongside the
/// inbox delivery. Durable record of every handoff to agent0.
pub const HANDOFF_ARCHIVE_SUBDIR: &str = "Library/Logs/netsky-handoffs";
/// Planned-restart request file claimed by the watchdog. Absolute path
/// under `/tmp`, so it is not in the durable state dir.
pub const RESTART_REQUEST_FILE: &str = "/tmp/netsky-restart-request.txt";
/// In-flight restart sentinel. Absolute path under `/tmp`.
pub const RESTART_PROCESSING_FILE: &str = "/tmp/netsky-restart-processing.txt";

// ---- claude CLI flags (passed verbatim) -------------------------------------

/// Executable name of the claude CLI.
pub const CLAUDE: &str = "claude";
/// Flag that selects the model; see [`DEFAULT_MODEL`].
pub const CLAUDE_FLAG_MODEL: &str = "--model";
/// Flag that selects the effort level; see [`DEFAULT_EFFORT`].
pub const CLAUDE_FLAG_EFFORT: &str = "--effort";
/// Flag that carries a tool allowlist such as [`ALLOWED_TOOLS_AGENT`].
pub const CLAUDE_FLAG_ALLOWED_TOOLS: &str = "--allowed-tools";
/// Flag that carries the tool denylist; see [`DISALLOWED_TOOLS`].
pub const CLAUDE_FLAG_DISALLOWED_TOOLS: &str = "--disallowed-tools";
/// Literal `--dangerously-skip-permissions` flag.
/// NOTE(review): its call sites are outside this file — confirm which
/// agents are launched with it.
pub const CLAUDE_FLAG_DANGEROUSLY_SKIP_PERMISSIONS: &str = "--dangerously-skip-permissions";
/// Flag that carries the permission mode; see [`PERMISSION_MODE_BYPASS`].
pub const CLAUDE_FLAG_PERMISSION_MODE: &str = "--permission-mode";
/// Flag pointing claude at a per-agent config file; see
/// [`MCP_CHANNEL_DIR_PREFIX`] and [`MCP_CONFIG_FILENAME`].
pub const CLAUDE_FLAG_MCP_CONFIG: &str = "--mcp-config";
/// Literal `--strict-mcp-config` flag.
/// NOTE(review): presumably restricts claude to the servers named by
/// `--mcp-config`; confirm against the claude CLI help.
pub const CLAUDE_FLAG_STRICT_MCP_CONFIG: &str = "--strict-mcp-config";
/// Literal `--append-system-prompt` flag.
/// NOTE(review): presumably receives the rendered prompt read from
/// [`ENV_NETSKY_PROMPT_FILE`]; confirm at the call site.
pub const CLAUDE_FLAG_APPEND_SYSTEM_PROMPT: &str = "--append-system-prompt";
/// Flag that receives dev-channel identifiers such as
/// [`DEV_CHANNEL_AGENT`] and [`DEV_CHANNEL_IMESSAGE`].
pub const CLAUDE_FLAG_LOAD_DEV_CHANNELS: &str = "--dangerously-load-development-channels";

/// Default model for spawned agents. Overridable via `AGENT_MODEL` env
/// ([`ENV_AGENT_MODEL_OVERRIDE`]).
pub const DEFAULT_MODEL: &str = "opus[1m]";
/// Default effort level for clones + agent0. agentinfinity overrides to
/// "medium" ([`AGENTINFINITY_EFFORT`]).
pub const DEFAULT_EFFORT: &str = "high";
/// Effort level used for agentinfinity in place of [`DEFAULT_EFFORT`].
pub const AGENTINFINITY_EFFORT: &str = "medium";
/// Default clone count for `netsky up` (agent0 + this many clones).
/// 0 means "agent0 + agentinfinity only" — clones spawn on-demand via
/// `netsky agent <N>`. Pre-warming a constellation stays explicit
/// (`netsky up 8`). Idle clones were burning tokens on /up + /down +
/// /notes without ever executing a brief; lazy spawn keeps the bus
/// cheap and matches the "use clones heavily, not always-on" policy.
pub const DEFAULT_CLONE_COUNT: u32 = 0;

// ---- cwd addendum filenames (relative to invocation cwd) --------------------

/// cwd addendum loaded for agent0 on top of the baked base prompt.
/// Follows the clone `N.md` naming with N = 0.
pub const CWD_ADDENDUM_AGENT0: &str = "0.md";
/// cwd addendum loaded for agentinfinity on top of the baked base prompt.
pub const CWD_ADDENDUM_AGENTINFINITY: &str = "agentinfinity.md";
/// Extension appended to the clone number to form a clone's cwd addendum
/// filename: `N.md` where N > 0.
pub const CWD_ADDENDUM_CLONE_EXT: &str = ".md";

// ---- model + effort overrides ----------------------------------------------

/// Env var that overrides [`DEFAULT_MODEL`] for spawned agents.
pub const ENV_AGENT_MODEL_OVERRIDE: &str = "AGENT_MODEL";
/// Env var named `AGENT_EFFORT`.
/// NOTE(review): by symmetry with [`ENV_AGENT_MODEL_OVERRIDE`] this
/// presumably overrides [`DEFAULT_EFFORT`]; whether it also applies to
/// agentinfinity is not visible here — confirm at the read site.
pub const ENV_AGENT_EFFORT_OVERRIDE: &str = "AGENT_EFFORT";

// ---- dependencies on PATH --------------------------------------------------

/// Binary that serves netsky-io sources (`netsky io serve -s <source>`).
/// NOTE(review): same literal as [`NETSKY_BIN`]; the module-level rule
/// says a repeated literal belongs in one constant — consider aliasing
/// one to the other.
pub const NETSKY_IO_BIN: &str = "netsky";
/// Name of the tmux executable expected on `PATH`.
pub const TMUX_BIN: &str = "tmux";

// ---- claude tool + channel lists -------------------------------------------

/// Tools agent0 + clones expose, as one comma-separated string with no
/// whitespace. Clone-specific injection guards live in the clone stanza
/// + `.agents/skills/spawn/SKILL.md`. All MCP reply / query / mutation
/// tools across every netsky-io source are allowlisted unconditionally
/// here; per-agent gating (who actually sees which channel's inbound
/// events) is enforced by the dev-channel flag set in
/// `runtime::claude::build_command`, not by this list. The parity test
/// `allowed_tools_agent_subset_of_agentinfinity` pins this set as a
/// subset of [`ALLOWED_TOOLS_AGENTINFINITY`] so the watchdog never
/// regresses to a narrower allowlist than the primary agent.
pub const ALLOWED_TOOLS_AGENT: &str = "Bash,CronCreate,CronDelete,CronList,Edit,Glob,Grep,Monitor,Read,Skill,TaskCreate,TaskGet,TaskList,TaskStop,TaskUpdate,Write,mcp__imessage__reply,mcp__agent__reply,mcp__email__reply,mcp__email__list_messages,mcp__email__read_message,mcp__email__create_draft,mcp__email__send_draft,mcp__email__list_drafts,mcp__email__archive_message,mcp__email__trash_message,mcp__calendar__list_calendars,mcp__calendar__list_events,mcp__calendar__get_event,mcp__calendar__create_event,mcp__calendar__delete_event,mcp__drive__list_files,mcp__drive__get_file,mcp__drive__download_file,mcp__drive__upload_file,mcp__drive__create_folder,mcp__drive__delete_file,mcp__drive__list_trash,mcp__drive__move_file,mcp__drive__rename_file,mcp__drive__copy_file,mcp__drive__share_file,mcp__drive__list_permissions,mcp__iroh__iroh_send,mcp__tasks__list_tasks,mcp__tasks__create_task,mcp__tasks__complete_task,mcp__tasks__delete_task";
/// Tools the watchdog exposes, in the same comma-separated format. Must
/// be a superset of [`ALLOWED_TOOLS_AGENT`] — agentinfinity acts as a
/// backstop for any tool the primary agents can invoke. That includes
/// the `Task*` / `Cron*` tools: the list below carries them to keep the
/// superset invariant (an earlier revision of this comment said they
/// were excluded, which contradicted both the value and the parity
/// test). WebFetch + WebSearch are agentinfinity-only (needed for
/// meta-docs + repair research).
pub const ALLOWED_TOOLS_AGENTINFINITY: &str = "Bash,CronCreate,CronDelete,CronList,Edit,Glob,Grep,Monitor,Read,Skill,TaskCreate,TaskGet,TaskList,TaskStop,TaskUpdate,WebFetch,WebSearch,Write,mcp__imessage__reply,mcp__agent__reply,mcp__email__reply,mcp__email__list_messages,mcp__email__read_message,mcp__email__create_draft,mcp__email__send_draft,mcp__email__list_drafts,mcp__email__archive_message,mcp__email__trash_message,mcp__calendar__list_calendars,mcp__calendar__list_events,mcp__calendar__get_event,mcp__calendar__create_event,mcp__calendar__delete_event,mcp__drive__list_files,mcp__drive__get_file,mcp__drive__download_file,mcp__drive__upload_file,mcp__drive__create_folder,mcp__drive__delete_file,mcp__drive__list_trash,mcp__drive__move_file,mcp__drive__rename_file,mcp__drive__copy_file,mcp__drive__share_file,mcp__drive__list_permissions,mcp__iroh__iroh_send,mcp__tasks__list_tasks,mcp__tasks__create_task,mcp__tasks__complete_task,mcp__tasks__delete_task";

/// Tools explicitly denied for agent0, clones, and agentinfinity. The
/// `Agent` tool is reserved for bounded subsystems spawned via `/spawn`;
/// top-level agents delegate concurrent work to clones over the bus,
/// never by spinning up anonymous subagents inside their own context.
/// Passed via `--disallowed-tools` for defense-in-depth (bypass mode
/// may otherwise open tools absent from the allowlist).
pub const DISALLOWED_TOOLS: &str = "Agent";

/// `--permission-mode` value used across the board.
pub const PERMISSION_MODE_BYPASS: &str = "bypassPermissions";

/// Dev-channel identifier for the agent bus, passed to
/// `--dangerously-load-development-channels`. Shape is
/// `server:<mcp-server-name>`; see [`MCP_SERVER_AGENT`].
pub const DEV_CHANNEL_AGENT: &str = "server:agent";
/// Dev-channel identifier for iMessage, passed to the same flag; see
/// [`MCP_SERVER_IMESSAGE`].
pub const DEV_CHANNEL_IMESSAGE: &str = "server:imessage";

// ---- per-agent MCP config layout -------------------------------------------

/// Subdirectory of $HOME holding per-agent mcp-config.json files.
/// Claude reads `~/.claude/channels/agent/<agent-name>/mcp-config.json`
/// when launched with `--mcp-config` pointing into it.
pub const MCP_CHANNEL_DIR_PREFIX: &str = ".claude/channels/agent";
/// Filename of the per-agent MCP config inside
/// `<MCP_CHANNEL_DIR_PREFIX>/<agent-name>/`.
pub const MCP_CONFIG_FILENAME: &str = "mcp-config.json";

// ---- agentinit (bootstrap helper) ------------------------------------------

/// Haiku pin for agentinit. Fast cold-start, cheap, no orchestration needs.
/// If deprecated, this pin breaks loudly at the next tick — intentional.
pub const AGENTINIT_MODEL: &str = "claude-haiku-4-5-20251001";
/// Effort level for agentinit; lower than both [`DEFAULT_EFFORT`] and
/// [`AGENTINFINITY_EFFORT`].
pub const AGENTINIT_EFFORT: &str = "low";
/// Tool allowlist for agentinit, in the same comma-separated format as
/// [`ALLOWED_TOOLS_AGENT`]. Only `Bash` and `Read` — no MCP tools.
pub const AGENTINIT_ALLOWED_TOOLS: &str = "Bash,Read";
/// `-p` flag for non-interactive claude output.
pub const CLAUDE_FLAG_PRINT: &str = "-p";
/// Ceiling (seconds) on a single `agentinit` claude-haiku invocation.
/// Held under the watchdog's D1 lock, so unbounded waits cascade the
/// same way escalate does. 90s accommodates a cold start + a slow turn;
/// if we exceed it the agentinit-failure counter handles it.
pub const AGENTINIT_TIMEOUT_S: u64 = 90;

// ---- netsky binary name (PATH lookup) -------------------------------------

/// Name of the netsky CLI binary, looked up on `PATH`.
/// NOTE(review): same literal as [`NETSKY_IO_BIN`]; consider aliasing
/// one to the other so the string lives in a single place.
pub const NETSKY_BIN: &str = "netsky";

// ---- canonical source-checkout root (NETSKY_DIR resolution) ---------------

/// Env var that pins the netsky source-checkout root. When set, takes
/// precedence over the `$HOME/netsky` default
/// ([`NETSKY_DIR_DEFAULT_SUBDIR`]); lets the owner relocate the checkout
/// (e.g. `~/code/netsky`) without forking the binary. Read by
/// [`paths::resolve_netsky_dir`] and passed through to launchd-spawned
/// subprocesses so the watchdog tick agrees with the interactive shell.
pub const ENV_NETSKY_DIR: &str = "NETSKY_DIR";

/// Default location of the netsky source checkout, relative to `$HOME`.
/// `$HOME/netsky` is the canonical convention referenced from
/// `ONBOARDING.md`, `bin/onboard`, the launchd plist baker, and every
/// skill that assumes a stable cwd.
pub const NETSKY_DIR_DEFAULT_SUBDIR: &str = "netsky";
/// Binary-mode state root, relative to `$HOME`.
///
/// When no checkout is found, the CLI falls back to `~/.netsky` and
/// stores prompts, addenda, notes, and state there. This is also the
/// leading component of [`STATE_DIR`] and [`LOGS_SUBDIR`].
pub const NETSKY_STATE_DIR: &str = ".netsky";

// ---- launchd -----------------------------------------------------------------

/// launchd job label for the watchdog LaunchAgent.
pub const LAUNCHD_LABEL: &str = "dev.dkdc.netsky-watchdog";
/// Directory holding the LaunchAgent plist.
/// NOTE(review): written without a leading slash, so presumably
/// relative to `$HOME` like [`HANDOFF_ARCHIVE_SUBDIR`]; confirm.
pub const LAUNCHD_PLIST_SUBDIR: &str = "Library/LaunchAgents";
/// Absolute path of the watchdog job's stdout log. Same literal as
/// [`TICKER_LOG_PATH`].
pub const LAUNCHD_STDOUT_LOG: &str = "/tmp/netsky-watchdog.out.log";
/// Absolute path of the watchdog job's stderr log.
pub const LAUNCHD_STDERR_LOG: &str = "/tmp/netsky-watchdog.err.log";
/// Absolute path of a file for launchd bootstrap errors.
/// NOTE(review): the writer is outside this file — presumably stderr
/// of the `launchctl bootstrap` step; confirm.
pub const LAUNCHD_BOOTSTRAP_ERR: &str = "/tmp/netsky-launchd-bootstrap.err";
/// Watchdog cadence in seconds. macOS pauses StartInterval during sleep.
pub const LAUNCHD_INTERVAL_S: u32 = 120;
/// PATH baked into the LaunchAgent env. Includes `$HOME/.local/bin`
/// substitution marker `<<HOME>>` replaced at install time.
pub const LAUNCHD_JOB_PATH_TEMPLATE: &str =
    "<<HOME>>/.local/bin:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin";

// ---- tick driver -----------------------------------------------------------

/// Default ticker interval, in seconds. Override via [`ENV_TICKER_INTERVAL`].
pub const TICKER_INTERVAL_DEFAULT_S: u64 = 60;
/// Env var that overrides [`TICKER_INTERVAL_DEFAULT_S`].
pub const ENV_TICKER_INTERVAL: &str = "NETSKY_TICKER_INTERVAL_S";
/// Absolute path of the watchdog log that the ticker rotates (see
/// [`TICKER_LOG_ROTATE_BYTES`]). Same literal as [`LAUNCHD_STDOUT_LOG`].
/// NOTE(review): if the two must always name the same file, alias one
/// to the other so they cannot drift.
pub const TICKER_LOG_PATH: &str = "/tmp/netsky-watchdog.out.log";
/// Rotate the watchdog log when its size exceeds this (5 MiB). The
/// rotated file is renamed to `TICKER_LOG_PATH.1`, overwriting any prior
/// rotation. One generation is enough: doctor/morning only read the
/// live file, and `.1` is available for forensics.
pub const TICKER_LOG_ROTATE_BYTES: u64 = 5 * 1024 * 1024;
/// Config file holding the agent0 status-tick interval, written by
/// `netsky tick enable <secs>`. Absence = ticks disabled.
pub const TICK_INTERVAL_CONFIG: &str = "/tmp/netsky-tick-interval-s";
/// Marker file touched on each successful tick-request drop; used to
/// gate interval enforcement.
pub const TICK_LAST_MARKER: &str = "/tmp/netsky-last-tick";
/// Floor on status-tick interval, in seconds. Below this = spam, rejected.
pub const TICK_MIN_INTERVAL_S: u64 = 60;

/// agent0 channel inbox, relative to `$HOME`. Envelopes written here
/// are surfaced by netsky-io's agent poll loop. The path is
/// [`MCP_CHANNEL_DIR_PREFIX`] + `/agent0/inbox`.
pub const AGENT0_INBOX_SUBDIR: &str = ".claude/channels/agent/agent0/inbox";

// ---- watchdog-tick tunables -----------------------------------------------
//
// Several tunables below come as a `*_DEFAULT` value next to an `ENV_*`
// name. NOTE(review): the pairing (env var overrides the default) is
// inferred from the names; the read sites are outside this file.

/// Watchdog D1 lock dir. `mkdir` is atomic on posix. The holder writes
/// its PID to `WATCHDOG_LOCK_DIR/pid`; the next tick checks that PID
/// with `kill -0` and force-releases only if the holder is dead. The
/// stale-age threshold is a last-resort fallback for legacy locks with
/// no PID file, sized to exceed the worst-case restart time.
pub const WATCHDOG_LOCK_DIR: &str = "/tmp/netsky-watchdog.lock";
/// Name of the PID file inside [`WATCHDOG_LOCK_DIR`].
pub const WATCHDOG_LOCK_PID_FILE: &str = "pid";
/// Upper bound (seconds) on a legitimate tick. Covers a restart with 8
/// clones at 120s /up-wait each (~1100s) plus margin. Legacy locks older
/// than this with no PID file are force-removed.
pub const WATCHDOG_LOCK_STALE_S: u64 = 1500;
/// Archive stale `.processing` files older than this many seconds (D2).
pub const RESTART_PROCESSING_STALE_S: u64 = 600;
/// Warn if /tmp partition has less than this many MB free (C5).
pub const DISK_MIN_MB_DEFAULT: u64 = 500;
/// Env var for the free-disk threshold; paired with
/// [`DISK_MIN_MB_DEFAULT`].
pub const ENV_DISK_MIN_MB: &str = "NETSKY_DISK_MIN_MB";

/// agentinit failure sliding-window state file (E2).
pub const AGENTINIT_FAILURES_FILE: &str = ".netsky/state/agentinit-failures";
/// Default width of the agentinit failure sliding window, in seconds.
pub const AGENTINIT_WINDOW_S_DEFAULT: u64 = 600;
/// Default agentinit failure-count threshold.
/// NOTE(review): presumably the count within the window at which
/// [`AGENTINIT_ESCALATION_MARKER`] is written; confirm.
pub const AGENTINIT_THRESHOLD_DEFAULT: u64 = 3;
/// Env var paired with [`AGENTINIT_WINDOW_S_DEFAULT`].
pub const ENV_AGENTINIT_WINDOW_S: &str = "NETSKY_AGENTINIT_WINDOW_S";
/// Env var paired with [`AGENTINIT_THRESHOLD_DEFAULT`].
pub const ENV_AGENTINIT_THRESHOLD: &str = "NETSKY_AGENTINIT_THRESHOLD";

/// B3 hang-detection state: file holding a hash of agent0's pane.
/// NOTE(review): presumably compared between ticks to detect an
/// unchanged pane; confirm against the watchdog tick.
pub const AGENT0_PANE_HASH_FILE: &str = ".netsky/state/agent0-pane-hash";
/// B3 hang-detection state: marker for a suspected agent0 hang.
pub const AGENT0_HANG_MARKER: &str = ".netsky/state/agent0-hang-suspected";
/// B3 hang-detection state: marker recording that a hang was paged.
/// NOTE(review): presumably used with [`AGENT0_HANG_REPAGE_S_DEFAULT`]
/// to rate-limit repeat pages; confirm.
pub const AGENT0_HANG_PAGED_MARKER: &str = ".netsky/state/agent0-hang-paged";

/// P0-1 crashloop-detection state. Newline-delimited unix ts of restart
/// attempts, pruned to a 600s sliding window. Paired with the crashloop
/// marker (written once N attempts accumulate) + the restart-status
/// subdir (P0-2) which captures the last-known restart error for the
/// marker body + escalation page.
pub const AGENT0_RESTART_ATTEMPTS_FILE: &str = ".netsky/state/agent0-restart-attempts";
/// Crashloop marker, written once enough restart attempts accumulate
/// (see [`AGENT0_RESTART_ATTEMPTS_FILE`]).
pub const AGENT0_CRASHLOOP_MARKER: &str = ".netsky/state/agent0-crashloop-suspected";
/// Default crashloop sliding-window width, in seconds (the 600s window
/// described on [`AGENT0_RESTART_ATTEMPTS_FILE`]).
pub const AGENT0_CRASHLOOP_WINDOW_S_DEFAULT: u64 = 600;
/// Default number of restart attempts within the window that counts as
/// a crashloop (the "N attempts" on [`AGENT0_RESTART_ATTEMPTS_FILE`]).
pub const AGENT0_CRASHLOOP_THRESHOLD_DEFAULT: u64 = 3;
/// Env var paired with [`AGENT0_CRASHLOOP_WINDOW_S_DEFAULT`].
pub const ENV_AGENT0_CRASHLOOP_WINDOW_S: &str = "NETSKY_AGENT0_CRASHLOOP_WINDOW_S";
/// Env var paired with [`AGENT0_CRASHLOOP_THRESHOLD_DEFAULT`].
pub const ENV_AGENT0_CRASHLOOP_THRESHOLD: &str = "NETSKY_AGENT0_CRASHLOOP_THRESHOLD";

/// P0-2 restart-child status subdir. The detached `netsky restart`
/// subprocess writes one status file per invocation at known phase
/// transitions (spawned / up-detected / errored). The next watchdog
/// tick reads the most-recent file to feed the crashloop detector's
/// marker body + escalation page with the actual failure cause.
pub const RESTART_STATUS_SUBDIR: &str = ".netsky/state/restart-status";
/// Max status files retained after each write. Older entries pruned by
/// mtime. Mirrors the handoff-archive prune pattern in restart.rs.
pub const RESTART_STATUS_KEEP: usize = 20;
// The five constants below are named for B3 hang detection (see
// AGENT0_PANE_HASH_FILE above), not for restart status.
/// Default hang threshold, in seconds (30 minutes).
/// NOTE(review): presumably how long agent0's pane may stay unchanged
/// before [`AGENT0_HANG_MARKER`] is written; confirm.
pub const AGENT0_HANG_S_DEFAULT: u64 = 1800;
/// Default re-page interval for a hang, in seconds (6 hours).
pub const AGENT0_HANG_REPAGE_S_DEFAULT: u64 = 21600;
/// Env var paired with [`AGENT0_HANG_S_DEFAULT`].
pub const ENV_AGENT0_HANG_S: &str = "NETSKY_AGENT0_HANG_S";
/// Env var paired with [`AGENT0_HANG_REPAGE_S_DEFAULT`].
pub const ENV_AGENT0_HANG_REPAGE_S: &str = "NETSKY_AGENT0_HANG_REPAGE_S";
/// Env var named `NETSKY_HANG_DETECT`.
/// NOTE(review): presumably an on/off switch for hang detection; the
/// accepted values are not visible here — confirm at the read site.
pub const ENV_HANG_DETECT: &str = "NETSKY_HANG_DETECT";

/// Quiet-sentinel prefix. A file `agent0-quiet-until-<epoch>` in the
/// state dir suppresses hang detection while `<epoch>` is in the future.
/// Written by `netsky quiet <seconds>` before a legit long nap or a
/// /loop stop; read by the watchdog tick. Past-epoch files are reaped
/// by the reader so they self-clean.
pub const AGENT0_QUIET_UNTIL_PREFIX: &str = "agent0-quiet-until-";

/// Archived `/tmp/netsky-restart-processing.txt` forensic records land in
/// [`RESTART_ARCHIVE_SUBDIR`] under `<prefix><stamp><suffix>`. Filenames
/// only — the directory comes from the paths helper.
pub const RESTART_PROCESSING_ARCHIVE_FILENAME_PREFIX: &str = "netsky-restart-processing.";
/// The `<suffix>` part of the archived-processing filename (see
/// [`RESTART_PROCESSING_ARCHIVE_FILENAME_PREFIX`]).
pub const RESTART_PROCESSING_ARCHIVE_FILENAME_SUFFIX: &str = ".archived";

/// Durable home for restart-related forensic artifacts: the detached
/// restart log + archived stale-processing files. Out of the macOS /tmp
/// reaper window so post-mortem traces survive reboots.
pub const RESTART_ARCHIVE_SUBDIR: &str = ".netsky/state/restart-archive";

/// Default TTL (seconds) for entries in [`RESTART_ARCHIVE_SUBDIR`]. The
/// sweep preflight deletes files older than this on every tick. 30 days
/// matches the `find -mtime +30` guidance in the audit brief.
pub const RESTART_ARCHIVE_TTL_S_DEFAULT: u64 = 30 * 24 * 60 * 60;
/// Env var paired with [`RESTART_ARCHIVE_TTL_S_DEFAULT`].
pub const ENV_RESTART_ARCHIVE_TTL_S: &str = "NETSKY_RESTART_ARCHIVE_TTL_S";

/// In-flight marker for a detached `netsky restart` subprocess. The
/// watchdog tick writes `<pid>\n<iso-ts>\n` here after spawning the
/// detached restart, then releases its own lock. Subsequent ticks read
/// this file and, if the pid is still alive, skip their own
/// mode-switch body — the restart is already in hand, and running it
/// again would race with clone teardown.
pub const RESTART_INFLIGHT_FILE: &str = "/tmp/netsky-restart-inflight";
/// Consecutive-miss counter for the ticker tmux session. When the
/// ticker disappears, the watchdog increments this state so the second
/// consecutive miss can self-heal instead of requiring manual start.
pub const TICKER_MISSING_COUNT_FILE: &str = ".netsky/state/netsky-ticker-missing-count";
/// Hard ceiling (seconds; 1800 = 30 minutes) on a detached restart's
/// runtime before the in-flight marker is treated as stale and removed.
/// A legitimate restart should finish in <20min even with 8
/// pathologically slow clones; anything beyond is a stuck subprocess
/// and the next tick should take over.
pub const RESTART_INFLIGHT_STALE_S: u64 = 1800;
/// Filename of the detached restart subprocess stdout+stderr log under
/// [`RESTART_ARCHIVE_SUBDIR`]. Captures what used to print directly to
/// the tick's stdout so post-mortem debugging still has it. Resolved to
/// a full path via `paths::restart_detached_log_path()`.
pub const RESTART_DETACHED_LOG_FILENAME: &str = "netsky-restart-detached.log";

/// Default clone-count fed into `netsky restart` by the watchdog.
/// Aliased to [`DEFAULT_CLONE_COUNT`] so tuning one tunes the other —
/// the two carry the same contract and drifted silently before.
pub const WATCHDOG_RESTART_CLONE_COUNT: u32 = DEFAULT_CLONE_COUNT;

// ---- owner identity (template substitutions + escalate) -------------------

/// Display name for the owner, substituted into prompt templates that
/// address the owner by name (currently `prompts/tick-request.md`).
/// Defaults to a system-neutral phrase so a fresh deployment works
/// without any env wiring; set `NETSKY_OWNER_NAME` in the per-deployment
/// environment to personalize.
pub const OWNER_NAME_DEFAULT: &str = "the owner";
/// Env var that replaces [`OWNER_NAME_DEFAULT`] when set.
pub const ENV_OWNER_NAME: &str = "NETSKY_OWNER_NAME";

// ---- escalate (iMessage floor page) ---------------------------------------

/// Env var named `NETSKY_OWNER_IMESSAGE`.
/// NOTE(review): presumably the owner's iMessage handle that escalate
/// pages; the accepted format is not visible here — confirm.
pub const ENV_OWNER_IMESSAGE: &str = "NETSKY_OWNER_IMESSAGE";
/// Absolute path of the escalate error file.
/// NOTE(review): presumably captures osascript stderr; confirm.
pub const ESCALATE_ERR_FILE: &str = "/tmp/netsky-escalate.err";
/// Name of the `osascript` executable used to send the page.
pub const OSASCRIPT_BIN: &str = "osascript";
/// Ceiling (seconds) on osascript execution. Messages.app can hang on
/// modal dialogs or a stuck iMessage sync; escalate runs under the
/// watchdog's D1 lock, so an unbounded wait cascades into concurrent
/// watchdog ticks. 15s is generous for a one-shot AppleScript send.
pub const ESCALATE_TIMEOUT_S: u64 = 15;
/// Backoff (milliseconds) between the first osascript attempt and the
/// retry. One-shot AppleScript sends hit transient Messages.app sync
/// stalls; a 1s pause lets the sync settle without pushing total
/// wall-time past the watchdog's D1 lock budget
/// (2 * timeout + backoff < mode-switch).
pub const ESCALATE_RETRY_BACKOFF_MS: u64 = 1000;
/// Filename prefix for the durable failure marker written when both
/// escalate attempts fall over. Full name is `escalate-failed-<ts>`
/// under `state_dir()`. Surfaced by `netsky doctor` in a later pass.
pub const ESCALATE_FAILED_MARKER_PREFIX: &str = "escalate-failed-";

// ---- restart (constellation respawn) --------------------------------------
//
// NOTE(review): none of these constants is used in this file. The
// descriptions below are read off the names and values; confirm each
// against restart.rs before relying on it.

/// Wait budget, in seconds, for the agent0 "TOS" step of a restart —
/// presumably how long to look for [`RESTART_TOS_PROBE`].
pub const RESTART_AGENT0_TOS_WAIT_S: u64 = 30;
/// Wait budget, in seconds, for agent0 to come up — presumably how long
/// to look for a [`RESTART_UP_DONE_REGEX`] match.
pub const RESTART_AGENT0_UP_WAIT_S: u64 = 90;
/// Settle delay after teardown, in milliseconds (2 seconds).
pub const RESTART_TEARDOWN_SETTLE_MS: u64 = 2000;
/// Literal text probed for during restart — presumably the terms
/// prompt claude shows on launch.
pub const RESTART_TOS_PROBE: &str = "I am using this for local development";
/// Regex source matching `session ` followed by one or more digits —
/// presumably the signal that agent0's /up finished.
pub const RESTART_UP_DONE_REGEX: &str = r"session \d+";
/// Sender name stamped on handoffs. Same literal as
/// [`AGENTINFINITY_NAME`].
pub const HANDOFF_FROM: &str = "agentinfinity";
/// Env var paired with [`HANDOFF_KEEP_DEFAULT`].
pub const ENV_HANDOFF_KEEP: &str = "NETSKY_HANDOFF_KEEP";
/// Default number of handoff files to keep — presumably the retention
/// count for [`HANDOFF_ARCHIVE_SUBDIR`] (compare [`RESTART_STATUS_KEEP`]).
pub const HANDOFF_KEEP_DEFAULT: usize = 100;

#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeSet;

    /// Parse a comma-separated allowlist into an ordered set of tokens.
    /// Each piece is whitespace-trimmed; empty pieces (e.g. from a
    /// trailing or doubled comma) are dropped.
    fn tool_set(raw: &str) -> BTreeSet<&str> {
        let mut tokens = BTreeSet::new();
        for piece in raw.split(',') {
            let token = piece.trim();
            if !token.is_empty() {
                tokens.insert(token);
            }
        }
        tokens
    }

    /// List the MCP tool names a netsky-io source registers, in order of
    /// first appearance and without repeats.
    ///
    /// Two things are detected, both by plain substring scanning (no
    /// regex, no syn):
    /// - the first double-quoted literal after each `Tool::new(`, which
    ///   covers both the multi-line `Tool::new(\n    "<name>",` layout
    ///   and the inline `Tool::new("<name>", ...)` form. A literal is
    ///   accepted only if it is non-empty and made up solely of ASCII
    ///   lowercase letters, ASCII digits, and `_`.
    /// - a `.on_reply(` handler, which contributes the implicit `reply`
    ///   tool (sources such as agent + imessage register it this way
    ///   instead of through `Tool::new`). It is appended last, unless a
    ///   `Tool::new("reply", ...)` already produced it.
    ///
    /// Only the text passed in is scanned. A source that spreads its
    /// `Tool::new` calls over files other than its entrypoint is
    /// under-counted, so the caller's check can pass when it should not;
    /// that gap surfaces at the next allowlist-drift incident.
    fn discover_source_tools(src: &str) -> Vec<String> {
        let mut seen: BTreeSet<String> = BTreeSet::new();
        let mut ordered: Vec<String> = Vec::new();

        let is_tool_name = |candidate: &str| {
            !candidate.is_empty()
                && candidate
                    .chars()
                    .all(|ch| matches!(ch, 'a'..='z' | '0'..='9' | '_'))
        };

        // Everything before the first `Tool::new(` is not a call site,
        // hence the `skip(1)`. A segment with no complete quoted literal
        // is ignored.
        let literals = src.split("Tool::new(").skip(1).filter_map(|tail| {
            let (_, after_open) = tail.split_once('"')?;
            let (literal, _) = after_open.split_once('"')?;
            Some(literal)
        });
        for literal in literals {
            if is_tool_name(literal) && seen.insert(literal.to_string()) {
                ordered.push(literal.to_string());
            }
        }

        if src.contains(".on_reply(") && seen.insert("reply".to_string()) {
            ordered.push("reply".to_string());
        }
        ordered
    }

    #[test]
    fn allowed_tools_agent_subset_of_agentinfinity() {
        // Invariant: every tool the primary agents may invoke is also
        // allowlisted for agentinfinity. The watchdog is the backstop,
        // so it must never hold a narrower allowlist. This drifted
        // silently before (session 5: ALLOWED_TOOLS_AGENT omitted both
        // reply tools while AGENTINFINITY had them, and nobody noticed
        // until bus traffic dried up).
        let watchdog = tool_set(ALLOWED_TOOLS_AGENTINFINITY);
        let missing: Vec<&str> = tool_set(ALLOWED_TOOLS_AGENT)
            .into_iter()
            .filter(|tool| !watchdog.contains(tool))
            .collect();
        assert!(
            missing.is_empty(),
            "ALLOWED_TOOLS_AGENTINFINITY must be a superset of ALLOWED_TOOLS_AGENT; missing from watchdog: {missing:?}"
        );
    }

    #[test]
    fn allowed_tools_have_no_duplicates() {
        // Each allowlist is split on commas and trimmed. Empty pieces
        // are kept on purpose so a stray doubled comma still counts as
        // an entry. A list is clean when the number of first-seen
        // tokens equals the number of tokens.
        let lists = [
            ("ALLOWED_TOOLS_AGENT", ALLOWED_TOOLS_AGENT),
            ("ALLOWED_TOOLS_AGENTINFINITY", ALLOWED_TOOLS_AGENTINFINITY),
        ];
        for (name, raw) in lists {
            let parts: Vec<&str> = raw.split(',').map(str::trim).collect();
            let mut first_seen: BTreeSet<&str> = BTreeSet::new();
            let distinct = parts.iter().filter(|tok| first_seen.insert(**tok)).count();
            assert_eq!(
                parts.len(),
                distinct,
                "{name} contains duplicate entries: {parts:?}"
            );
        }
    }

    #[test]
    fn allowed_tools_include_every_netsky_io_source_reply() {
        // Spot-check a fixed set of reply/mutation tools. One that is
        // absent from ALLOWED_TOOLS_AGENT is silently blocked at the
        // claude CLI boundary (the session-5 regression).
        const REQUIRED: [&str; 4] = [
            "mcp__agent__reply",
            "mcp__imessage__reply",
            "mcp__email__reply",
            "mcp__calendar__create_event",
        ];
        let allowed = tool_set(ALLOWED_TOOLS_AGENT);
        for tool in REQUIRED {
            assert!(
                allowed.contains(tool),
                "ALLOWED_TOOLS_AGENT missing `{tool}`"
            );
        }
    }

    /// Drift guard across three places that must agree about netsky-io
    /// sources: `.mcp.json` (`mcpServers`), `.agents/settings.json`
    /// (`enabledMcpjsonServers`), and [`ALLOWED_TOOLS_AGENT`].
    ///
    /// Reads files from the repository checkout, so it only passes when
    /// run from inside the full source tree. Every violation is
    /// collected and reported in a single assertion at the end.
    #[test]
    fn netsky_io_sources_have_3_place_sync() {
        // Closes failure-mode FM-2 from briefs/failure-mode-codification.md:
        // Every production netsky-io source must stay registered in
        // `.mcp.json`, and the repo-default `.agents/settings.json`
        // allowlist must stay narrow. External channels are runtime
        // gated by `channels.toml`, so fresh clones should auto-enable
        // only the agent bus.
        //
        // Source list is discovered from netsky-io's sources/mod.rs to
        // avoid hardcoding (which is exactly the drift this test prevents).
        // `demo` is excluded — it is a dev scaffold, not a registered
        // production source.

        // `ancestors()` yields the manifest dir itself first, so `nth(3)`
        // climbs three directories. That matches the
        // `<repo>/src/crates/<crate>` layout the paths joined below assume.
        let repo_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
            .ancestors()
            .nth(3)
            .expect("repo root sits 3 levels above netsky-core's manifest dir");

        // 1. Discover sources from sources/mod.rs (`pub mod <name>;`).
        //    Only lines that, once trimmed, are exactly `pub mod <name>;`
        //    match; anything after the `;` on the same line would make
        //    the line be skipped.
        let sources_mod = repo_root.join("src/crates/netsky-io/src/sources/mod.rs");
        let mod_src = std::fs::read_to_string(&sources_mod)
            .unwrap_or_else(|e| panic!("read {}: {e}", sources_mod.display()));
        let sources: Vec<&str> = mod_src
            .lines()
            .filter_map(|l| {
                l.trim()
                    .strip_prefix("pub mod ")
                    .and_then(|s| s.strip_suffix(';'))
            })
            .filter(|s| *s != "demo")
            .collect();
        assert!(
            !sources.is_empty(),
            "no production sources discovered from {}",
            sources_mod.display()
        );

        // 2. Parse .mcp.json mcpServers.
        let mcp_path = repo_root.join(".mcp.json");
        let mcp_v: serde_json::Value = serde_json::from_str(
            &std::fs::read_to_string(&mcp_path)
                .unwrap_or_else(|e| panic!("read {}: {e}", mcp_path.display())),
        )
        .unwrap_or_else(|e| panic!("parse {}: {e}", mcp_path.display()));
        let mcp_servers = mcp_v
            .get("mcpServers")
            .and_then(|v| v.as_object())
            .expect(".mcp.json missing top-level `mcpServers` object");

        // 3. Parse .agents/settings.json enabledMcpjsonServers.
        //    Non-string array entries are dropped by the `filter_map`.
        let settings_path = repo_root.join(".agents/settings.json");
        let settings_v: serde_json::Value = serde_json::from_str(
            &std::fs::read_to_string(&settings_path)
                .unwrap_or_else(|e| panic!("read {}: {e}", settings_path.display())),
        )
        .unwrap_or_else(|e| panic!("parse {}: {e}", settings_path.display()));
        let enabled: Vec<String> = settings_v
            .get("enabledMcpjsonServers")
            .and_then(|v| v.as_array())
            .expect(".agents/settings.json missing `enabledMcpjsonServers` array")
            .iter()
            .filter_map(|v| v.as_str().map(str::to_owned))
            .collect();

        // 4. ALLOWED_TOOLS_AGENT tokens.
        let allowed = tool_set(ALLOWED_TOOLS_AGENT);

        // 5. Per-source registration check plus default allowlist
        //    shape. Collect all failures so a single test run names
        //    every drift, not just the first.
        let mut failures: Vec<String> = Vec::new();
        for src in &sources {
            if !mcp_servers.contains_key(*src) {
                failures.push(format!(
                    "`{src}` missing from .mcp.json `mcpServers` (add an entry that runs `netsky io serve -s {src}`)"
                ));
            }
            // Per-tool allowlist check: every Tool::new / on_reply in
            // the source's entrypoint must have its matching
            // `mcp__<source>__<tool>` token allowlisted. Tightens the
            // prior "at least one tool allowlisted" heuristic, which
            // missed mcp__calendar__delete_event after the calendar
            // source added delete_event without backfilling the
            // allowlist.
            //
            // The entrypoint is `sources/<src>/mod.rs` if it exists,
            // otherwise `sources/<src>.rs`.
            let source_path = [
                repo_root.join(format!("src/crates/netsky-io/src/sources/{src}/mod.rs")),
                repo_root.join(format!("src/crates/netsky-io/src/sources/{src}.rs")),
            ]
            .into_iter()
            .find(|p| p.exists());
            let Some(source_path) = source_path else {
                failures.push(format!(
                    "`{src}` declared in sources/mod.rs but neither `sources/{src}/mod.rs` nor `sources/{src}.rs` exists"
                ));
                continue;
            };
            let src_contents = std::fs::read_to_string(&source_path)
                .unwrap_or_else(|e| panic!("read {}: {e}", source_path.display()));
            let tools = discover_source_tools(&src_contents);
            if tools.is_empty() {
                failures.push(format!(
                    "`{src}` at {} exposes no tools (no `Tool::new(` call and no `.on_reply(` handler found)",
                    source_path.display()
                ));
            }
            for tool in &tools {
                let token = format!("mcp__{src}__{tool}");
                if !allowed.contains(token.as_str()) {
                    failures.push(format!(
                        "ALLOWED_TOOLS_AGENT missing `{token}` (source declares it at {}; allowlist in src/crates/netsky-core/src/consts.rs ALLOWED_TOOLS_AGENT and ALLOWED_TOOLS_AGENTINFINITY)",
                        source_path.display()
                    ));
                }
            }
        }
        // Exact equality: the default must be the single entry "agent",
        // so any extra or differently ordered server is a failure.
        if enabled != vec!["agent".to_string()] {
            failures.push(format!(
                ".agents/settings.json `enabledMcpjsonServers` must default to [\"agent\"], got {:?}",
                enabled
            ));
        }

        assert!(
            failures.is_empty(),
            "netsky-io 3-place sync drift detected (sources: {sources:?}):\n  - {}",
            failures.join("\n  - ")
        );
    }
}