// varta_watch/config/help.rs

use super::types::Config;

3impl Config {
4    /// Class-A (compile-time-config) builds replace the long help body with
5    /// a neutral one-liner so the binary's `strings` output never carries
6    /// flag literals.  The static `pub const` is always linked into the
7    /// binary — even when the corresponding code path is `#[cfg]`-gated —
8    /// so the only way to keep flag names out of the binary is to keep
9    /// them out of the constant body itself.
10    #[cfg(feature = "compile-time-config")]
11    pub const HELP: &'static str = "varta-watch (compile-time configured; no argv accepted; see \
12         book/src/architecture/compile-time-config.md)\n";
13
14    /// Verbatim `--help` text. The acceptance test asserts that every
15    /// documented long-flag substring appears in this body.
16    #[cfg(not(feature = "compile-time-config"))]
17    pub const HELP: &'static str = "\
18varta-watch — observe Varta Lifeline Protocol agents over configurable transports.
19
20USAGE:
21    varta-watch --socket <PATH> --threshold-ms <MS> [OPTIONS]
22
23REQUIRED:
24    --socket <PATH>                Path to bind the observer's UDS.
25    --threshold-ms <MS>            Per-pid silence window before a stall is
26                                    surfaced (milliseconds).
27
28OPTIONAL:
29    --recovery-exec <CMD>          Command and arguments invoked via execvp
30                                     on each unique stall. Split on
31                                     whitespace into argv; {pid} in any
32                                     argument is replaced with the numeric
33                                     PID. No shell — metacharacters have
34                                     no effect.
35    --recovery-exec-file <PATH>    Read --recovery-exec command from a file.
36                                     File must be owned by the observer's
37                                     UID and mode 0600 or stricter.
38    --recovery-debounce-ms <MS>    Per-pid debounce window for recovery
39                                     invocations (default 1000).
40    --recovery-env <KEY=VALUE>     Repeatable. Pass an environment variable
41                                     to recovery child processes. Layered on
42                                     top of the base env (cleared by default;
43                                     inherited if --recovery-inherit-env is
44                                     set).
45    --recovery-inherit-env         Inherit the observer's full environment
46                                     into recovery child processes (legacy
47                                     behaviour). WARNING: any AWS_*,
48                                     *_TOKEN, OAuth bearers, or database
49                                     URLs in the observer's env will be
50                                     visible to recovery subprocesses. The
51                                     default (without this flag) is to
52                                     clear the child env to PATH=/usr/bin:
53                                     /bin plus any explicit --recovery-env
54                                     entries. Use --recovery-env KEY=VAL
55                                     instead of this flag whenever feasible.
56    --socket-mode <OCTAL>           File mode for the observer socket
57                                     (default 0600 — owner-only r/w).
58    --export-file <PATH>            Append one tab-separated event line per
59                                     observer event to this file.
60    --export-file-max-bytes <N>     Rotate export file when its size exceeds
61                                     N bytes (keeps up to 5 generations:
62                                     PATH.1 .. PATH.5).  Without this flag
63                                     the file grows without bound.
64    --export-file-sync-every <N>    Force fdatasync(2) on the export file
65                                     every N records appended. 0 (default)
66                                     disables per-record durability — the
67                                     BufWriter is flushed only on clean
68                                     shutdown and during rotation, so a
69                                     crash can lose up to one BufWriter
70                                     worth of events. Non-zero values
71                                     trade IO for crash-time durability;
72                                     `1` matches the recovery audit log's
73                                     per-record guarantee.
74    --prom-addr <IP:PORT>          Bind a Prometheus text-format endpoint at
75                                    GET /metrics on this address.  Requires
76                                    --prom-token-file; /metrics has no
77                                    anonymous access.
78    --prom-token-file <PATH>       Path to a file containing the 64-hex-char
79                                     bearer token enforced on every /metrics
80                                     scrape.  File must be mode 0600 or
81                                     stricter, owned by the observer UID,
82                                     not a symlink.  Required when
83                                     --prom-addr is set.  Scrapers must send
84                                     'Authorization: Bearer <hex>' to
85                                     receive 200; missing/wrong tokens
86                                     return 401 and bump
87                                     varta_prom_auth_failures_total.
88    --shutdown-grace-ms <MS>       Maximum time the daemon spends in
89                                     Recovery::drop waiting for outstanding
90                                     recovery children to exit after SIGKILL
91                                     during shutdown.  Default 5000.  Minimum
92                                     100.  systemd unit's TimeoutStopSec
93                                     must be at least this value plus ~2
94                                     seconds of reap margin.
95    --recovery-timeout-ms <MS>     Kill-after deadline for recovery children;
96                                     if a child runs longer than this it is
97                                     killed via kill(2) (default: none —
98                                     child runs until completion).
99    --read-timeout-ms <MS>         UDS read timeout per poll call
100                                     (default 100).  Bounded so a stalled peer
101                                     cannot hold the observer loop indefinitely.
102    --tracker-capacity <N>          Maximum number of distinct agent pids
103                                      tracked concurrently (default 256).
104                                      Beats for new pids beyond this limit are
105                                      dropped.
106    --eviction-scan-window <N>      Maximum slots scanned per eviction
107                                      attempt (default 256). Smaller = lower
108                                      per-frame upper bound; a full table
109                                      sweep takes ceil(tracker_capacity / N)
110                                      calls. Range [1, 4096].
111    --tracker-eviction-policy <P>   Eviction policy when tracker is full:
112                                      strict (default) evicts only confirmed-
113                                      stalled agents; balanced falls back to
114                                      evicting the oldest active slot to
115                                      prevent capacity-exhaustion attacks.
116    --clock-source <MODE>          Kernel clock for stall-threshold
117                                     accounting:
118                                       monotonic     (default; pauses during
119                                                     suspend on Linux/BSD/
120                                                     macOS — SRE semantics)
121                                       boottime      (Linux only; advances
122                                                     through suspend —
123                                                     medical/embedded)
124                                       monotonic-raw (macOS/iOS only;
125                                                     mach_continuous_time;
126                                                     advances through sleep —
127                                                     macOS equivalent of
128                                                     boottime)
129                                     See book/src/architecture/safety-profiles.md.
130    --signal-handler-mode <MODE>   Signal-handler installation path on Linux:
131                                       direct  (default) — direct rt_sigaction(2)
132                                                syscall; owns the kernel ABI
133                                                end-to-end including the x86_64
134                                                trampoline. Startup readback +
135                                                live SIGUSR1 smoke test verify
136                                                correctness before the first
137                                                real SIGTERM.
138                                       libc    — libc sigaction(3) fallback;
139                                                sa_restorer is libc's __restore_rt.
140                                                Use when running on a kernel not
141                                                yet certified for the direct path.
142                                     Ignored on macOS/FreeBSD (libc is the only
143                                     option). See
144                                     book/src/architecture/signal-install.md.
145    --shutdown-after-secs <SECS>   Exit cleanly after the given uptime
146                                     (used by integration tests).
147    --udp-port <PORT>              Bind a UDP listener on this port for
148                                     network-based agents (requires --features
149                                     udp at build time). Combine with UDS or
150                                     use alone.
151    --udp-bind-addr <IP>           IP address to bind the UDP listener on.
152                                     Defaults to 127.0.0.1 (loopback) when
153                                     secure-UDP keys are configured, and
154                                     0.0.0.0 when only plaintext UDP is in
155                                     play.  A non-loopback secure-UDP bind
156                                     requires --i-accept-secure-udp-non-loopback.
157                                     Requires --udp-port.
158    --key-file <PATH>              Path to a file containing a 64-hex-char
159                                     key for secure UDP (requires --features
160                                     secure-udp at build time).
161    --accepted-key-file <PATH>     Path to a file with one hex key per line
162                                     for zero-downtime rotation (requires
163                                     --features secure-udp).
164    --master-key-file <PATH>       Path to a file containing a 64-hex-char
165                                     master key for per-agent key derivation
166                                     (requires --features secure-udp).
167    --max-beat-rate <N>            Per-pid maximum beat rate in beats/sec.
168                                     Beats arriving faster than this rate
169                                     from the same pid are dropped and
170                                     counted via varta_rate_limited_total
171                                     {reason=\"per_pid\"}.  Default: 100.
172                                     Set to 0 to disable.
173    --global-beat-rate <N>         Global beat rate cap across all senders
174                                     (beats/sec).  Defends against per-pid
175                                     rotation attacks.  Default: 5000.
176                                     Set to 0 to disable.
177    --global-beat-burst <N>        Global token-bucket burst capacity.
178                                     Default: 10000.
179    --uds-rcvbuf-bytes <N>         SO_RCVBUF size requested for the
180                                     observer UDS socket (bytes).  Linux
181                                     doubles and clamps to rmem_max;
182                                     the granted size is surfaced as
183                                     varta_observer_uds_rcvbuf_bytes.
184                                     Default: 1048576.  Set to 0 to
185                                     leave the kernel default.
186    --heartbeat-file <PATH>        Write a timestamp + loop-counter line to
187                                     this file on every poll iteration.
188                                     External watchdogs can monitor the file
189                                     mtime to detect observer stalls.
190    --self-watchdog-secs <SECS>    Spawn a background thread that (a) calls
191                                     process::abort() if the poll loop has
192                                     not ticked for longer than SECS seconds
193                                     and (b) emits systemd WATCHDOG=1 from
194                                     its own cadence.  Catches hung poll
195                                     loops AND silent watchdog-thread
196                                     deaths (H5 — see
197                                     book/src/architecture/observer-liveness.md).
198                                     Auto-enabled with a 4 s deadline when
199                                     $WATCHDOG_USEC is set by the service
200                                     manager.  Minimum 1.
201    --hw-watchdog <PATH>           Open a hardware watchdog device (e.g.
202                                     /dev/watchdog) and kick it once per
203                                     poll iteration. On clean shutdown the
204                                     magic-close byte 'V' is written to
205                                     disarm the watchdog.
206    --prom-rate-limit-per-sec <N>  Per-source-IP refill rate for the
207                                     /metrics endpoint token bucket
208                                     (default 5).  Scrapes from any single
209                                     IP arriving faster than this rate are
210                                     accepted and immediately closed
211                                     without serving.  Counted as
212                                     varta_prom_connections_dropped_total
213                                     {reason=\"rate_limit\"}.
214    --prom-rate-limit-burst <N>    Maximum burst (and bucket capacity) for
215                                     the per-source-IP token bucket
216                                     (default 10).  Tune higher only if
217                                     legitimate scrapers cluster requests.
218    --i-accept-plaintext-udp       UNSAFE: explicitly accept the security
219                                     risk of binding an unauthenticated
220                                     plaintext UDP listener.  Required
221                                     when --udp-port is set and no
222                                     --key-file / --master-key-file is
223                                     configured.  Build must also include
224                                     --features unsafe-plaintext-udp.  NOT
225                                     for production / safety-critical use;
226                                     any device with network reach to the
227                                     bound port can inject heartbeats.
228    --i-accept-secure-udp-non-loopback
229                                   UNSAFE: explicitly accept the security
230                                     risk of binding a secure-UDP listener
231                                     to a non-loopback address.  The
232                                     per-sender replay-state map carries a
233                                     1-deep eviction shadow; an attacker
234                                     with ≥1025 spoofable UDP source
235                                     addresses can rotate the shadow and
236                                     replay one captured frame per target
237                                     sender.  Required whenever
238                                     --udp-bind-addr is set to any address
239                                     other than 127.0.0.0/8 or ::1 while
240                                     secure-UDP keys are configured.
241                                     Restrict the listener's reach with
242                                     firewall rules or a private VLAN
243                                     before enabling.  See
244                                     book/src/architecture/vlp-transports.md.
245    --secure-udp-i-accept-recovery-on-unauthenticated-transport
246                                   UNSAFE: accept the security risk of
247                                     running a recovery command while the
248                                     secure-UDP listener is bound.  Secure
249                                     UDP authenticates wire bytes but cannot
250                                     attest the sending process — a holder
251                                     of the AEAD key can forge a beat for
252                                     any pid.  Without this flag, combining
253                                     --udp-port (with key files) and a
254                                     recovery command is rejected at startup.
255                                     This flag stamps beats from the secure-
256                                     UDP listener as operator-attested so
257                                     the runtime recovery gate fires.
258    --plaintext-udp-i-accept-recovery-on-unauthenticated-transport
259                                   UNSAFE: accept the security risk of
260                                     running a recovery command while the
261                                     plaintext-UDP listener is bound.
262                                     Plaintext UDP has no authentication —
263                                     any host can forge any frame.  Without
264                                     this flag, combining --udp-port (without
265                                     key files) and a recovery command is
266                                     rejected at startup.  This flag stamps
267                                     beats from the plaintext-UDP listener
268                                     as operator-attested so recovery fires.
269    --allow-cross-namespace-agents UNSAFE: permit beats and recovery for
270                                     agents whose kernel-attested PID
271                                     namespace differs from the observer's.
272                                     Default behaviour drops cross-namespace
273                                     beats at receive and refuses recovery
274                                     with reason=cross_namespace_agent. Use
275                                     only when agents run with --pid=host or
276                                     an out-of-band PID translator is in the
277                                     recovery template — otherwise kill(2)
278                                     would target the wrong process. Linux
279                                     only; no-op on other platforms. See
280                                     book/src/architecture/namespaces.md.
281    --strict-namespace-check       Treat a cross-namespace agent as a fatal
282                                     startup error instead of the default
283                                     refuse-recovery behaviour. Useful when
284                                     the operator wants the daemon to fail
285                                     loudly rather than silently log audit
286                                     refusals.
287    --recovery-audit-file <PATH>   Append a tab-separated audit record for
288                                     every recovery spawn and completion.
289                                     Records carry wall-clock + observer
290                                     timestamps, agent pid, child pid,
291                                     mode, outcome, exit code, signal,
292                                     duration, and captured stdio
293                                     lengths. The file is created mode
294                                     0600.
295    --recovery-audit-max-bytes <N> Rotate the audit file after every write
296                                     that pushes it above N bytes. Up to
297                                     5 generations kept.
298    --recovery-audit-sync-every <N> How many records to write between
299                                     forced fdatasync(2) calls on the
300                                     audit file. Default 1 (sync every
301                                     record) — the only IEC 62304
302                                     Class C-conforming value. Values >1
303                                     emit a startup warning. 0 is
304                                     rejected at parse time.
305    --audit-fsync-budget-ms <MS>   Soft per-call budget for a single
306                                     fdatasync(2) on the audit file. If
307                                     one fsync exceeds this, the
308                                     remaining records in the current
309                                     drain are written-to-BufWriter only
310                                     and the fsync is deferred to the
311                                     next tick — bounds the worst-case
312                                     poll stall on a slow disk to one
313                                     fsync per tick. Overruns increment
314                                     varta_audit_fsync_budget_exceeded_total.
315                                     Default 50. 0 is rejected at parse
316                                     time.
317    --audit-sync-interval-ms <MS>  Time-based fdatasync cadence in
318                                     addition to --recovery-audit-sync-every.
319                                     0 (default) disables the time
320                                     cadence; with a non-zero value the
321                                     drain force-syncs after this many
322                                     ms have elapsed since the last
323                                     sync. Operators on safety-critical
324                                     profiles keep
325                                     --recovery-audit-sync-every=1 and
326                                     ignore this flag.
327    --audit-rotation-budget-ms <MS> Per-tick wall-clock budget for the
328                                     audit-log rotation state machine.
329                                     Rotation (rename × 5 + reopen +
330                                     header + boot record + fsync)
331                                     advances incrementally; if a tick
332                                     exceeds this budget the state is
333                                     preserved and the next tick
334                                     resumes. Overruns increment
335                                     varta_audit_rotation_budget_exceeded_total.
336                                     Default 50. 0 is rejected at parse
337                                     time.
338    --recovery-capture-stdio       Capture child stdout/stderr non-
339                                     blockingly so its length and
340                                     truncation status appear in the audit
341                                     record. Off by default — opt in only
342                                     when you have a recovery command whose
343                                     output is bounded.
344    --recovery-capture-bytes <N>   Total combined byte cap (stdout +
345                                     stderr) per child when capture is
346                                     enabled. Default 4096; max 1048576.
347    --iteration-budget-ms <MS>     Soft per-iteration budget for the
348                                     observer poll loop. Iterations that
349                                     exceed this increment
350                                     varta_observer_iteration_budget_exceeded_total
351                                     and are visible in the
352                                     varta_observer_iteration_seconds
353                                     histogram. Advisory only — hard
354                                     wedges are caught by
355                                     --self-watchdog-secs.  Default 250.
356                                     Range [50, 60000].  See
357                                     book/src/architecture/observer-liveness.md
358                                     for the worst-case derivation.
359    --scrape-budget-ms <MS>        Soft per-call budget for serve_pending
360                                     (the /metrics serving phase of one
361                                     poll iteration). Overruns increment
362                                     varta_observer_scrape_budget_exceeded_total
363                                     and are visible in
364                                     varta_observer_serve_pending_seconds.
365                                     Separates scrape-storm alarms from
366                                     beat-path slowness. Default 250.
367                                     Range [50, 60000].
368
369    -h, --help                     Print this message and exit.
370";
371}